From e73bb6ed479655ec4f4fb0d1725f09a6bfd7457a Mon Sep 17 00:00:00 2001 From: EugenCepoi Date: Thu, 5 Nov 2015 18:14:25 -0800 Subject: [PATCH 1/4] Fixes #452 - supporting thrift 0.7 and 0.9 via multiple maven profiles --- .travis.yml | 27 ++++--- Readme.md | 2 +- core/pom.xml | 68 ++++++++++++++++ .../mapreduce/io/ThriftConverter.java | 6 +- .../thrift/ThriftBinaryDeserializer.java | 21 ++--- .../thrift/ThriftBinaryProtocol.java | 22 ++---- .../thrift/TestThriftBinaryProtocol.java | 79 ++----------------- .../test/java/org/apache/thrift/Fixtures.java | 5 -- .../elephantbird/thrift/ThriftCompat.java | 45 +++++++++++ .../thrift/TestThrift7BinaryProtocol.java | 75 ++++++++++++++++++ .../elephantbird/thrift/ThriftCompat.java | 20 +++++ .../pig/piggybank/BytesToThriftTuple.java | 5 +- pom.xml | 41 +++++----- 13 files changed, 282 insertions(+), 134 deletions(-) create mode 100644 core/thrift7/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java create mode 100644 core/thrift7/src/test/java/com/twitter/elephantbird/thrift/TestThrift7BinaryProtocol.java create mode 100644 core/thrift9/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java diff --git a/.travis.yml b/.travis.yml index 39792495f..829d60c59 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,11 +7,16 @@ before_install: - sudo apt-get update -qq - sudo apt-get install -qq protobuf-compiler - sudo apt-get install -qq libboost-dev libboost-test-dev libboost-program-options-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev - - wget -nv http://archive.apache.org/dist/thrift/0.7.0/thrift-0.7.0.tar.gz - - tar zxf thrift-0.7.0.tar.gz - - cd thrift-0.7.0 - - chmod +x ./configure - - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang + - git clone https://git-wip-us.apache.org/repos/asf/thrift.git + - cd thrift + - git checkout 0.7.0 + - ./bootstrap.sh + - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell 
--without-erlang --prefix=/var/thrift-old + - sudo make install + - sudo make clean + - git checkout 0.9.1 + - ./bootstrap.sh + - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang --prefix=/var/thrift-new - sudo make install - cd .. - sudo apt-get -qq install lzop liblzo2-dev # libzo2-dev for compiling hadoop-lzo @@ -22,12 +27,16 @@ before_install: - mv target/native/Linux-* ../hadoop-lzo-native - cd .. +install: true + script: - echo ============ Build and test with hadoop2 profile ===================== - - mvn clean test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2 + - mvn clean test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2 - echo ============ Build with hadoop2 and test with hadoop1 ================ - - mvn test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop1 + - mvn test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true - echo ============ Build and test with hadoop1 profile ===================== - - mvn clean test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop1 + - mvn clean test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true - echo ============ Build with hadoop2 and test with hadoop1 ================ - - mvn test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2 + - mvn test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2 + - echo ============ Build and test for thrift 0.9+ ================ + - mvn clean test -Dthrift.cmd=/var/thrift-new/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P thrift9 diff --git a/Readme.md b/Readme.md index db4b5f893..c8e51bb53 
100644 --- a/Readme.md +++ b/Readme.md @@ -39,7 +39,7 @@ Elephant Bird release artifacts are published to the [Sonatype OSS](https://oss. 1. Pig 0.8+ 1. Protocol Buffers 2.5.0, 2.4.1, 2.3.0 (default build version is 2.4.1 can be changed with `-Dprotobuf.version=2.3.0`) 1. Hive 0.7 (with HIVE-1616) -1. Thrift 0.5.0, 0.6.0, 0.7.0 +1. Thrift 0.5.0, 0.6.0, 0.7.0, greater versions than 0.9 are provided via thrift9 maven profile 1. Mahout 0.6 1. Cascading2 (as the API is evolving, see libraries.properties for the currently supported version) 1. Crunch 0.8.1+ diff --git a/core/pom.xml b/core/pom.xml index f828729c2..ac29e7501 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -69,6 +69,74 @@ + + + thrift7 + + true + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + thrift7/src/main/java + + + + + add-test-source + generate-test-sources + + add-test-source + + + + thrift7/src/test/java + + + + + + + + + + thrift9 + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + thrift9/src/main/java + + + + + + + + + + diff --git a/core/src/main/java/com/twitter/elephantbird/mapreduce/io/ThriftConverter.java b/core/src/main/java/com/twitter/elephantbird/mapreduce/io/ThriftConverter.java index 0ca03c878..e3c3858f3 100644 --- a/core/src/main/java/com/twitter/elephantbird/mapreduce/io/ThriftConverter.java +++ b/core/src/main/java/com/twitter/elephantbird/mapreduce/io/ThriftConverter.java @@ -1,8 +1,5 @@ package com.twitter.elephantbird.mapreduce.io; -import java.io.IOException; - -import com.twitter.elephantbird.thrift.ThriftBinaryDeserializer; import org.apache.thrift.TBase; import org.apache.thrift.TDeserializer; import org.apache.thrift.TException; @@ -10,6 +7,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.twitter.elephantbird.thrift.ThriftCompat; import com.twitter.elephantbird.util.TypeRef; public class ThriftConverter> implements 
BinaryConverter { @@ -51,7 +49,7 @@ public ThriftConverter(TypeRef typeRef) { public M fromBytes(byte[] messageBuffer) throws DecodeException { try { if (deserializer == null) - deserializer = new ThriftBinaryDeserializer(); + deserializer = ThriftCompat.createBinaryDeserializer(); M message = typeRef.safeNewInstance(); deserializer.deserialize(message, messageBuffer); return message; diff --git a/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryDeserializer.java b/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryDeserializer.java index 6ed9768a0..b68d27d69 100644 --- a/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryDeserializer.java +++ b/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryDeserializer.java @@ -4,11 +4,6 @@ import org.apache.thrift.TDeserializer; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.protocol.TList; -import org.apache.thrift.protocol.TMap; -import org.apache.thrift.protocol.TProtocolFactory; -import org.apache.thrift.protocol.TSet; -import org.apache.thrift.protocol.TType; import org.apache.thrift.transport.TMemoryInputTransport; /** @@ -24,14 +19,19 @@ *
  • {@code deserialize(buf, offset, len)} method can avoid buffer copies. * Serialized struct need not span a entire byte array. * + * + * To obtain an instance of ThriftBinaryDeserializer use {@link ThriftCompat#createBinaryDeserializer()}. + * It will take care of cross version compatibility between thrift 0.7 and 0.9+ code. + * + * @see ThriftCompat */ public class ThriftBinaryDeserializer extends TDeserializer { // use protocol and transport directly instead of using ones in TDeserializer private final TMemoryInputTransport trans = new TMemoryInputTransport(); - private final TBinaryProtocol protocol = new ThriftBinaryProtocol(trans); + private final TBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(trans); - public ThriftBinaryDeserializer() { + ThriftBinaryDeserializer() { super(new ThriftBinaryProtocol.Factory()); } @@ -44,9 +44,12 @@ public void deserialize(TBase base, byte[] bytes) throws TException { * Same as {@link #deserialize(TBase, byte[])}, but much more buffer copy friendly. */ public void deserialize(TBase base, byte[] bytes, int offset, int len) throws TException { - protocol.reset(); - protocol.setReadLength(len); // reduces OutOfMemoryError exceptions + resetAndInitialize(protocol, len); trans.reset(bytes, offset, len); base.read(protocol); } + + protected void resetAndInitialize(TBinaryProtocol protocol, int newLength) { + protocol.reset(); + } } diff --git a/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryProtocol.java b/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryProtocol.java index 1ac28da57..495a771c6 100644 --- a/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryProtocol.java +++ b/core/src/main/java/com/twitter/elephantbird/thrift/ThriftBinaryProtocol.java @@ -16,10 +16,15 @@ * * Overwrites a few methods so that some malformed messages don't end up * taking excessively large amounts of cpu inside TProtocolUtil.skip(). 
+ * + * To obtain an instance of ThriftBinaryProtocol use {@link ThriftCompat#createBinaryProtocol(TTransport)}. + * It will take care of cross version compatibility between thrift 0.7 and 0.9+ code. + * + * @see ThriftCompat */ public class ThriftBinaryProtocol extends TBinaryProtocol { - public ThriftBinaryProtocol(TTransport trans) { + ThriftBinaryProtocol(TTransport trans) { super(trans); } @@ -82,29 +87,18 @@ public TSet readSetBegin() throws TException { } /** - * Check if the container size if valid. - * - * NOTE: This assumes that the elements are one byte each. - * So this does not catch all cases, but does increase the chances of - * handling malformed lengths when the number of remaining bytes in - * the underlying Transport is clearly less than the container size - * that the Transport provides. + * Check if the container size is valid. */ protected void checkContainerSize(int size) throws TProtocolException { if (size < 0) { throw new TProtocolException("Negative container size: " + size); } - if (checkReadLength_) { - if ((readLength_ - size) < 0) { - throw new TProtocolException("Remaining message length is " + readLength_ + " but container size in underlying TTransport is set to at least: " + size); - } - } } public static class Factory implements TProtocolFactory { public TProtocol getProtocol(TTransport trans) { - return new ThriftBinaryProtocol(trans); + return ThriftCompat.createBinaryProtocol(trans); } } } diff --git a/core/src/test/java/com/twitter/elephantbird/thrift/TestThriftBinaryProtocol.java b/core/src/test/java/com/twitter/elephantbird/thrift/TestThriftBinaryProtocol.java index 511ca63ed..ae53072a6 100644 --- a/core/src/test/java/com/twitter/elephantbird/thrift/TestThriftBinaryProtocol.java +++ b/core/src/test/java/com/twitter/elephantbird/thrift/TestThriftBinaryProtocol.java @@ -21,9 +21,6 @@ public class TestThriftBinaryProtocol { - int METADATA_BYTES = 5; // type(1) + size(4) - int MAP_METADATA_BYTES = 6; // key type(1) + value 
type(1) + size(4) - // helper method to set container size correctly in the supplied byte array protected void setContainerSize(byte[] buf, int n) { byte[] b = ByteBuffer.allocate(4).putInt(n).array(); @@ -37,7 +34,7 @@ protected void setDataType(byte[] buf) { } // mock transport for Set and List container types - private TTransport getMockTransport(final int containerSize) throws TException { + protected TTransport getMockTransport(final int containerSize) throws TException { TTransport transport = createStrictMock(TTransport.class); // not using buffered mode for tests, so return -1 per the contract expect(transport.getBytesRemainingInBuffer()).andReturn(-1); @@ -68,7 +65,7 @@ public Integer answer() { } // mock transport for Map container type - private TTransport getMockMapTransport(final int containerSize) throws TException { + protected TTransport getMockMapTransport(final int containerSize) throws TException { TTransport transport = createStrictMock(TTransport.class); // not using buffered mode for tests, so return -1 per the contract expect(transport.getBytesRemainingInBuffer()).andReturn(-1); @@ -118,46 +115,19 @@ public void testCheckContainerSizeValid() throws TException { transport = getMockTransport(3); replay(transport); - protocol = new ThriftBinaryProtocol(transport); + protocol = ThriftCompat.createBinaryProtocol(transport); protocol.readListBegin(); verify(transport); transport = getMockTransport(3); replay(transport); - protocol = new ThriftBinaryProtocol(transport); + protocol = ThriftCompat.createBinaryProtocol(transport); protocol.readSetBegin(); verify(transport); transport = getMockMapTransport(3); replay(transport); - protocol = new ThriftBinaryProtocol(transport); - protocol.readMapBegin(); - verify(transport); - } - - @Test - public void testCheckContainerSizeValidWhenCheckReadLength() throws TException { - TTransport transport; - ThriftBinaryProtocol protocol; - - transport = getMockTransport(3); - replay(transport); - protocol = new 
ThriftBinaryProtocol(transport); - protocol.setReadLength(METADATA_BYTES + 3); - protocol.readListBegin(); - verify(transport); - - transport = getMockTransport(3); - replay(transport); - protocol = new ThriftBinaryProtocol(transport); - protocol.setReadLength(METADATA_BYTES + 3); - protocol.readSetBegin(); - verify(transport); - - transport = getMockMapTransport(3); - replay(transport); - protocol = new ThriftBinaryProtocol(transport); - protocol.setReadLength(MAP_METADATA_BYTES + 3); + protocol = ThriftCompat.createBinaryProtocol(transport); protocol.readMapBegin(); verify(transport); } @@ -167,7 +137,7 @@ public void testCheckListContainerSizeInvalid() throws TException { // any negative value is considered invalid when checkReadLength is not enabled TTransport transport = getMockTransport(-1); replay(transport); - ThriftBinaryProtocol protocol = new ThriftBinaryProtocol(transport); + ThriftBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(transport); protocol.readListBegin(); verify(transport); } @@ -176,7 +146,7 @@ public void testCheckListContainerSizeInvalid() throws TException { public void testCheckSetContainerSizeInvalid() throws TException { TTransport transport = getMockTransport(-1); replay(transport); - ThriftBinaryProtocol protocol = new ThriftBinaryProtocol(transport); + ThriftBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(transport); protocol.readSetBegin(); verify(transport); } @@ -185,40 +155,7 @@ public void testCheckSetContainerSizeInvalid() throws TException { public void testCheckMapContainerSizeInvalid() throws TException { TTransport transport = getMockMapTransport(-1); replay(transport); - ThriftBinaryProtocol protocol = new ThriftBinaryProtocol(transport); - protocol.readMapBegin(); - verify(transport); - } - - @Test(expected=TProtocolException.class) - public void testCheckListContainerSizeInvalidWhenCheckReadLength() throws TException { - TTransport transport = getMockTransport(400); - replay(transport); - 
ThriftBinaryProtocol protocol = new ThriftBinaryProtocol(transport); - protocol.setReadLength(METADATA_BYTES + 3); - // this throws because size returned by Transport (400) > size per readLength (3) - protocol.readListBegin(); - verify(transport); - } - - @Test(expected=TProtocolException.class) - public void testCheckSetContainerSizeInvalidWhenCheckReadLength() throws TException { - TTransport transport = getMockTransport(400); - replay(transport); - ThriftBinaryProtocol protocol = new ThriftBinaryProtocol(transport); - // this throws because size returned by Transport (400) > size per readLength (3) - protocol.setReadLength(METADATA_BYTES + 3); - protocol.readSetBegin(); - verify(transport); - } - - @Test(expected=TProtocolException.class) - public void testCheckMapContainerSizeInvalidWhenCheckReadLength() throws TException { - TTransport transport = getMockMapTransport(400); - replay(transport); - ThriftBinaryProtocol protocol = new ThriftBinaryProtocol(transport); - // this throws because size returned by Transport (400) > size per readLength (3) - protocol.setReadLength(MAP_METADATA_BYTES + 3); + ThriftBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(transport); protocol.readMapBegin(); verify(transport); } diff --git a/core/src/test/java/org/apache/thrift/Fixtures.java b/core/src/test/java/org/apache/thrift/Fixtures.java index ed512adcd..41417ff0b 100644 --- a/core/src/test/java/org/apache/thrift/Fixtures.java +++ b/core/src/test/java/org/apache/thrift/Fixtures.java @@ -31,7 +31,6 @@ public class Fixtures { public static final OneOfEach oneOfEach; public static final Nesting nesting; public static final HolyMoley holyMoley; - public static final CompactProtoTestStruct compactProtoTestStruct; private static final byte[] kUnicodeBytes = { (byte)0xd3, (byte)0x80, (byte)0xe2, (byte)0x85, (byte)0xae, (byte)0xce, @@ -113,10 +112,6 @@ public class Fixtures { b.type = 5; b.message = "nevermore"; holyMoley.bonks.put("three", stage2); - - // superhuge 
compact proto test struct - compactProtoTestStruct = new CompactProtoTestStruct(thrift.test.Constants.COMPACT_TEST); - compactProtoTestStruct.a_binary = ByteBuffer.wrap(new byte[]{0,1,2,3,4,5,6,7,8}); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/core/thrift7/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java b/core/thrift7/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java new file mode 100644 index 000000000..1c1c97b33 --- /dev/null +++ b/core/thrift7/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java @@ -0,0 +1,45 @@ +package com.twitter.elephantbird.thrift; + +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.protocol.TProtocolException; +import org.apache.thrift.transport.TTransport; + +/** + * ThriftCompat is used to be a bridge between thrift cross version compatible code, which can live in the main source + * directory, and isolated incompatible code. + * + * Version incompatible code should be isolated in ThriftCompat under folder thrift7 or thrift9, depending for what version + * your specific code is. + */ +public class ThriftCompat { + + public static ThriftBinaryDeserializer createBinaryDeserializer() { + return new ThriftBinaryDeserializer() { + protected void resetAndInitialize(TBinaryProtocol protocol, int newLength) { + super.resetAndInitialize(protocol, newLength); + protocol.setReadLength(newLength); // reduces OutOfMemoryError exceptions + } + }; + } + + public static ThriftBinaryProtocol createBinaryProtocol(TTransport tTransport) { + return new ThriftBinaryProtocol(tTransport) { + /** + * NOTE: This assumes that the elements are one byte each. + * So this does not catch all cases, but does increase the chances of + * handling malformed lengths when the number of remaining bytes in + * the underlying Transport is clearly less than the container size + * that the Transport provides. 
+ */ + protected void checkContainerSize(int size) throws TProtocolException { + super.checkContainerSize(size); + if (checkReadLength_ && (readLength_ - size) < 0) { + + throw new TProtocolException( + "Remaining message length is " + readLength_ + " but container size in underlying TTransport is set to at least: " + size + ); + } + } + }; + } +} diff --git a/core/thrift7/src/test/java/com/twitter/elephantbird/thrift/TestThrift7BinaryProtocol.java b/core/thrift7/src/test/java/com/twitter/elephantbird/thrift/TestThrift7BinaryProtocol.java new file mode 100644 index 000000000..83f3c129a --- /dev/null +++ b/core/thrift7/src/test/java/com/twitter/elephantbird/thrift/TestThrift7BinaryProtocol.java @@ -0,0 +1,75 @@ +package com.twitter.elephantbird.thrift; + +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TProtocolException; +import org.apache.thrift.transport.TTransport; +import org.junit.Test; + +import static org.easymock.EasyMock.replay; +import static org.easymock.EasyMock.verify; + +public class TestThrift7BinaryProtocol extends TestThriftBinaryProtocol { + + int METADATA_BYTES = 5; // type(1) + size(4) + int MAP_METADATA_BYTES = 6; // key type(1) + value type(1) + size(4) + + @Test + public void testCheckContainerSizeValidWhenCheckReadLength() throws TException { + TTransport transport; + ThriftBinaryProtocol protocol; + + transport = getMockTransport(3); + replay(transport); + protocol = ThriftCompat.createBinaryProtocol(transport); + protocol.setReadLength(METADATA_BYTES + 3); + protocol.readListBegin(); + verify(transport); + + transport = getMockTransport(3); + replay(transport); + protocol = ThriftCompat.createBinaryProtocol(transport); + protocol.setReadLength(METADATA_BYTES + 3); + protocol.readSetBegin(); + verify(transport); + + transport = getMockMapTransport(3); + replay(transport); + protocol = ThriftCompat.createBinaryProtocol(transport); + protocol.setReadLength(MAP_METADATA_BYTES + 3); + protocol.readMapBegin(); + 
verify(transport); + } + + @Test(expected=TProtocolException.class) + public void testCheckListContainerSizeInvalidWhenCheckReadLength() throws TException { + TTransport transport = getMockTransport(400); + replay(transport); + ThriftBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(transport); + protocol.setReadLength(METADATA_BYTES + 3); + // this throws because size returned by Transport (400) > size per readLength (3) + protocol.readListBegin(); + verify(transport); + } + + @Test(expected=TProtocolException.class) + public void testCheckSetContainerSizeInvalidWhenCheckReadLength() throws TException { + TTransport transport = getMockTransport(400); + replay(transport); + ThriftBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(transport); + // this throws because size returned by Transport (400) > size per readLength (3) + protocol.setReadLength(METADATA_BYTES + 3); + protocol.readSetBegin(); + verify(transport); + } + + @Test(expected=TProtocolException.class) + public void testCheckMapContainerSizeInvalidWhenCheckReadLength() throws TException { + TTransport transport = getMockMapTransport(400); + replay(transport); + ThriftBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(transport); + // this throws because size returned by Transport (400) > size per readLength (3) + protocol.setReadLength(MAP_METADATA_BYTES + 3); + protocol.readMapBegin(); + verify(transport); + } +} diff --git a/core/thrift9/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java b/core/thrift9/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java new file mode 100644 index 000000000..9d53d6399 --- /dev/null +++ b/core/thrift9/src/main/java/com/twitter/elephantbird/thrift/ThriftCompat.java @@ -0,0 +1,20 @@ +package com.twitter.elephantbird.thrift; + +import org.apache.thrift.transport.TTransport; + +/** + * ThriftCompat is used to be a bridge between thrift cross version compatible code, which can live in the main source + * directory, and 
isolated incompatible code. + * + * Version incompatible code should be isolated in ThriftCompat under folder thrift7 or thrift9, depending for what version + * your specific code is. + */ +public class ThriftCompat { + public static ThriftBinaryDeserializer createBinaryDeserializer() { + return new ThriftBinaryDeserializer(); + } + + public static ThriftBinaryProtocol createBinaryProtocol(TTransport tTransport) { + return new ThriftBinaryProtocol(tTransport); + } +} diff --git a/pig/src/main/java/com/twitter/elephantbird/pig/piggybank/BytesToThriftTuple.java b/pig/src/main/java/com/twitter/elephantbird/pig/piggybank/BytesToThriftTuple.java index edfa9c7b7..29a1a5a11 100644 --- a/pig/src/main/java/com/twitter/elephantbird/pig/piggybank/BytesToThriftTuple.java +++ b/pig/src/main/java/com/twitter/elephantbird/pig/piggybank/BytesToThriftTuple.java @@ -2,14 +2,13 @@ import java.io.IOException; -import com.twitter.elephantbird.thrift.ThriftBinaryDeserializer; +import com.twitter.elephantbird.thrift.ThriftCompat; import org.apache.pig.EvalFunc; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.Tuple; import org.apache.thrift.TBase; import org.apache.thrift.TDeserializer; import org.apache.thrift.TException; -import org.apache.thrift.protocol.TBinaryProtocol; import com.twitter.elephantbird.pig.util.ThriftToPig; import com.twitter.elephantbird.util.TypeRef; @@ -30,7 +29,7 @@ */ public abstract class BytesToThriftTuple> extends EvalFunc { - private final TDeserializer deserializer_ = new ThriftBinaryDeserializer(); + private final TDeserializer deserializer_ = ThriftCompat.createBinaryDeserializer(); private ThriftToPig thriftToTuple_; private TypeRef typeRef_; diff --git a/pom.xml b/pom.xml index 68bc26111..2c8798171 100644 --- a/pom.xml +++ b/pom.xml @@ -78,14 +78,19 @@ 1.6.4 1.1.2 2.0.3-alpha + ${apache.hadoop1.version} 0.11.1 + 0.8.2 + ${apache.crunch.hadoop1.version} 0.8.2-hadoop2 0.8.0 0.6 4.0.0 2.4.1 0.4.19 + 0.7.0 + thrift @@ -248,7 +253,7 @@ 
org.apache.thrift libthrift - 0.7.0 + ${thrift.version} com.googlecode.json-simple @@ -415,21 +420,6 @@ - - hadoop1 - - true - - - - ${apache.hadoop1.version} - - - - ${apache.crunch.hadoop1.version} - - - hadoop2 @@ -442,6 +432,21 @@ + + thrift7 + + true + + + 0.7.0 + + + + thrift9 + + 0.9.1 + + @@ -583,7 +588,7 @@ org.codehaus.mojo build-helper-maven-plugin - 1.7 + 1.9.1 @@ -599,7 +604,7 @@ maven-thrift-plugin 0.1.10 - thrift + ${thrift.cmd} From ff41cf21d45a1586768fdad9eea97379bc093c6f Mon Sep 17 00:00:00 2001 From: EugenCepoi Date: Wed, 11 Nov 2015 17:26:43 -0800 Subject: [PATCH 2/4] Using travis matrix feature to speedup the build --- .travis.yml | 35 ++++++++++++++--------------------- pom.xml | 3 +-- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/.travis.yml b/.travis.yml index 829d60c59..f77221a65 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,22 +1,14 @@ language: java -jdk: - - openjdk7 - before_install: - sudo apt-get update -qq - sudo apt-get install -qq protobuf-compiler - sudo apt-get install -qq libboost-dev libboost-test-dev libboost-program-options-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev - git clone https://git-wip-us.apache.org/repos/asf/thrift.git - cd thrift - - git checkout 0.7.0 - - ./bootstrap.sh - - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang --prefix=/var/thrift-old - - sudo make install - - sudo make clean - - git checkout 0.9.1 + - git checkout $THRIFT_TAG - ./bootstrap.sh - - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang --prefix=/var/thrift-new + - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang - sudo make install - cd .. 
- sudo apt-get -qq install lzop liblzo2-dev # libzo2-dev for compiling hadoop-lzo @@ -29,14 +21,15 @@ before_install: install: true -script: - - echo ============ Build and test with hadoop2 profile ===================== - - mvn clean test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2 - - echo ============ Build with hadoop2 and test with hadoop1 ================ - - mvn test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true - - echo ============ Build and test with hadoop1 profile ===================== - - mvn clean test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true - - echo ============ Build with hadoop2 and test with hadoop1 ================ - - mvn test -Dthrift.cmd=/var/thrift-old/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2 - - echo ============ Build and test for thrift 0.9+ ================ - - mvn clean test -Dthrift.cmd=/var/thrift-new/bin/thrift -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P thrift9 +matrix: + include: + - jdk: openjdk7 + env: THRIFT_TAG=0.7.0 THRIFT_PROFILE=-Pthrift7 + - jdk: openjdk7 + env: THRIFT_TAG=0.7.0 THRIFT_PROFILE=-Pthrift7 HADOOP_PROFILE=-Phadoop2 + - jdk: openjdk7 + env: THRIFT_TAG=0.9.1 THRIFT_PROFILE=-Pthrift9 + - jdk: openjdk7 + env: THRIFT_TAG=0.9.1 THRIFT_PROFILE=-Pthrift9 HADOOP_PROFILE=-Phadoop2 + +script: "mvn test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true $HADOOP_PROFILE $THRIFT_PROFILE" diff --git a/pom.xml b/pom.xml index 2c8798171..7b6f09159 100644 --- a/pom.xml +++ b/pom.xml @@ -90,7 +90,6 @@ 2.4.1 0.4.19 0.7.0 - thrift @@ -604,7 +603,7 @@ maven-thrift-plugin 0.1.10 - ${thrift.cmd} + thrift From 35c441c4f5e1c11f04a118a1b142232c263d56f9 Mon Sep 17 00:00:00 2001 From: EugenCepoi Date: Mon, 14 Dec 2015 
18:54:30 -0800 Subject: [PATCH 3/4] Automatic release against thrift 7 and 9 --- core/pom.xml | 11 ++ pom.xml | 9 +- release.sh | 285 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 303 insertions(+), 2 deletions(-) create mode 100755 release.sh diff --git a/core/pom.xml b/core/pom.xml index ac29e7501..5287954bf 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -132,6 +132,17 @@ + + maven-jar-plugin + + + default-jar + + thrift9 + + + + diff --git a/pom.xml b/pom.xml index 7b6f09159..b96de933e 100644 --- a/pom.xml +++ b/pom.xml @@ -88,8 +88,10 @@ 0.6 4.0.0 2.4.1 + protoc 0.4.19 0.7.0 + thrift @@ -594,7 +596,10 @@ com.github.igor-petruk.protobuf protobuf-maven-plugin - 0.4 + 0.6.5 + + ${protoc.executable} + @@ -603,7 +608,7 @@ maven-thrift-plugin 0.1.10 - thrift + ${thrift.executable} diff --git a/release.sh b/release.sh new file mode 100755 index 000000000..cc6ec37a0 --- /dev/null +++ b/release.sh @@ -0,0 +1,285 @@ +#!/usr/bin/env bash + +set -ex + +# Trying to install it for the user so he doesn't have to bother with installing stuff by hand +sudo apt-get -qq install xmlstarlet + +# Global default vars used in this script +###################################################################################### +BASE_DIR=$PWD +WORK_DIR=/tmp/elephant-bird_release +COMMAND="release" +ACTUAL_VERSION=$(xmlstarlet sel -t -v "/_:project/_:version" pom.xml) +RELEASE_VERSION=$(echo $ACTUAL_VERSION|sed 's/-SNAPSHOT//') +BASE_BRANCH=$(git rev-parse --abbrev-ref HEAD) +DIRTY_SCM=false +GIT_REPO=git@github.com:twitter/elephant-bird.git +THRIFT7_PATH="/tmp/thrift7" +THRIFT9_PATH="/tmp/thrift9" +HADOOP_LZO_PATH="/tmp/hadoop-lzo-native" +PROTOBUF_PATH="/tmp/protobuf" + +###################################################################################### + +while [[ $# > 1 ]]; do +key="$1" + +case $key in + -c|--command) + COMMAND="$2" + ;; + -r|--release-version) + RELEASE_VERSION="$2" + ;; + -n|--next-version) + NEXT_DEV_VERSION="$2" + ;; + -d|--dirty-scm) + 
DIRTY_SCM="$2" + ;; + *) + echo "Unknown parameter $key" + echo "Usage: ./release.sh [OPTION] +-c,--comand commandName + possible values test|install|deploy|release +-r,--release-version versionNumber + will be used to deploy the artifacts and make the release branch +-n,--next-version versionNumber + will be used as the new version after the release +-d,--dirty-scm true/false + false by default, all changes must be in sync with origin" + exit 1 + ;; +esac +shift 2 +done + +####################################################################################################################### +####################################################################################################################### +####################################################################################################################### + +function checkNoUncommitedChanges { + if [ "$DIRTY_SCM" == "false" ]; then + echo "Checking that there are no uncommited changes" + git diff-index --quiet origin/$BASE_BRANCH -- + local RET=$? + if [ $RET != 0 ]; then + echo "You have uncommited changes, please commit and push to origin everything before deploying the doc." + exit $RET; + fi; + fi; +} + +# We don't care here about updating dependencies versions as we use the project version for dependencies between modules +function updateVersions { + local PROJECT=$1 + local PROJECT_POM=$1/pom.xml + local TARGET_VERSION=$2 + + xmlstarlet edit -L -u "/_:project/_:version" -v $TARGET_VERSION $PROJECT_POM + + for MODULE in $(xmlstarlet sel -t -v "/_:project/_:modules/_:module" $PROJECT_POM); do + # update here the parent reference version + xmlstarlet edit -L -u "/_:project/_:parent/_:version" -v $TARGET_VERSION "$PROJECT/$MODULE/pom.xml" + + updateVersions "$PROJECT/$MODULE" $TARGET_VERSION + done; +} + +function prepareFromLocal { + if [ -d $WORK_DIR ]; then + rm -Rf $WORK_DIR + fi + + mkdir $WORK_DIR + + cp -R . 
$WORK_DIR + cd $WORK_DIR +} + +function prepareFromRemote { + if [ -d $WORK_DIR ]; then + rm -Rf $WORK_DIR + fi + + git clone $GIT_REPO $WORK_DIR + cd $WORK_DIR +} + + +# Install deps required to build and run native thrift +function installNativeThrift { + local CURR_DIR=$PWD + + if [ ! -d $THRIFT7_PATH ] || [ ! -d $THRIFT9_PATH ]; then + cd /tmp + sudo apt-get install -qq libboost-dev libboost-test-dev libboost-program-options-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev + rm -Rf thrift; git clone https://git-wip-us.apache.org/repos/asf/thrift.git # drop a checkout left by an aborted run, otherwise the clone fails and 'set -e' stops the script + cd thrift + + if [ ! -d $THRIFT7_PATH ]; then + git checkout 0.7.0 + ./bootstrap.sh + ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-python --without-erlang --prefix=$THRIFT7_PATH JAVA_PREFIX=$THRIFT7_PATH/lib/ + + # See https://issues.apache.org/jira/browse/THRIFT-1614 a solution would be to use two different versions of automake + # but this would be more complex. The other option is to change the include in thriftl.cc but I don't like that much either. + set +e + make install > /dev/null 2>&1 + set -e + if [ -f compiler/cpp/thrifty.hh ]; then mv compiler/cpp/thrifty.hh compiler/cpp/thrifty.h; fi # nothing to rename when the first pass already produced thrifty.h; a failing mv would abort under 'set -e' + + make install + make clean + fi + + if [ ! -d $THRIFT9_PATH ]; then + git checkout 0.9.1 + ./bootstrap.sh + ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-python --without-erlang --prefix=$THRIFT9_PATH JAVA_PREFIX=$THRIFT9_PATH/lib/ + make install + fi + + cd .. + rm -Rf thrift + fi + + cd $CURR_DIR +} + +function installProtobuf { + local CURR_DIR=$PWD + + if [ ! -d $PROTOBUF_PATH ]; then + cd /tmp + wget https://github.com/google/protobuf/releases/download/v2.4.1/protobuf-2.4.1.tar.gz -O - | tar -xz + cd protobuf-2.4.1 + ./configure --prefix=$PROTOBUF_PATH + make install + cd .. + rm -Rf protobuf-2.4.1 + fi + + cd $CURR_DIR +} + +# Install deps required to build hadoop lzo and native libgplcompression +function installHadoopLzo { + local CURR_DIR=$PWD + + if [ !
-d $HADOOP_LZO_PATH ]; then + + if [ -z ${JAVA_HOME+x} ]; then + echo "Please enter a value for JAVA_HOME:" + read JAVA_HOME; export JAVA_HOME # a plain shell variable is invisible to child processes such as mvn; export it so the prompt has an effect + fi + + cd /tmp + sudo apt-get -qq install lzop liblzo2-dev + rm -Rf hadoop-lzo; git clone https://github.com/twitter/hadoop-lzo.git # clean checkout even after an aborted run; https because GitHub no longer serves unauthenticated git:// + cd hadoop-lzo + mvn compile + mv target/native/Linux-* $HADOOP_LZO_PATH + cd .. + rm -Rf hadoop-lzo + fi + + cd $CURR_DIR +} + +####################################################################################################################### +####################################################################################################################### +####################################################################################################################### + +__MVN_THRIFT7="-Pthrift7 -Dthrift.executable=$THRIFT7_PATH/bin/thrift" +__MVN_THRIFT9="-Pthrift9 -Dthrift.executable=$THRIFT9_PATH/bin/thrift" +__MVN_HADOOP_LZO="-Dtest.library.path=$HADOOP_LZO_PATH/lib -Drequire.lzo.tests=true" +__MVN_PROTOC_EXECUTABLE="-Dprotoc.executable=$PROTOBUF_PATH/bin/protoc" + +case "$COMMAND" in +"test") + prepareFromLocal + git checkout $BASE_BRANCH + + installNativeThrift + installHadoopLzo + installProtobuf + + mvn clean test $__MVN_THRIFT7 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE + mvn clean test $__MVN_THRIFT9 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE + ;; +"install") + echo "Will install current version" + prepareFromLocal + git checkout $BASE_BRANCH + + installNativeThrift + installHadoopLzo + installProtobuf + + mvn clean install $__MVN_THRIFT7 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE + mvn clean install $__MVN_THRIFT9 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE + ;; +"deploy") + echo "Will deploy current version" + prepareFromLocal + git checkout $BASE_BRANCH + + installNativeThrift + installHadoopLzo + installProtobuf + + mvn clean deploy $__MVN_THRIFT7 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE + mvn clean deploy $__MVN_THRIFT9 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE + ;; +"release") + while [ -z
${NEXT_DEV_VERSION+x} ] || [[ $NEXT_DEV_VERSION != *"-SNAPSHOT" ]]; do + echo "What is the next dev version (must be of the standard form XXX-SNAPSHOT)?" + read NEXT_DEV_VERSION + done + + echo "Will run full release including: release branch, deploy artifacts and update current branch to next version" + + checkNoUncommitedChanges + prepareFromRemote + + # We want to make the release from the initial branch, here we are in the working copy, not the original directory + git checkout $BASE_BRANCH + + installNativeThrift + installHadoopLzo + installProtobuf + + # Update the version to use the release version, sync with scm and deploy + updateVersions . $RELEASE_VERSION + + git add -u -- pom.xml ':(glob)**/pom.xml' # git-side glob stages tracked poms at any depth, as updateVersions edits them; the shell's ** without globstar stops at one level + git commit -m "[Release] - Prepare release $RELEASE_VERSION" + git tag "elephant-bird-$RELEASE_VERSION" + + mvn clean deploy $__MVN_THRIFT7 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE -DperformRelease=true + mvn clean deploy $__MVN_THRIFT9 $__MVN_HADOOP_LZO $__MVN_PROTOC_EXECUTABLE -DperformRelease=true + + git push origin $BASE_BRANCH + git push origin "elephant-bird-$RELEASE_VERSION" + + + # Update to the next development version and push those changes to master + updateVersions .
$NEXT_DEV_VERSION + git add -u -- pom.xml ':(glob)**/pom.xml' # git-side glob stages tracked poms at any depth, as updateVersions edits them; the shell's ** without globstar stops at one level + git commit -m "[Release] - $RELEASE_VERSION, prepare for next development iteration $NEXT_DEV_VERSION" + + git push origin $BASE_BRANCH + + # Until here we are supposed to be able to easily revert things as we still have our unchanged clone + cd "$BASE_DIR" + git pull origin $BASE_BRANCH + ;; +*) + echo "Unknown command: $COMMAND" + exit 1; +esac + +# Cleaning after us (in case of an error we want the src to remain so we can debug things) +rm -Rf "$WORK_DIR" From a0f954bcd02d834cf7e55c3a69a7f12723eef75c Mon Sep 17 00:00:00 2001 From: EugenCepoi Date: Mon, 14 Dec 2015 19:36:36 -0800 Subject: [PATCH 4/4] An attempt to fix the buffer overflow errors during pig tests in travis --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index f77221a65..fbd3ab5e4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,10 @@ language: java before_install: +# An attempt to fix the buffer overflow in the pig tests +# /usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/libnet.so(Java_java_net_Inet4AddressImpl_getLocalHostName+0x190)[ +# https://github.com/travis-ci/travis-ci/issues/1484 + - echo "127.0.0.1 " `hostname` | sudo tee /etc/hosts - sudo apt-get update -qq - sudo apt-get install -qq protobuf-compiler - sudo apt-get install -qq libboost-dev libboost-test-dev libboost-program-options-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev