Skip to content

Commit

Permalink
Merge pull request #455 from EugenCepoi/master
Browse files Browse the repository at this point in the history
Fixes #452 - supporting thrift 0.7 and 0.9
  • Loading branch information
ianoc committed Dec 17, 2015
2 parents 5f20160 + a0f954b commit 2c28929
Show file tree
Hide file tree
Showing 14 changed files with 587 additions and 142 deletions.
38 changes: 22 additions & 16 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
language: java

jdk:
- openjdk7

before_install:
# An attempt to fix the buffer overflow in the pig tests
# /usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/libnet.so(Java_java_net_Inet4AddressImpl_getLocalHostName+0x190)[
# https://github.com/travis-ci/travis-ci/issues/1484
- echo "127.0.0.1 " `hostname` | sudo tee /etc/hosts
- sudo apt-get update -qq
- sudo apt-get install -qq protobuf-compiler
- sudo apt-get install -qq libboost-dev libboost-test-dev libboost-program-options-dev libevent-dev automake libtool flex bison pkg-config g++ libssl-dev
- wget -nv http://archive.apache.org/dist/thrift/0.7.0/thrift-0.7.0.tar.gz
- tar zxf thrift-0.7.0.tar.gz
- cd thrift-0.7.0
- chmod +x ./configure
- git clone https://git-wip-us.apache.org/repos/asf/thrift.git
- cd thrift
- git checkout $THRIFT_TAG
- ./bootstrap.sh
- ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang
- sudo make install
- cd ..
Expand All @@ -22,12 +23,17 @@ before_install:
- mv target/native/Linux-* ../hadoop-lzo-native
- cd ..

script:
- echo ============ Build and test with hadoop2 profile =====================
- mvn clean test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2
- echo ============ Build with hadoop2 and test with hadoop1 ================
- mvn test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop1
- echo ============ Build and test with hadoop1 profile =====================
- mvn clean test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop1
- echo ============ Build with hadoop2 and test with hadoop1 ================
- mvn test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true -P hadoop2
install: true

matrix:
include:
- jdk: openjdk7
env: THRIFT_TAG=0.7.0 THRIFT_PROFILE=-Pthrift7
- jdk: openjdk7
env: THRIFT_TAG=0.7.0 THRIFT_PROFILE=-Pthrift7 HADOOP_PROFILE=-Phadoop2
- jdk: openjdk7
env: THRIFT_TAG=0.9.1 THRIFT_PROFILE=-Pthrift9
- jdk: openjdk7
env: THRIFT_TAG=0.9.1 THRIFT_PROFILE=-Pthrift9 HADOOP_PROFILE=-Phadoop2

script: "mvn test -Dtest.library.path=$PWD/hadoop-lzo-native/lib -Drequire.lzo.tests=true $HADOOP_PROFILE $THRIFT_PROFILE"
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Elephant Bird release artifacts are published to the [Sonatype OSS](https://oss.
1. Pig 0.8+
1. Protocol Buffers 2.5.0, 2.4.1, 2.3.0 (default build version is 2.4.1 can be changed with `-Dprotobuf.version=2.3.0`)
1. Hive 0.7 (with HIVE-1616)
1. Thrift 0.5.0, 0.6.0, 0.7.0
1. Thrift 0.5.0, 0.6.0, 0.7.0, greater versions than 0.9 are provided via thrift9 maven profile
1. Mahout 0.6
1. Cascading2 (as the API is evolving, see libraries.properties for the currently supported version)
1. Crunch 0.8.1+
Expand Down
79 changes: 79 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,85 @@

</dependencies>

<profiles>
<profile>
<id>thrift7</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>thrift7/src/main/java</source>
</sources>
</configuration>
</execution>
<execution>
<id>add-test-source</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>thrift7/src/test/java</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>thrift9</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>thrift9/src/main/java</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>default-jar</id>
<configuration>
<classifier>thrift9</classifier>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>

<build>
<plugins>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package com.twitter.elephantbird.mapreduce.io;

import java.io.IOException;

import com.twitter.elephantbird.thrift.ThriftBinaryDeserializer;
import org.apache.thrift.TBase;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.elephantbird.thrift.ThriftCompat;
import com.twitter.elephantbird.util.TypeRef;

public class ThriftConverter<M extends TBase<?, ?>> implements BinaryConverter<M> {
Expand Down Expand Up @@ -51,7 +49,7 @@ public ThriftConverter(TypeRef<M> typeRef) {
public M fromBytes(byte[] messageBuffer) throws DecodeException {
try {
if (deserializer == null)
deserializer = new ThriftBinaryDeserializer();
deserializer = ThriftCompat.createBinaryDeserializer();
M message = typeRef.safeNewInstance();
deserializer.deserialize(message, messageBuffer);
return message;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TList;
import org.apache.thrift.protocol.TMap;
import org.apache.thrift.protocol.TProtocolFactory;
import org.apache.thrift.protocol.TSet;
import org.apache.thrift.protocol.TType;
import org.apache.thrift.transport.TMemoryInputTransport;

/**
Expand All @@ -24,14 +19,19 @@
* <li> {@code deserialize(buf, offset, len)} method can avoid buffer copies.
* Serialized struct need not span a entire byte array.
* </ul>
*
* To obtain an instance of ThriftBinaryDeserializer use {@link ThriftCompat#createBinaryDeserializer()}.
* It will take care of cross version compatibility between thrift 0.7 and 0.9+ code.
*
* @see ThriftCompat
*/
public class ThriftBinaryDeserializer extends TDeserializer {

// use protocol and transport directly instead of using ones in TDeserializer
private final TMemoryInputTransport trans = new TMemoryInputTransport();
private final TBinaryProtocol protocol = new ThriftBinaryProtocol(trans);
private final TBinaryProtocol protocol = ThriftCompat.createBinaryProtocol(trans);

public ThriftBinaryDeserializer() {
ThriftBinaryDeserializer() {
super(new ThriftBinaryProtocol.Factory());
}

Expand All @@ -44,9 +44,12 @@ public void deserialize(TBase base, byte[] bytes) throws TException {
* Same as {@link #deserialize(TBase, byte[])}, but much more buffer copy friendly.
*/
public void deserialize(TBase base, byte[] bytes, int offset, int len) throws TException {
protocol.reset();
protocol.setReadLength(len); // reduces OutOfMemoryError exceptions
resetAndInitialize(protocol, len);
trans.reset(bytes, offset, len);
base.read(protocol);
}

protected void resetAndInitialize(TBinaryProtocol protocol, int newLength) {
protocol.reset();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@
*
* Overwrites a few methods so that some malformed messages don't end up
* taking excessively large amounts of cpu inside TProtocolUtil.skip().
*
* To obtain an instance of ThriftBinaryProtocol use {@link ThriftCompat#createBinaryProtocol(TTransport)}.
* It will take care of cross version compatibility between thrift 0.7 and 0.9+ code.
*
* @see ThriftCompat
*/
public class ThriftBinaryProtocol extends TBinaryProtocol {

public ThriftBinaryProtocol(TTransport trans) {
ThriftBinaryProtocol(TTransport trans) {
super(trans);
}

Expand Down Expand Up @@ -82,29 +87,18 @@ public TSet readSetBegin() throws TException {
}

/**
* Check if the container size if valid.
*
* NOTE: This assumes that the elements are one byte each.
* So this does not catch all cases, but does increase the chances of
* handling malformed lengths when the number of remaining bytes in
* the underlying Transport is clearly less than the container size
* that the Transport provides.
* Check if the container size is valid.
*/
protected void checkContainerSize(int size) throws TProtocolException {
if (size < 0) {
throw new TProtocolException("Negative container size: " + size);
}
if (checkReadLength_) {
if ((readLength_ - size) < 0) {
throw new TProtocolException("Remaining message length is " + readLength_ + " but container size in underlying TTransport is set to at least: " + size);
}
}
}

public static class Factory implements TProtocolFactory {

public TProtocol getProtocol(TTransport trans) {
return new ThriftBinaryProtocol(trans);
return ThriftCompat.createBinaryProtocol(trans);
}
}
}
Loading

0 comments on commit 2c28929

Please sign in to comment.