Author: todd Date: Wed Jul 23 00:29:00 2014 New Revision: 1612732 URL: http://svn.apache.org/r1612732 Log: HADOOP-10855. Allow Text to be read with a known Length. Contributed by Todd Lipcon.
Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1612732&r1=1612731&r2=1612732&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt Wed Jul 23 00:29:00 2014 @@ -42,6 +42,8 @@ Release 2.6.0 - UNRELEASED HADOOP-10755. Support negative caching of user-group mapping. (Lei Xu via wang) + HADOOP-10855. Allow Text to be read with a known Length. (todd) + OPTIMIZATIONS BUG FIXES Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java?rev=1612732&r1=1612731&r2=1612732&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java Wed Jul 23 00:29:00 2014 @@ -288,9 +288,7 @@ public class Text extends BinaryComparab @Override public void readFields(DataInput in) throws IOException { int newLength = WritableUtils.readVInt(in); - setCapacity(newLength, false); - in.readFully(bytes, 0, newLength); - length = newLength; + readWithKnownLength(in, newLength); } public void readFields(DataInput in, int maxLength) throws IOException { @@ -302,9 +300,7 @@ public class Text extends BinaryComparab throw new IOException("tried to deserialize " + newLength + " bytes of data, but maxLength = " + maxLength); } - setCapacity(newLength, false); - in.readFully(bytes, 0, newLength); - length = newLength; + readWithKnownLength(in, newLength); } /** Skips over one Text in the input. */ @@ -313,6 +309,17 @@ public class Text extends BinaryComparab WritableUtils.skipFully(in, length); } + /** + * Read a Text object whose length is already known. + * This allows creating Text from a stream which uses a different serialization + * format. + */ + public void readWithKnownLength(DataInput in, int len) throws IOException { + setCapacity(len, false); + in.readFully(bytes, 0, len); + length = len; + } + /** serialize * write this object to out * length uses zero-compressed encoding Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java?rev=1612732&r1=1612731&r2=1612732&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java Wed Jul 23 00:29:00 2014 @@ -24,6 +24,7 @@ import java.nio.BufferUnderflowException import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.util.Random; +import com.google.common.base.Charsets; import com.google.common.primitives.Bytes; /** Unit tests for LargeUTF8. */ @@ -363,6 +364,27 @@ public class TestText extends TestCase { fail("testReadWriteOperations error !!!"); } } + + public void testReadWithKnownLength() throws IOException { + String line = "hello world"; + byte[] inputBytes = line.getBytes(Charsets.UTF_8); + DataInputBuffer in = new DataInputBuffer(); + Text text = new Text(); + + in.reset(inputBytes, inputBytes.length); + text.readWithKnownLength(in, 5); + assertEquals("hello", text.toString()); + + // Read longer length, make sure it lengthens + in.reset(inputBytes, inputBytes.length); + text.readWithKnownLength(in, 7); + assertEquals("hello w", text.toString()); + + // Read shorter length, make sure it shortens + in.reset(inputBytes, inputBytes.length); + text.readWithKnownLength(in, 2); + assertEquals("he", text.toString()); + } /** * test {@code Text.bytesToCodePoint(bytes) }