AVRO-2048: Avro Binary Decoding - Gracefully Handle Long Strings
Project: http://git-wip-us.apache.org/repos/asf/avro/repo Commit: http://git-wip-us.apache.org/repos/asf/avro/commit/14488e35 Tree: http://git-wip-us.apache.org/repos/asf/avro/tree/14488e35 Diff: http://git-wip-us.apache.org/repos/asf/avro/diff/14488e35 Branch: refs/heads/master Commit: 14488e35bc31f299de8cd88bd6d1ac07576eaa3e Parents: 91d4cc0 Author: BELUGA BEHR <[email protected]> Authored: Thu Jul 27 10:02:30 2017 +0200 Committer: Gabor Szadovszky <[email protected]> Committed: Thu Jul 27 10:17:49 2017 +0200 ---------------------------------------------------------------------- CHANGES.txt | 3 +++ .../java/org/apache/avro/io/BinaryDecoder.java | 23 ++++++++++++++++---- .../org/apache/avro/io/TestBinaryDecoder.java | 10 ++++++++- 3 files changed, 31 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/avro/blob/14488e35/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 72dd022..b91ba5b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -70,6 +70,9 @@ Trunk (not yet released) AVRO-2049: Remove Superfluous Configuration From AvroSerializer (Beluga Behr via gabor) + AVRO-2048: Avro Binary Decoding - Gracefully Handle Long Strings + (Beluga Behr via gabor) + BUG FIXES AVRO-1741: Python3: Fix error when codec is not in the header. http://git-wip-us.apache.org/repos/asf/avro/blob/14488e35/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java index 3711d2b..a2d935a 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java @@ -38,6 +38,15 @@ import org.apache.avro.util.Utf8; */ public class BinaryDecoder extends Decoder { + + /** + * The maximum size of array to allocate. + * Some VMs reserve some header words in an array. + * Attempts to allocate larger arrays may result in + * OutOfMemoryError: Requested array size exceeds VM limit + */ + private static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L; + private ByteSource source = null; // we keep the buffer and its state variables in this class and not in a // container class for performance reasons. This improves performance @@ -256,11 +265,17 @@ public class BinaryDecoder extends Decoder { @Override public Utf8 readString(Utf8 old) throws IOException { - int length = readInt(); + long length = readLong(); + if (length > MAX_ARRAY_SIZE) { + throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_SIZE + " bytes"); + } + if (length < 0L) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } Utf8 result = (old != null ? old : new Utf8()); - result.setByteLength(length); - if (0 != length) { - doReadBytes(result.getBytes(), 0, length); + result.setByteLength((int) length); + if (0L != length) { + doReadBytes(result.getBytes(), 0, (int) length); } return result; } http://git-wip-us.apache.org/repos/asf/avro/blob/14488e35/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java index 572be60..fe5e4be 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java @@ -358,7 +358,7 @@ public class TestBinaryDecoder { } @Test - public void testBadLengthEncoding() throws IOException { + public void testNegativeLengthEncoding() throws IOException { byte[] bad = new byte[] { (byte)1 }; Decoder bd = factory.binaryDecoder(bad, null); String message = ""; @@ -370,6 +370,14 @@ public class TestBinaryDecoder { Assert.assertEquals("Malformed data. Length is negative: -1", message); } + @Test(expected=UnsupportedOperationException.class) + public void testLongLengthEncoding() throws IOException { + // Size equivalent to Integer.MAX_VALUE + 1 + byte[] bad = new byte[] { (byte) -128, (byte) -128, (byte) -128, (byte) -128, (byte) 16 }; + Decoder bd = factory.binaryDecoder(bad, null); + bd.readString(); + } + @Test(expected=EOFException.class) public void testIntTooShort() throws IOException { byte[] badint = new byte[4];
