(qpid-protonj2) branch main updated: PROTON-2837 Fix issue with buffer allocation for multi-byte encodings

tabish Fri, 12 Jul 2024 09:48:54 -0700

This is an automated email from the ASF dual-hosted git repository.

tabish pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/qpid-protonj2.git



The following commit(s) were added to refs/heads/main by this push:
     new 93e05f8f PROTON-2837 Fix issue with buffer allocation for multi-byte 
encodings
93e05f8f is described below

commit 93e05f8f46b79a4e6f83dd099e95451014ed98c6
Author: Timothy Bish <[email protected]>
AuthorDate: Fri Jul 12 12:30:58 2024 -0400

    PROTON-2837 Fix issue with buffer allocation for multi-byte encodings
    
    When decoding some multi-byte encoded UTF8 strings the decoder is using the
    wrong value to allocate a buffer to hold the resulting string decoded from
    the multi-byte segment plus any previously decoded single-byte values.
    
    Adds several tests and some additional validation on inputs to the decoder.
---
 .../codec/decoders/ProtonDecoderState.java         |  25 +++-
 .../codec/primitives/StringTypeCodecTest.java      | 144 ++++++++++++++++++++-
 2 files changed, 162 insertions(+), 7 deletions(-)

diff --git 
a/protonj2/src/main/java/org/apache/qpid/protonj2/codec/decoders/ProtonDecoderState.java
 
b/protonj2/src/main/java/org/apache/qpid/protonj2/codec/decoders/ProtonDecoderState.java
index fd5d2668..d4b009f9 100644
--- 
a/protonj2/src/main/java/org/apache/qpid/protonj2/codec/decoders/ProtonDecoderState.java
+++ 
b/protonj2/src/main/java/org/apache/qpid/protonj2/codec/decoders/ProtonDecoderState.java
@@ -104,8 +104,13 @@ public final class ProtonDecoderState implements 
DecoderState {
     private static String internalDecode(ProtonBuffer buffer, final int 
length, CharsetDecoder decoder, char[] scratch) {
         final int bufferInitialPosition = buffer.getReadOffset();
 
-        int offset;
-        for (offset = 0; offset < length; offset++) {
+        if (length < 0) {
+            throw new IllegalArgumentException("Specified UTF length:" + 
length + " cannot be negative.");
+        }
+
+        int offset = 0;
+
+        for (; offset < length; offset++) {
             final byte b = buffer.getByte(bufferInitialPosition + offset);
             if (b < 0) {
                 break;
@@ -124,14 +129,24 @@ public final class ProtonDecoderState implements 
DecoderState {
 
     private static String internalDecodeUTF8(final ProtonBuffer buffer, final 
int length, final char[] chars, final int offset, final CharsetDecoder decoder) 
{
         final CharBuffer out = CharBuffer.wrap(chars);
+        final int remaining = length - offset;
+
+        if (offset < 0) {
+            throw new IllegalArgumentException("Specified offset:" + offset + 
" cannot be negative.");
+        }
+
+        if (remaining < 0) {
+            throw new IllegalArgumentException("Remaining UTF8 Bytes size 
cannot be negative, was " + remaining);
+        }
+
         out.position(offset);
 
         // Create a buffer from the remaining portion of the buffer and then 
use the decoder to complete the work
         // remember to move the main buffer position to consume the data 
processed.
-        ByteBuffer byteBuffer = ByteBuffer.allocate(buffer.getReadableBytes());
+        ByteBuffer byteBuffer = ByteBuffer.allocate(remaining);
 
-        buffer.copyInto(buffer.getReadOffset(), byteBuffer, 0, length - 
offset);
-        buffer.advanceReadOffset(length - offset);
+        buffer.copyInto(buffer.getReadOffset(), byteBuffer, 0, remaining);
+        buffer.advanceReadOffset(remaining);
 
         try {
             for (;;) {
diff --git 
a/protonj2/src/test/java/org/apache/qpid/protonj2/codec/primitives/StringTypeCodecTest.java
 
b/protonj2/src/test/java/org/apache/qpid/protonj2/codec/primitives/StringTypeCodecTest.java
index 4495a32f..e0153270 100644
--- 
a/protonj2/src/test/java/org/apache/qpid/protonj2/codec/primitives/StringTypeCodecTest.java
+++ 
b/protonj2/src/test/java/org/apache/qpid/protonj2/codec/primitives/StringTypeCodecTest.java
@@ -49,8 +49,12 @@ public class StringTypeCodecTest extends CodecTestSupport {
 
     private static final List<String> TEST_DATA = generateTestData();
 
-    private final String SMALL_STRING_VALUE = "Small String";
-    private final String LARGE_STRING_VALUE = "Large String: " +
+    private static final String SMALL_STRING_VALUE = "Small String";
+    private static final String LARGE_STRING_VALUE = "Large String: " +
+        "The quick brown fox jumps over the lazy dog. " +
+        "The quick brown fox jumps over the lazy dog. " +
+        "The quick brown fox jumps over the lazy dog. " +
+        "The quick brown fox jumps over the lazy dog. " +
         "The quick brown fox jumps over the lazy dog. " +
         "The quick brown fox jumps over the lazy dog. " +
         "The quick brown fox jumps over the lazy dog. " +
@@ -60,6 +64,26 @@ public class StringTypeCodecTest extends CodecTestSupport {
         "The quick brown fox jumps over the lazy dog. " +
         "The quick brown fox jumps over the lazy dog.";
 
+    private static final String EXTENDED_ASCII_STRING_VALUE = "Extended ASCII 
String: " +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢" +
+        "€ ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ ¡ ¢";
+
+    private static final String UNICODE_STRING_VALUE = "Unicode String: " +
+        Character.valueOf((char) 1000).toString() +
+        Character.valueOf((char) 1001).toString() +
+        Character.valueOf((char) 1002).toString() +
+        Character.valueOf((char) 1003).toString();
+
     @Test
     public void testDecoderThrowsWhenAskedToReadWrongTypeAsThisType() throws 
Exception {
         testDecoderThrowsWhenAskedToReadWrongTypeAsThisType(false);
@@ -123,6 +147,16 @@ public class StringTypeCodecTest extends CodecTestSupport {
         doTestEncodeDecode(LARGE_STRING_VALUE, false);
     }
 
+    @Test
+    public void testEncodeExtededASCIIString() throws IOException {
+        doTestEncodeDecode(EXTENDED_ASCII_STRING_VALUE, false);
+    }
+
+    @Test
+    public void testEncodeUnicodeString() throws IOException {
+        doTestEncodeDecode(UNICODE_STRING_VALUE, false);
+    }
+
     @Test
     public void testEncodeEmptyString() throws IOException {
         doTestEncodeDecode("", false);
@@ -143,6 +177,16 @@ public class StringTypeCodecTest extends CodecTestSupport {
         doTestEncodeDecode(LARGE_STRING_VALUE, true);
     }
 
+    @Test
+    public void testEncodeExtendedASCIIStringFS() throws IOException {
+        doTestEncodeDecode(EXTENDED_ASCII_STRING_VALUE, true);
+    }
+
+    @Test
+    public void testEncodeUnicodeStringFS() throws IOException {
+        doTestEncodeDecode(UNICODE_STRING_VALUE, true);
+    }
+
     @Test
     public void testEncodeEmptyStringFS() throws IOException {
         doTestEncodeDecode("", true);
@@ -224,6 +268,102 @@ public class StringTypeCodecTest extends CodecTestSupport 
{
         }
     }
 
+    @Test
+    public void testDecodeSmallSeriesOfMultiByteUTF8Strings() throws 
IOException {
+        doTestDecodeMultiByteUTF8StringSeries(SMALL_SIZE, false);
+    }
+
+    @Test
+    public void testDecodeLargeSeriesOfMultiByteUTF8Strings() throws 
IOException {
+        doTestDecodeMultiByteUTF8StringSeries(LARGE_SIZE, false);
+    }
+
+    @Test
+    public void testDecodeSmallSeriesOfMultieByteUTF8StringsFS() throws 
IOException {
+        doTestDecodeMultiByteUTF8StringSeries(SMALL_SIZE, true);
+    }
+
+    @Test
+    public void testDecodeLargeSeriesOfMultiByteUTF8StringsFS() throws 
IOException {
+        doTestDecodeMultiByteUTF8StringSeries(LARGE_SIZE, true);
+    }
+
+    private void doTestDecodeMultiByteUTF8StringSeries(int size, boolean 
fromStream) throws IOException {
+        ProtonBuffer buffer = 
ProtonBufferAllocator.defaultAllocator().allocate();
+
+        for (int i = 0; i < size; ++i) {
+            encoder.writeString(buffer, encoderState, 
EXTENDED_ASCII_STRING_VALUE);
+        }
+
+        final InputStream stream;
+        if (fromStream) {
+            stream = new ProtonBufferInputStream(buffer);
+        } else {
+            stream = null;
+        }
+
+        for (int i = 0; i < size; ++i) {
+            final Object result;
+            if (fromStream) {
+                result = streamDecoder.readObject(stream, streamDecoderState);
+            } else {
+                result = decoder.readObject(buffer, decoderState);
+            }
+
+            assertNotNull(result);
+            assertTrue(result instanceof String);
+            assertEquals(EXTENDED_ASCII_STRING_VALUE, result);
+        }
+    }
+
+    @Test
+    public void testDecodeSmallSeriesOfUnicodeStrings() throws IOException {
+        doTestDecodeUnicodeStringSeries(SMALL_SIZE, false);
+    }
+
+    @Test
+    public void testDecodeLargeSeriesOfUnicodeStrings() throws IOException {
+        doTestDecodeUnicodeStringSeries(LARGE_SIZE, false);
+    }
+
+    @Test
+    public void testDecodeSmallSeriesOfUnicodeStringsFS() throws IOException {
+        doTestDecodeUnicodeStringSeries(SMALL_SIZE, true);
+    }
+
+    @Test
+    public void testDecodeLargeSeriesOfUnicodeStringsFS() throws IOException {
+        doTestDecodeUnicodeStringSeries(LARGE_SIZE, true);
+    }
+
+    private void doTestDecodeUnicodeStringSeries(int size, boolean fromStream) 
throws IOException {
+        ProtonBuffer buffer = 
ProtonBufferAllocator.defaultAllocator().allocate();
+
+        for (int i = 0; i < size; ++i) {
+            encoder.writeString(buffer, encoderState, UNICODE_STRING_VALUE);
+        }
+
+        final InputStream stream;
+        if (fromStream) {
+            stream = new ProtonBufferInputStream(buffer);
+        } else {
+            stream = null;
+        }
+
+        for (int i = 0; i < size; ++i) {
+            final Object result;
+            if (fromStream) {
+                result = streamDecoder.readObject(stream, streamDecoderState);
+            } else {
+                result = decoder.readObject(buffer, decoderState);
+            }
+
+            assertNotNull(result);
+            assertTrue(result instanceof String);
+            assertEquals(UNICODE_STRING_VALUE, result);
+        }
+    }
+
     @Test
     public void testDecodeStringOfZeroLengthWithLargeEncoding() throws 
IOException {
         doTestDecodeStringOfZeroLengthWithGivenEncoding(EncodingCodes.STR32, 
false);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(qpid-protonj2) branch main updated: PROTON-2837 Fix issue with buffer allocation for multi-byte encodings

Reply via email to