This is an automated email from the ASF dual-hosted git repository. chaokunyang pushed a commit to branch releases-0.10 in repository https://gitbox.apache.org/repos/asf/fury.git
commit d0663421c3e4139e7a3749a388d5b8cec0138468 Author: Shawn Yang <[email protected]> AuthorDate: Sun Jan 19 23:13:51 2025 +0800 feat(java): make 4 bytes utf16 size header optional for utf8 encoding (#2010) ## What does this PR do? Currently, when Fury serializes a UTF-8 string in Java, it writes the number of UTF-16 bytes first, so that deserialization can save one copy. But C++ and Go do not need this information. This PR makes the 4-byte UTF-16 size header optional for UTF-8 encoding, so that xlang serialization can use the standard Fury string serialization spec and align with other languages. For performance reasons, this PR introduces `writeNumUtf16BytesForUtf8Encoding`, which can preserve the current behaviour. ## Related issues #1890 ## Does this PR introduce any user-facing change? <!-- If any user-facing interface changes, please [open an issue](https://github.com/apache/fury/issues/new/choose) describing the need to do so and update the document if necessary. --> - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark When the UTF-16 size header is not written, this PR introduces an extra copy during deserialization, since the UTF-16 size cannot be known before the UTF-8 string has been decoded. 
--- .../main/java/org/apache/fury/config/Config.java | 8 + .../java/org/apache/fury/config/FuryBuilder.java | 15 ++ .../apache/fury/serializer/StringSerializer.java | 212 +++++++++++++++++++-- .../test/java/org/apache/fury/FuryTestBase.java | 10 + .../fury/serializer/StringSerializerTest.java | 67 +++++-- 5 files changed, 275 insertions(+), 37 deletions(-) diff --git a/java/fury-core/src/main/java/org/apache/fury/config/Config.java b/java/fury-core/src/main/java/org/apache/fury/config/Config.java index bc6afe87..62e45bc0 100644 --- a/java/fury-core/src/main/java/org/apache/fury/config/Config.java +++ b/java/fury-core/src/main/java/org/apache/fury/config/Config.java @@ -46,6 +46,7 @@ public class Config implements Serializable { private final boolean checkJdkClassSerializable; private final Class<? extends Serializer> defaultJDKStreamSerializerType; private final boolean compressString; + private final boolean writeNumUtf16BytesForUtf8Encoding; private final boolean compressInt; private final boolean compressLong; private final LongEncoding longEncoding; @@ -72,6 +73,7 @@ public class Config implements Serializable { timeRefIgnored = !trackingRef || builder.timeRefIgnored; copyRef = builder.copyRef; compressString = builder.compressString; + writeNumUtf16BytesForUtf8Encoding = builder.writeNumUtf16BytesForUtf8Encoding; compressInt = builder.compressInt; longEncoding = builder.longEncoding; compressLong = longEncoding != LongEncoding.LE_RAW_BYTES; @@ -176,6 +178,10 @@ public class Config implements Serializable { return compressString; } + public boolean writeNumUtf16BytesForUtf8Encoding() { + return writeNumUtf16BytesForUtf8Encoding; + } + public boolean compressInt() { return compressInt; } @@ -287,6 +293,7 @@ public class Config implements Serializable { && checkClassVersion == config.checkClassVersion && checkJdkClassSerializable == config.checkJdkClassSerializable && compressString == config.compressString + && writeNumUtf16BytesForUtf8Encoding == 
config.writeNumUtf16BytesForUtf8Encoding && compressInt == config.compressInt && compressLong == config.compressLong && bufferSizeLimitBytes == config.bufferSizeLimitBytes @@ -321,6 +328,7 @@ public class Config implements Serializable { checkJdkClassSerializable, defaultJDKStreamSerializerType, compressString, + writeNumUtf16BytesForUtf8Encoding, compressInt, compressLong, longEncoding, diff --git a/java/fury-core/src/main/java/org/apache/fury/config/FuryBuilder.java b/java/fury-core/src/main/java/org/apache/fury/config/FuryBuilder.java index e139e09f..3fe45415 100644 --- a/java/fury-core/src/main/java/org/apache/fury/config/FuryBuilder.java +++ b/java/fury-core/src/main/java/org/apache/fury/config/FuryBuilder.java @@ -69,6 +69,7 @@ public final class FuryBuilder { boolean compressInt = true; public LongEncoding longEncoding = LongEncoding.SLI; boolean compressString = false; + Boolean writeNumUtf16BytesForUtf8Encoding; CompatibleMode compatibleMode = CompatibleMode.SCHEMA_CONSISTENT; boolean checkJdkClassSerializable = true; Class<? extends Serializer> defaultJDKStreamSerializerType = ObjectStreamSerializer.class; @@ -185,6 +186,17 @@ public final class FuryBuilder { return this; } + /** + * Whether write num_bytes of utf16 for utf8 encoding. With this option enabled, fury will write + * the num_bytes of utf16 before write utf8 encoded data, so that the deserialization can create + * the appropriate utf16 array for store the data, thus save one copy. + */ + public FuryBuilder withWriteNumUtf16BytesForUtf8Encoding( + boolean writeNumUtf16BytesForUtf8Encoding) { + this.writeNumUtf16BytesForUtf8Encoding = writeNumUtf16BytesForUtf8Encoding; + return this; + } + /** * Sets the limit for Fury's internal buffer. If the buffer size exceeds this limit, it will be * reset to this limit after every serialization and deserialization. 
@@ -379,6 +391,9 @@ public final class FuryBuilder { ObjectStreamSerializer.class, Serializer.class); } + if (writeNumUtf16BytesForUtf8Encoding == null) { + writeNumUtf16BytesForUtf8Encoding = language == Language.JAVA; + } if (compatibleMode == CompatibleMode.COMPATIBLE) { checkClassVersion = false; if (deserializeNonexistentClass == null) { diff --git a/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java b/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java index e2b987d9..219e13e8 100644 --- a/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java +++ b/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java @@ -30,6 +30,7 @@ import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodType; import java.lang.reflect.Field; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.function.BiFunction; import java.util.function.Function; import org.apache.fury.Fury; @@ -107,12 +108,17 @@ public final class StringSerializer extends ImmutableSerializer<String> { } private final boolean compressString; + private final boolean writeNumUtf16BytesForUtf8Encoding; private byte[] byteArray = new byte[DEFAULT_BUFFER_SIZE]; private int smoothByteArrayLength = DEFAULT_BUFFER_SIZE; + private char[] charArray = new char[16]; + private int smoothCharArrayLength = DEFAULT_BUFFER_SIZE; + private byte[] byteArray2 = new byte[16]; public StringSerializer(Fury fury) { super(fury, String.class, fury.trackingRef() && !fury.isStringRefIgnored()); compressString = fury.compressString(); + writeNumUtf16BytesForUtf8Encoding = fury.getConfig().writeNumUtf16BytesForUtf8Encoding(); } @Override @@ -237,7 +243,13 @@ public final class StringSerializer extends ImmutableSerializer<String> { byte coder = (byte) (header & 0b11); int numBytes = (int) (header >>> 2); if (coder == UTF8) { - return newBytesStringZeroCopy(UTF16, readBytesUTF8(buffer, numBytes)); + byte[] 
data; + if (writeNumUtf16BytesForUtf8Encoding) { + data = readBytesUTF8PerfOptimized(buffer, numBytes); + } else { + data = readBytesUTF8(buffer, numBytes); + } + return newBytesStringZeroCopy(UTF16, data); } else if (coder == LATIN1 || coder == UTF16) { return newBytesStringZeroCopy(coder, readBytesUnCompressedUTF16(buffer, numBytes)); } else { @@ -254,7 +266,9 @@ public final class StringSerializer extends ImmutableSerializer<String> { if (coder == LATIN1) { chars = readCharsLatin1(buffer, numBytes); } else if (coder == UTF8) { - chars = readCharsUTF8(buffer, numBytes); + return writeNumUtf16BytesForUtf8Encoding + ? readCharsUTF8PerfOptimized(buffer, numBytes) + : readCharsUTF8(buffer, numBytes); } else if (coder == UTF16) { chars = readCharsUTF16(buffer, numBytes); } else { @@ -313,7 +327,11 @@ public final class StringSerializer extends ImmutableSerializer<String> { if (coder == LATIN1 || bestCoder(bytes) == UTF16) { writeBytesString(buffer, coder, bytes); } else { - writeBytesUTF8(buffer, bytes); + if (writeNumUtf16BytesForUtf8Encoding) { + writeBytesUTF8PerfOptimized(buffer, bytes); + } else { + writeBytesUTF8(buffer, bytes); + } } } @@ -324,7 +342,11 @@ public final class StringSerializer extends ImmutableSerializer<String> { if (coder == LATIN1) { writeCharsLatin1(buffer, chars, chars.length); } else if (coder == UTF8) { - writeCharsUTF8(buffer, chars); + if (writeNumUtf16BytesForUtf8Encoding) { + writeCharsUTF8PerfOptimized(buffer, chars); + } else { + writeCharsUTF8(buffer, chars); + } } else { writeCharsUTF16(buffer, chars, chars.length); } @@ -412,24 +434,39 @@ public final class StringSerializer extends ImmutableSerializer<String> { } public byte[] readBytesUTF8(MemoryBuffer buffer, int numBytes) { + byte[] tmpArray = getByteArray(numBytes << 1); + buffer.checkReadableBytes(numBytes); + int utf16NumBytes; + byte[] srcArray = buffer.getHeapMemory(); + if (srcArray != null) { + int srcIndex = buffer._unsafeHeapReaderIndex(); + utf16NumBytes = + 
StringEncodingUtils.convertUTF8ToUTF16(srcArray, srcIndex, numBytes, tmpArray); + buffer._increaseReaderIndexUnsafe(numBytes); + } else { + byte[] byteArray2 = getByteArray2(numBytes); + buffer.readBytes(byteArray2, 0, numBytes); + utf16NumBytes = StringEncodingUtils.convertUTF8ToUTF16(byteArray2, 0, numBytes, tmpArray); + } + return Arrays.copyOf(tmpArray, utf16NumBytes); + } + + private byte[] readBytesUTF8PerfOptimized(MemoryBuffer buffer, int numBytes) { int udf8Bytes = buffer.readInt32(); byte[] bytes = new byte[numBytes]; + // noinspection Duplicates buffer.checkReadableBytes(udf8Bytes); byte[] srcArray = buffer.getHeapMemory(); if (srcArray != null) { int srcIndex = buffer._unsafeHeapReaderIndex(); int readLen = StringEncodingUtils.convertUTF8ToUTF16(srcArray, srcIndex, udf8Bytes, bytes); - if (readLen != numBytes) { - throw new RuntimeException("Decode UTF8 to UTF16 failed"); - } + assert readLen == numBytes : "Decode UTF8 to UTF16 failed"; buffer._increaseReaderIndexUnsafe(udf8Bytes); } else { byte[] tmpArray = getByteArray(udf8Bytes); buffer.readBytes(tmpArray, 0, udf8Bytes); int readLen = StringEncodingUtils.convertUTF8ToUTF16(tmpArray, 0, udf8Bytes, bytes); - if (readLen != numBytes) { - throw new RuntimeException("Decode UTF8 to UTF16 failed"); - } + assert readLen == numBytes : "Decode UTF8 to UTF16 failed"; } return bytes; } @@ -483,28 +520,42 @@ public final class StringSerializer extends ImmutableSerializer<String> { return chars; } - public char[] readCharsUTF8(MemoryBuffer buffer, int numBytes) { + public String readCharsUTF8(MemoryBuffer buffer, int numBytes) { + char[] chars = getCharArray(numBytes); + int charsLen; + buffer.checkReadableBytes(numBytes); + byte[] srcArray = buffer.getHeapMemory(); + if (srcArray != null) { + int srcIndex = buffer._unsafeHeapReaderIndex(); + charsLen = StringEncodingUtils.convertUTF8ToUTF16(srcArray, srcIndex, numBytes, chars); + buffer._increaseReaderIndexUnsafe(numBytes); + } else { + byte[] tmpArray = 
getByteArray(numBytes); + buffer.readBytes(tmpArray, 0, numBytes); + charsLen = StringEncodingUtils.convertUTF8ToUTF16(tmpArray, 0, numBytes, chars); + } + return new String(chars, 0, charsLen); + } + + public String readCharsUTF8PerfOptimized(MemoryBuffer buffer, int numBytes) { int udf16Chars = numBytes >> 1; int udf8Bytes = buffer.readInt32(); char[] chars = new char[udf16Chars]; + // noinspection Duplicates buffer.checkReadableBytes(udf8Bytes); byte[] srcArray = buffer.getHeapMemory(); if (srcArray != null) { int srcIndex = buffer._unsafeHeapReaderIndex(); int readLen = StringEncodingUtils.convertUTF8ToUTF16(srcArray, srcIndex, udf8Bytes, chars); - if (readLen != udf16Chars) { - throw new RuntimeException("Decode UTF8 to UTF16 failed"); - } + assert readLen == udf16Chars : "Decode UTF8 to UTF16 failed"; buffer._increaseReaderIndexUnsafe(udf8Bytes); } else { byte[] tmpArray = getByteArray(udf8Bytes); buffer.readBytes(tmpArray, 0, udf8Bytes); int readLen = StringEncodingUtils.convertUTF8ToUTF16(tmpArray, 0, udf8Bytes, chars); - if (readLen != udf16Chars) { - throw new RuntimeException("Decode UTF8 to UTF16 failed"); - } + assert readLen == udf16Chars : "Decode UTF8 to UTF16 failed"; } - return chars; + return newCharsStringZeroCopy(chars); } public void writeCharsLatin1(MemoryBuffer buffer, char[] chars, int numBytes) { @@ -562,8 +613,51 @@ public final class StringSerializer extends ImmutableSerializer<String> { } public void writeCharsUTF8(MemoryBuffer buffer, char[] chars) { + int estimateMaxBytes = chars.length * 3; + // num bytes of utf8 should be smaller than utf16, otherwise we should + // utf16 instead. 
+ // We can't use length in header since we don't know num chars in go/c++ + int approxNumBytes = (int) (chars.length * 1.5) + 1; + int writerIndex = buffer.writerIndex(); + // 9 for max bytes of header + buffer.ensure(writerIndex + 9 + estimateMaxBytes); + byte[] targetArray = buffer.getHeapMemory(); + if (targetArray != null) { + // noinspection Duplicates + int targetIndex = buffer._unsafeHeapWriterIndex(); + // keep this index in case actual num utf8 bytes need different bytes for header + int headerPos = targetIndex; + int arrIndex = targetIndex; + long header = ((long) approxNumBytes << 2) | UTF8; + int headerBytesWritten = LittleEndian.putVarUint36Small(targetArray, arrIndex, header); + arrIndex += headerBytesWritten; + writerIndex += headerBytesWritten; + // noinspection Duplicates + targetIndex = StringEncodingUtils.convertUTF16ToUTF8(chars, targetArray, arrIndex); + byte stashedByte = targetArray[arrIndex]; + int written = targetIndex - arrIndex; + header = ((long) written << 2) | UTF8; + int diff = + LittleEndian.putVarUint36Small(targetArray, headerPos, header) - headerBytesWritten; + if (diff != 0) { + handleWriteCharsUTF8UnalignedHeaderBytes(targetArray, arrIndex, diff, written, stashedByte); + } + buffer._unsafeWriterIndex(writerIndex + written + diff); + } else { + // noinspection Duplicates + final byte[] tmpArray = getByteArray(estimateMaxBytes); + int written = StringEncodingUtils.convertUTF16ToUTF8(chars, tmpArray, 0); + long header = ((long) written << 2) | UTF8; + writerIndex += buffer._unsafePutVarUint36Small(writerIndex, header); + buffer.put(writerIndex, tmpArray, 0, written); + buffer._unsafeWriterIndex(writerIndex + written); + } + } + + public void writeCharsUTF8PerfOptimized(MemoryBuffer buffer, char[] chars) { int estimateMaxBytes = chars.length * 3; int numBytes = MathUtils.doubleExact(chars.length); + // noinspection Duplicates int writerIndex = buffer.writerIndex(); long header = ((long) numBytes << 2) | UTF8; 
buffer.ensure(writerIndex + 9 + estimateMaxBytes); @@ -588,7 +682,55 @@ public final class StringSerializer extends ImmutableSerializer<String> { } } - public void writeBytesUTF8(MemoryBuffer buffer, byte[] bytes) { + private void handleWriteCharsUTF8UnalignedHeaderBytes( + byte[] targetArray, int arrIndex, int diff, int written, byte stashed) { + if (diff == 1) { + System.arraycopy(targetArray, arrIndex + 1, targetArray, arrIndex + 2, written - 1); + targetArray[arrIndex + 1] = stashed; + } else { + System.arraycopy(targetArray, arrIndex, targetArray, arrIndex - 1, written); + } + } + + private void writeBytesUTF8(MemoryBuffer buffer, byte[] bytes) { + int numBytes = bytes.length; + int estimateMaxBytes = bytes.length / 2 * 3; + int writerIndex = buffer.writerIndex(); + buffer.ensure(writerIndex + 9 + estimateMaxBytes); + byte[] targetArray = buffer.getHeapMemory(); + if (targetArray != null) { + // noinspection Duplicates + int targetIndex = buffer._unsafeHeapWriterIndex(); + // keep this index in case actual num utf8 bytes need different bytes for header + int headerPos = targetIndex; + int arrIndex = targetIndex; + long header = ((long) numBytes << 2) | UTF8; + int headerBytesWritten = LittleEndian.putVarUint36Small(targetArray, arrIndex, header); + arrIndex += headerBytesWritten; + writerIndex += arrIndex - targetIndex; + // noinspection Duplicates + targetIndex = StringEncodingUtils.convertUTF16ToUTF8(bytes, targetArray, arrIndex); + byte stashedByte = targetArray[arrIndex]; + int written = targetIndex - arrIndex; + header = ((long) written << 2) | UTF8; + int diff = + LittleEndian.putVarUint36Small(targetArray, headerPos, header) - headerBytesWritten; + if (diff != 0) { + handleWriteCharsUTF8UnalignedHeaderBytes(targetArray, arrIndex, diff, written, stashedByte); + } + buffer._unsafeWriterIndex(writerIndex + written + diff); + } else { + // noinspection Duplicates + final byte[] tmpArray = getByteArray(estimateMaxBytes); + int written = 
StringEncodingUtils.convertUTF16ToUTF8(bytes, tmpArray, 0); + long header = ((long) written << 2) | UTF8; + writerIndex += buffer._unsafePutVarUint36Small(writerIndex, header); + buffer.put(writerIndex, tmpArray, 0, written); + buffer._unsafeWriterIndex(writerIndex + written); + } + } + + private void writeBytesUTF8PerfOptimized(MemoryBuffer buffer, byte[] bytes) { int numBytes = bytes.length; int estimateMaxBytes = bytes.length / 2 * 3; int writerIndex = buffer.writerIndex(); @@ -862,6 +1004,22 @@ public final class StringSerializer extends ImmutableSerializer<String> { } } + private char[] getCharArray(int numElements) { + char[] charArray = this.charArray; + if (charArray.length < numElements) { + charArray = new char[numElements]; + this.charArray = charArray; + } + if (charArray.length > DEFAULT_BUFFER_SIZE) { + smoothCharArrayLength = + Math.max(((int) (smoothCharArrayLength * 0.9 + numElements * 0.1)), DEFAULT_BUFFER_SIZE); + if (smoothByteArrayLength <= DEFAULT_BUFFER_SIZE) { + this.charArray = new char[DEFAULT_BUFFER_SIZE]; + } + } + return charArray; + } + private byte[] getByteArray(int numElements) { byte[] byteArray = this.byteArray; if (byteArray.length < numElements) { @@ -877,4 +1035,20 @@ public final class StringSerializer extends ImmutableSerializer<String> { } return byteArray; } + + private byte[] getByteArray2(int numElements) { + byte[] byteArray2 = this.byteArray2; + if (byteArray2.length < numElements) { + byteArray2 = new byte[numElements]; + this.byteArray = byteArray2; + } + if (byteArray2.length > DEFAULT_BUFFER_SIZE) { + smoothByteArrayLength = + Math.max(((int) (smoothByteArrayLength * 0.9 + numElements * 0.1)), DEFAULT_BUFFER_SIZE); + if (smoothByteArrayLength <= DEFAULT_BUFFER_SIZE) { + this.byteArray2 = new byte[DEFAULT_BUFFER_SIZE]; + } + } + return byteArray2; + } } diff --git a/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java b/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java index 3242fa90..f19038f1 
100644 --- a/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java +++ b/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java @@ -128,6 +128,16 @@ public abstract class FuryTestBase { return new Object[][] {{false}, {true}}; } + @DataProvider + public static Object[][] oneBoolOption() { + return new Object[][] {{false}, {true}}; + } + + @DataProvider + public static Object[][] twoBoolOptions() { + return new Object[][] {{false, false}, {true, false}, {false, true}, {true, true}}; + } + @DataProvider public static Object[][] compressNumberAndCodeGen() { return new Object[][] {{false, false}, {true, false}, {false, true}, {true, true}}; diff --git a/java/fury-core/src/test/java/org/apache/fury/serializer/StringSerializerTest.java b/java/fury-core/src/test/java/org/apache/fury/serializer/StringSerializerTest.java index 123f3e54..761cbd03 100644 --- a/java/fury-core/src/test/java/org/apache/fury/serializer/StringSerializerTest.java +++ b/java/fury-core/src/test/java/org/apache/fury/serializer/StringSerializerTest.java @@ -171,29 +171,46 @@ public class StringSerializerTest extends FuryTestBase { } /** Test for <a href="https://github.com/apache/fury/issues/1984">#1984</a> */ - @Test - public void testJavaCompressedString() { + @Test(dataProvider = "oneBoolOption") + public void testJavaCompressedString(boolean b) { Fury fury = Fury.builder() .withStringCompressed(true) + .withWriteNumUtf16BytesForUtf8Encoding(b) .withLanguage(Language.JAVA) .requireClassRegistration(false) .build(); - Simple a = new Simple( "STG@ON DEMAND Solutions@GeoComputing Switch/ Hub@Digi Edgeport/216 – 16 port Serial Hub"); + serDeCheck(fury, a); + } - byte[] bytes = fury.serialize(a); - - Simple b = (Simple) fury.deserialize(bytes); - assertEquals(a, b); + @Test + public void testCompressedStringEstimatedWrongSize() { + Fury fury = + Fury.builder() + .withStringCompressed(true) + .withWriteNumUtf16BytesForUtf8Encoding(false) + .withLanguage(Language.JAVA) + 
.requireClassRegistration(false) + .build(); + // estimated 41 bytes, header needs 2 byte. + // encoded utf8 is 31 bytes, took 1 byte for header. + serDeCheck(fury, StringUtils.random(25, 47) + "你好"); + // estimated 31 bytes, header needs 1 byte. + // encoded utf8 is 32 bytes, took 2 byte for header. + serDeCheck(fury, "hello, world. 你好,世界。"); } - @Test(dataProvider = "stringCompress") - public void testJavaString(boolean stringCompress) { + @Test(dataProvider = "twoBoolOptions") + public void testJavaString(boolean stringCompress, boolean writeNumUtf16BytesForUtf8Encoding) { Fury fury = - Fury.builder().withStringCompressed(stringCompress).requireClassRegistration(false).build(); + Fury.builder() + .withStringCompressed(stringCompress) + .withWriteNumUtf16BytesForUtf8Encoding(writeNumUtf16BytesForUtf8Encoding) + .requireClassRegistration(false) + .build(); MemoryBuffer buffer = MemoryUtils.buffer(32); StringSerializer serializer = new StringSerializer(fury); @@ -211,10 +228,15 @@ public class StringSerializerTest extends FuryTestBase { new String[] {"你好, Fury" + StringUtils.random(64), "你好, Fury" + StringUtils.random(64)}); } - @Test(dataProvider = "stringCompress") - public void testJavaStringOffHeap(boolean stringCompress) { + @Test(dataProvider = "twoBoolOptions") + public void testJavaStringOffHeap( + boolean stringCompress, boolean writeNumUtf16BytesForUtf8Encoding) { Fury fury = - Fury.builder().withStringCompressed(stringCompress).requireClassRegistration(false).build(); + Fury.builder() + .withStringCompressed(stringCompress) + .withWriteNumUtf16BytesForUtf8Encoding(writeNumUtf16BytesForUtf8Encoding) + .requireClassRegistration(false) + .build(); MemoryBuffer buffer = MemoryUtils.wrap(ByteBuffer.allocateDirect(1024)); Object o1 = "你好, Fury" + StringUtils.random(64); Object o2 = @@ -331,9 +353,14 @@ public class StringSerializerTest extends FuryTestBase { } } - @Test - public void testReadUtf8String() { - Fury fury = 
Fury.builder().withStringCompressed(true).requireClassRegistration(false).build(); + @Test(dataProvider = "oneBoolOption") + public void testReadUtf8String(boolean writeNumUtf16BytesForUtf8Encoding) { + Fury fury = + Fury.builder() + .withStringCompressed(true) + .withWriteNumUtf16BytesForUtf8Encoding(writeNumUtf16BytesForUtf8Encoding) + .requireClassRegistration(false) + .build(); for (MemoryBuffer buffer : new MemoryBuffer[] { MemoryUtils.buffer(32), MemoryUtils.wrap(ByteBuffer.allocateDirect(2048)) @@ -343,8 +370,12 @@ public class StringSerializerTest extends FuryTestBase { assertEquals(serializer.read(buffer), "abc你好"); byte[] bytes = "abc你好".getBytes(StandardCharsets.UTF_8); byte UTF8 = 2; - buffer.writeVarUint64(((long) "abc你好".length() << 1) << 2 | UTF8); - buffer.writeInt32(bytes.length); + if (writeNumUtf16BytesForUtf8Encoding) { + buffer.writeVarUint64(((long) "abc你好".length() << 1) << 2 | UTF8); + buffer.writeInt32(bytes.length); + } else { + buffer.writeVarUint64((((long) bytes.length) << 2 | UTF8)); + } buffer.writeBytes(bytes); assertEquals(serializer.read(buffer), "abc你好"); assertEquals(buffer.readerIndex(), buffer.writerIndex()); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
