This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-fury.git
The following commit(s) were added to refs/heads/main by this push:
new 21cf7394 feat(java/python/golang): concat meta string len with flags
(#1517)
21cf7394 is described below
commit 21cf73940d46bb99c1927c8d34c6d45c29df1c25
Author: Shawn Yang <[email protected]>
AuthorDate: Tue Apr 16 00:51:58 2024 +0800
feat(java/python/golang): concat meta string len with flags (#1517)
## What does this PR do?
This PR concatenates the meta string length with its flags and uses varint
encoding to encode the combined header, which saves 2 bytes per meta string
in most cases.
This PR reduces the serialized size a little: 415 bytes -> 407 bytes when
the class is not registered in Java
```
Before this PR:
Fury | MEDIA_CONTENT | false | array | 415 |
With this PR:
Fury | MEDIA_CONTENT | false | array | 407 |
```
## Related issues
Closes #1518
Closes #1519
Closes #1520
Closes #1521
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/incubator-fury/issues/new/choose)
describing the need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach benchmark data here.
-->
---
go/fury/type.go | 15 ++--
.../java/org/apache/fury/memory/MemoryBuffer.java | 17 +---
.../apache/fury/resolver/MetaStringResolver.java | 64 ++++++-------
.../test/java/org/apache/fury/FuryTestBase.java | 5 ++
.../org/apache/fury/memory/MemoryBufferTest.java | 6 +-
.../apache/fury/resolver/ClassResolverTest.java | 9 +-
.../serializer/UnexistedClassSerializersTest.java | 22 ++---
python/pyfury/_fury.py | 51 +++++------
python/pyfury/_serialization.pyx | 100 ++++++++++-----------
python/pyfury/_serializer.py | 28 +++---
python/pyfury/serializer.py | 6 +-
python/pyfury/tests/test_serializer.py | 3 +
12 files changed, 161 insertions(+), 165 deletions(-)
diff --git a/go/fury/type.go b/go/fury/type.go
index f1d507a5..b0239603 100644
--- a/go/fury/type.go
+++ b/go/fury/type.go
@@ -551,7 +551,7 @@ func (r *typeResolver) writeMetaString(buffer *ByteBuffer,
str string) error {
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
r.dynamicStringToId[str] = dynamicStringId
- buffer.WriteByte_(useStringValue)
+ buffer.WriteVarInt32(int32(len(str) << 1))
// TODO this hash should be unique, since we don't compare data
equality for performance
h := fnv.New64a()
if _, err := h.Write([]byte(str)); err != nil {
@@ -562,27 +562,26 @@ func (r *typeResolver) writeMetaString(buffer
*ByteBuffer, str string) error {
if len(str) > MaxInt16 {
return fmt.Errorf("too long string: %s", str)
}
- buffer.WriteInt16(int16(len(str)))
buffer.WriteBinary(unsafeGetBytes(str))
} else {
- buffer.WriteByte_(useStringId)
- buffer.WriteInt16(id)
+ buffer.WriteVarInt32(int32(((id + 1) << 1) | 1))
}
return nil
}
func (r *typeResolver) readMetaString(buffer *ByteBuffer) (string, error) {
- if buffer.ReadByte_() == useStringValue {
+ header := buffer.ReadVarInt32()
+ var length = int(header >> 1)
+ if header&0b1 == 0 {
// TODO support use computed hash
buffer.ReadInt64()
- bytesLength := buffer.ReadInt16()
- str := string(buffer.ReadBinary(int(bytesLength)))
+ str := string(buffer.ReadBinary(length))
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
r.dynamicIdToString[dynamicStringId] = str
return str, nil
} else {
- return r.dynamicIdToString[buffer.ReadInt16()], nil
+ return r.dynamicIdToString[int16(length-1)], nil
}
}
diff --git
a/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
b/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
index 725eb0fe..293d1256 100644
--- a/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
+++ b/java/fury-core/src/main/java/org/apache/fury/memory/MemoryBuffer.java
@@ -397,12 +397,6 @@ public final class MemoryBuffer {
UNSAFE.putByte(heapMemory, pos, b);
}
- // CHECKSTYLE.OFF:MethodName
- public void _unsafePutByte(int index, byte b) {
- // CHECKSTYLE.ON:MethodName
- UNSAFE.putByte(heapMemory, address + index, b);
- }
-
public boolean getBoolean(int index) {
final long pos = address + index;
checkPosition(index, pos, 1);
@@ -445,15 +439,6 @@ public final class MemoryBuffer {
UNSAFE.putShort(heapMemory, pos, value);
}
- // CHECKSTYLE.OFF:MethodName
- public void _unsafePutInt16(int index, short value) {
- // CHECKSTYLE.ON:MethodName
- if (!LITTLE_ENDIAN) {
- value = Short.reverseBytes(value);
- }
- UNSAFE.putShort(heapMemory, address + index, value);
- }
-
public int getInt32(int index) {
final long pos = address + index;
checkPosition(index, pos, 4);
@@ -510,7 +495,7 @@ public final class MemoryBuffer {
}
// CHECKSTYLE.OFF:MethodName
- public void _unsafePutInt64(int index, long value) {
+ private void _unsafePutInt64(int index, long value) {
// CHECKSTYLE.ON:MethodName
if (!LITTLE_ENDIAN) {
value = Long.reverseBytes(value);
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
index baab09fd..95b786f5 100644
---
a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
+++
b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
@@ -80,73 +80,70 @@ public final class MetaStringResolver {
public void writeMetaStringBytes(MemoryBuffer buffer, MetaStringBytes
byteString) {
short id = byteString.dynamicWriteStringId;
- int writerIndex = buffer.writerIndex();
if (id == MetaStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID) {
id = dynamicWriteStringId++;
byteString.dynamicWriteStringId = id;
MetaStringBytes[] dynamicWrittenMetaString = this.dynamicWrittenString;
if (dynamicWrittenMetaString.length <= id) {
- MetaStringBytes[] tmp = new MetaStringBytes[id * 2];
- System.arraycopy(dynamicWrittenMetaString, 0, tmp, 0,
dynamicWrittenMetaString.length);
- dynamicWrittenMetaString = tmp;
- this.dynamicWrittenString = tmp;
+ dynamicWrittenMetaString = growWrite(id);
}
dynamicWrittenMetaString[id] = byteString;
- int bytesLen = byteString.bytes.length;
- buffer.increaseWriterIndex(11 + bytesLen);
- buffer._unsafePutByte(writerIndex, USE_STRING_VALUE);
- // Since duplicate enum string writing are avoided by dynamic id,
- // use 8-byte hash won't increase too much space.
- buffer._unsafePutInt64(writerIndex + 1, byteString.hashCode);
- buffer._unsafePutInt16(writerIndex + 9, (short) bytesLen);
- buffer.put(writerIndex + 11, byteString.bytes, 0, bytesLen);
+ buffer.writeVarUint32Small7(byteString.bytes.length << 1);
+ buffer.writeInt64(byteString.hashCode);
+ buffer.writeBytes(byteString.bytes);
} else {
- buffer.increaseWriterIndex(3);
- buffer._unsafePutByte(writerIndex, USE_STRING_ID);
- buffer._unsafePutInt16(writerIndex + 1, id);
+ buffer.writeVarUint32Small7(((id + 1) << 1) | 1);
}
}
+ private MetaStringBytes[] growWrite(int id) {
+ MetaStringBytes[] tmp = new MetaStringBytes[id * 2];
+ System.arraycopy(dynamicWrittenString, 0, tmp, 0,
dynamicWrittenString.length);
+ return this.dynamicWrittenString = tmp;
+ }
+
MetaStringBytes readMetaStringBytes(MemoryBuffer buffer) {
- if (buffer.readByte() == USE_STRING_VALUE) {
+ int header = buffer.readVarUint32Small7();
+ int len = header >>> 1;
+ if ((header & 0b1) == 0) {
long hashCode = buffer.readInt64();
- MetaStringBytes byteString = trySkipMetaStringBytes(buffer, hashCode);
+ MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len,
hashCode);
updateDynamicString(byteString);
return byteString;
} else {
- return dynamicReadStringIds[buffer.readInt16()];
+ return dynamicReadStringIds[len - 1];
}
}
MetaStringBytes readMetaStringBytes(MemoryBuffer buffer, MetaStringBytes
cache) {
- if (buffer.readByte() == USE_STRING_VALUE) {
+ int header = buffer.readVarUint32Small7();
+ int len = header >>> 1;
+ if ((header & 0b1) == 0) {
long hashCode = buffer.readInt64();
if (cache.hashCode == hashCode) {
// skip byteString data
- buffer.increaseReaderIndex(2 + cache.bytes.length);
+ buffer.increaseReaderIndex(len);
updateDynamicString(cache);
return cache;
} else {
- MetaStringBytes byteString = trySkipMetaStringBytes(buffer, hashCode);
+ MetaStringBytes byteString = trySkipMetaStringBytes(buffer, len,
hashCode);
updateDynamicString(byteString);
return byteString;
}
} else {
- return dynamicReadStringIds[buffer.readInt16()];
+ return dynamicReadStringIds[len - 1];
}
}
/** Read enum string by try to reuse previous read {@link MetaStringBytes}
object. */
- private MetaStringBytes trySkipMetaStringBytes(MemoryBuffer buffer, long
hashCode) {
+ private MetaStringBytes trySkipMetaStringBytes(MemoryBuffer buffer, int len,
long hashCode) {
MetaStringBytes byteString = hash2MetaStringBytesMap.get(hashCode);
if (byteString == null) {
- int strBytesLength = buffer.readInt16();
- byte[] strBytes = buffer.readBytes(strBytesLength);
- byteString = new MetaStringBytes(strBytes, hashCode);
+ byteString = new MetaStringBytes(buffer.readBytes(len), hashCode);
hash2MetaStringBytesMap.put(hashCode, byteString);
} else {
// skip byteString data
- buffer.increaseReaderIndex(2 + byteString.bytes.length);
+ buffer.increaseReaderIndex(len);
}
return byteString;
}
@@ -155,14 +152,17 @@ public final class MetaStringResolver {
short currentDynamicReadId = dynamicReadStringId++;
MetaStringBytes[] dynamicReadStringIds = this.dynamicReadStringIds;
if (dynamicReadStringIds.length <= currentDynamicReadId) {
- MetaStringBytes[] tmp = new MetaStringBytes[currentDynamicReadId * 2];
- System.arraycopy(dynamicReadStringIds, 0, tmp, 0,
dynamicReadStringIds.length);
- dynamicReadStringIds = tmp;
- this.dynamicReadStringIds = tmp;
+ dynamicReadStringIds = growRead(currentDynamicReadId);
}
dynamicReadStringIds[currentDynamicReadId] = byteString;
}
+ private MetaStringBytes[] growRead(int id) {
+ MetaStringBytes[] tmp = new MetaStringBytes[id * 2];
+ System.arraycopy(dynamicReadStringIds, 0, tmp, 0,
dynamicReadStringIds.length);
+ return this.dynamicReadStringIds = tmp;
+ }
+
public void reset() {
resetRead();
resetWrite();
diff --git a/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java
b/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java
index af379661..1ec1660c 100644
--- a/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java
+++ b/java/fury-core/src/test/java/org/apache/fury/FuryTestBase.java
@@ -32,6 +32,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.fury.config.CompatibleMode;
+import org.apache.fury.config.FuryBuilder;
import org.apache.fury.config.Language;
import org.apache.fury.io.ClassLoaderObjectInputStream;
import org.apache.fury.memory.MemoryBuffer;
@@ -53,6 +54,10 @@ public abstract class FuryTestBase {
return javaFuryLocal.get();
}
+ public static FuryBuilder builder() {
+ return
Fury.builder().withLanguage(Language.JAVA).requireClassRegistration(false);
+ }
+
@DataProvider
public static Object[][] referenceTrackingConfig() {
return new Object[][] {{false}, {true}};
diff --git
a/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
b/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
index acd7a683..18d54abb 100644
--- a/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
+++ b/java/fury-core/src/test/java/org/apache/fury/memory/MemoryBufferTest.java
@@ -109,13 +109,13 @@ public class MemoryBufferTest {
{
MemoryBuffer buffer = MemoryUtils.buffer(1024);
int index = 0;
- buffer._unsafePutByte(index, Byte.MIN_VALUE);
+ buffer.putByte(index, Byte.MIN_VALUE);
index += 1;
- buffer._unsafePutInt16(index, Short.MAX_VALUE);
+ buffer.putInt16(index, Short.MAX_VALUE);
index += 2;
buffer.putInt32(index, Integer.MIN_VALUE);
index += 4;
- buffer._unsafePutInt64(index, Long.MAX_VALUE);
+ buffer.putInt64(index, Long.MAX_VALUE);
index += 8;
buffer.putFloat64(index, -1);
index += 8;
diff --git
a/java/fury-core/src/test/java/org/apache/fury/resolver/ClassResolverTest.java
b/java/fury-core/src/test/java/org/apache/fury/resolver/ClassResolverTest.java
index 7179b390..fb100842 100644
---
a/java/fury-core/src/test/java/org/apache/fury/resolver/ClassResolverTest.java
+++
b/java/fury-core/src/test/java/org/apache/fury/resolver/ClassResolverTest.java
@@ -178,6 +178,13 @@ public class ClassResolverTest extends FuryTestBase {
interface Interface2 {}
+ @Test
+ public void testSerializeClassesShared() {
+ Fury fury = builder().build();
+ serDeCheck(fury, Foo.class);
+ serDeCheck(fury, Arrays.asList(Foo.class, Foo.class));
+ }
+
@Test(dataProvider = "referenceTrackingConfig")
public void testSerializeClasses(boolean referenceTracking) {
Fury fury =
@@ -214,7 +221,7 @@ public class ClassResolverTest extends FuryTestBase {
classResolver.writeClassInternal(buffer, getClass());
int writerIndex = buffer.writerIndex();
classResolver.writeClassInternal(buffer, getClass());
- Assert.assertEquals(buffer.writerIndex(), writerIndex + 7);
+ Assert.assertEquals(buffer.writerIndex(), writerIndex + 3);
buffer.writerIndex(0);
}
{
diff --git
a/java/fury-core/src/test/java/org/apache/fury/serializer/UnexistedClassSerializersTest.java
b/java/fury-core/src/test/java/org/apache/fury/serializer/UnexistedClassSerializersTest.java
index 0575a8ac..1fb65633 100644
---
a/java/fury-core/src/test/java/org/apache/fury/serializer/UnexistedClassSerializersTest.java
+++
b/java/fury-core/src/test/java/org/apache/fury/serializer/UnexistedClassSerializersTest.java
@@ -60,7 +60,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
.toArray(Object[][]::new);
}
- private FuryBuilder builder() {
+ private FuryBuilder furyBuilder() {
return Fury.builder()
.withLanguage(Language.JAVA)
.withCompatibleMode(CompatibleMode.COMPATIBLE)
@@ -72,7 +72,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
public void testSkipUnexisted(
boolean referenceTracking, boolean enableCodegen1, boolean
enableCodegen2) {
Fury fury =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen1)
.withCompatibleMode(CompatibleMode.COMPATIBLE)
@@ -86,7 +86,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
Object pojo = Struct.createPOJO(structClass);
byte[] bytes = fury.serialize(pojo);
Fury fury2 =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen2)
.withClassLoader(classLoader)
@@ -103,7 +103,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
boolean enableCodegen2,
boolean enableCodegen3) {
Fury fury =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen1)
.withMetaContextShare(true)
@@ -119,7 +119,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
fury.getSerializationContext().setMetaContext(context1);
byte[] bytes = fury.serialize(pojo);
Fury fury2 =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen2)
.withMetaContextShare(true)
@@ -132,7 +132,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
fury2.getSerializationContext().setMetaContext(context2);
byte[] bytes2 = fury2.serialize(o2);
Fury fury3 =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen3)
.withMetaContextShare(true)
@@ -153,7 +153,7 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
boolean enableCodegen2,
boolean enableCodegen3) {
Fury fury =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen1)
.withMetaContextShare(true)
@@ -168,14 +168,14 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
Struct.createStructClass("TestSkipUnexistedClass3", 2)
}) {
Fury fury2 =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen2)
.withMetaContextShare(true)
.withClassLoader(classLoader)
.build();
Fury fury3 =
- builder()
+ furyBuilder()
.withRefTracking(referenceTracking)
.withCodegen(enableCodegen3)
.withMetaContextShare(true)
@@ -202,12 +202,12 @@ public class UnexistedClassSerializersTest extends
FuryTestBase {
@Test
public void testThrowExceptionIfClassNotExist() {
- Fury fury = builder().withDeserializeUnexistedClass(false).build();
+ Fury fury = furyBuilder().withDeserializeUnexistedClass(false).build();
ClassLoader classLoader = getClass().getClassLoader();
Class<?> structClass =
Struct.createNumberStructClass("TestSkipUnexistedClass1", 2);
Object pojo = Struct.createPOJO(structClass);
Fury fury2 =
-
builder().withDeserializeUnexistedClass(false).withClassLoader(classLoader).build();
+
furyBuilder().withDeserializeUnexistedClass(false).withClassLoader(classLoader).build();
byte[] bytes = fury.serialize(pojo);
Assert.assertThrows(RuntimeException.class, () ->
fury2.deserialize(bytes));
}
diff --git a/python/pyfury/_fury.py b/python/pyfury/_fury.py
index 5a062144..3d9b39ed 100644
--- a/python/pyfury/_fury.py
+++ b/python/pyfury/_fury.py
@@ -48,8 +48,6 @@ from pyfury._serializer import (
PYBOOL_CLASS_ID,
STRING_CLASS_ID,
PICKLE_CLASS_ID,
- USE_CLASSNAME,
- USE_CLASS_ID,
NOT_NULL_STRING_FLAG,
NOT_NULL_PYINT_FLAG,
NOT_NULL_PYBOOL_FLAG,
@@ -466,30 +464,30 @@ class ClassResolver:
def write_classinfo(self, buffer: Buffer, classinfo: ClassInfo):
class_id = classinfo.class_id
if class_id != NO_CLASS_ID:
- buffer.write_int16(class_id)
+ buffer.write_varint32(class_id << 1)
return
- buffer.write_int16(NO_CLASS_ID)
+ buffer.write_varint32(1)
self.write_enum_string_bytes(buffer, classinfo.class_name_bytes)
def read_classinfo(self, buffer):
- class_id = buffer.read_int16()
- if (
- class_id > NO_CLASS_ID
- ): # registered class id are greater than `NO_CLASS_ID`.
+ header = buffer.read_varint32()
+ if header & 0b1 == 0:
+ class_id = header >> 1
classinfo = self._registered_id2_class_info[class_id]
if classinfo.serializer is None:
classinfo.serializer = self._create_serializer(classinfo.cls)
return classinfo
- if buffer.read_int8() == USE_CLASS_ID:
- return self._dynamic_id_to_classinfo_list[buffer.read_int16()]
+ meta_str_header = buffer.read_varint32()
+ length = meta_str_header >> 1
+ if meta_str_header & 0b1 != 0:
+ return self._dynamic_id_to_classinfo_list[length - 1]
class_name_bytes_hash = buffer.read_int64()
- class_name_bytes_length = buffer.read_int16()
reader_index = buffer.reader_index
- buffer.check_bound(reader_index, class_name_bytes_length)
- buffer.reader_index = reader_index + class_name_bytes_length
+ buffer.check_bound(reader_index, length)
+ buffer.reader_index = reader_index + length
classinfo = self._hash_to_classinfo.get(class_name_bytes_hash)
if classinfo is None:
- classname_bytes = buffer.get_bytes(reader_index,
class_name_bytes_length)
+ classname_bytes = buffer.get_bytes(reader_index, length)
full_class_name = classname_bytes.decode(encoding="utf-8")
cls = load_class(full_class_name)
classinfo = self.get_or_create_classinfo(cls)
@@ -506,19 +504,18 @@ class ClassResolver:
enum_string_bytes.dynamic_write_string_id = dynamic_write_string_id
self._dynamic_write_string_id += 1
self._dynamic_written_enum_string.append(enum_string_bytes)
- buffer.write_int8(USE_CLASSNAME)
+ buffer.write_varint32(enum_string_bytes.length << 1)
buffer.write_int64(enum_string_bytes.hashcode)
- buffer.write_int16(enum_string_bytes.length)
buffer.write_bytes(enum_string_bytes.data)
else:
- buffer.write_int8(USE_CLASS_ID)
- buffer.write_int16(dynamic_write_string_id)
+ buffer.write_varint32(((dynamic_write_string_id + 1) << 1) | 1)
def read_enum_string_bytes(self, buffer: Buffer) -> MetaStringBytes:
- if buffer.read_int8() != USE_CLASSNAME:
- return self._dynamic_id_to_enum_str_list[buffer.read_int16()]
+ header = buffer.read_varint32()
+ length = header >> 1
+ if header & 0b1 != 0:
+ return self._dynamic_id_to_enum_str_list[length - 1]
hashcode = buffer.read_int64()
- length = buffer.read_int16()
reader_index = buffer.reader_index
buffer.check_bound(reader_index, length)
buffer.reader_index = reader_index + length
@@ -748,15 +745,15 @@ class Fury:
def serialize_ref(self, buffer, obj, classinfo=None):
cls = type(obj)
if cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(obj)
return
elif cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(obj)
return
elif cls is bool:
- buffer.write_int24(NOT_NULL_PYBOOL_FLAG)
+ buffer.write_int16(NOT_NULL_PYBOOL_FLAG)
buffer.write_bool(obj)
return
if self.ref_resolver.write_ref_or_null(buffer, obj):
@@ -769,15 +766,15 @@ class Fury:
def serialize_nonref(self, buffer, obj):
cls = type(obj)
if cls is str:
- buffer.write_int16(STRING_CLASS_ID)
+ buffer.write_varint32(STRING_CLASS_ID << 1)
buffer.write_string(obj)
return
elif cls is int:
- buffer.write_int16(PYINT_CLASS_ID)
+ buffer.write_varint32(PYINT_CLASS_ID << 1)
buffer.write_varint64(obj)
return
elif cls is bool:
- buffer.write_int16(PYBOOL_CLASS_ID)
+ buffer.write_varint32(PYBOOL_CLASS_ID << 1)
buffer.write_bool(obj)
return
else:
diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx
index df435e70..108d0eaa 100644
--- a/python/pyfury/_serialization.pyx
+++ b/python/pyfury/_serialization.pyx
@@ -223,15 +223,15 @@ cdef int8_t STRING_CLASS_ID = 4
cdef int8_t PICKLE_CLASS_ID = 5
cdef int8_t PICKLE_STRONG_CACHE_CLASS_ID = 6
cdef int8_t PICKLE_CACHE_CLASS_ID = 7
-# `NOT_NULL_VALUE_FLAG` + `CLASS_ID` in little-endian order
+# `NOT_NULL_VALUE_FLAG` + `CLASS_ID<<1` in little-endian order
cdef int32_t NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
- (PYINT_CLASS_ID << 8)
+ (PYINT_CLASS_ID << 9)
cdef int32_t NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
- (PYFLOAT_CLASS_ID << 8)
+ (PYFLOAT_CLASS_ID << 9)
cdef int32_t NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
- (PYBOOL_CLASS_ID << 8)
+ (PYBOOL_CLASS_ID << 9)
cdef int32_t NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
- (STRING_CLASS_ID << 8)
+ (STRING_CLASS_ID << 9)
cdef class BufferObject:
@@ -611,19 +611,21 @@ cdef class ClassResolver:
return serializer
cpdef inline write_classinfo(self, Buffer buffer, ClassInfo classinfo):
- cdef int16_t class_id = classinfo.class_id
+ cdef int32_t class_id = classinfo.class_id
if class_id != NO_CLASS_ID:
- buffer.write_int16(class_id)
+ buffer.write_varint32((class_id << 1))
return
- buffer.write_int16(NO_CLASS_ID)
+ buffer.write_varint32(1)
self._write_enum_string_bytes(buffer, classinfo.class_name_bytes)
cpdef inline ClassInfo read_classinfo(self, Buffer buffer):
- cdef int16_t class_id = buffer.read_int16()
+ cdef int32_t h1 = buffer.read_varint32()
+ cdef int32_t class_id = h1 >> 1
cdef ClassInfo classinfo
cdef PyObject* classinfo_ptr
# registered class id are greater than `NO_CLASS_ID`.
- if class_id > NO_CLASS_ID:
+ if h1 & 0b1 == 0:
+ assert class_id >= 0, class_id
classinfo_ptr = self._c_registered_id2_class_info[class_id]
if classinfo_ptr == NULL:
raise ValueError(f"Unexpected class_id {class_id} "
@@ -632,19 +634,19 @@ cdef class ClassResolver:
if classinfo.serializer is None:
classinfo.serializer = self._create_serializer(classinfo.cls)
return classinfo
- if buffer.read_int8() == USE_CLASS_ID:
- return
<ClassInfo>self._c_dynamic_id_to_classinfo_vec[buffer.read_int16()]
+ cdef int32_t header = buffer.read_varint32()
+ cdef int32_t length = header >> 1
+ if header & 0b1 != 0:
+ return <ClassInfo>self._c_dynamic_id_to_classinfo_vec[length - 1]
cdef int64_t class_name_bytes_hash = buffer.read_int64()
- cdef int16_t class_name_bytes_length = buffer.read_int16()
cdef int32_t reader_index = buffer.reader_index
- buffer.check_bound(reader_index, class_name_bytes_length)
- buffer.reader_index = reader_index + class_name_bytes_length
+ buffer.check_bound(reader_index, length)
+ buffer.reader_index = reader_index + length
classinfo_ptr = self._c_hash_to_classinfo[class_name_bytes_hash]
if classinfo_ptr != NULL:
self._c_dynamic_id_to_classinfo_vec.push_back(classinfo_ptr)
return <ClassInfo>classinfo_ptr
- cdef bytes classname_bytes = buffer.get_bytes(
- reader_index, class_name_bytes_length)
+ cdef bytes classname_bytes = buffer.get_bytes(reader_index, length)
cdef str full_class_name = classname_bytes.decode(encoding="utf-8")
cls = load_class(full_class_name)
classinfo = self.get_or_create_classinfo(cls)
@@ -661,20 +663,18 @@ cdef class ClassResolver:
enum_string_bytes.dynamic_write_string_id = dynamic_class_id
self.dynamic_write_string_id += 1
self._c_dynamic_written_enum_string.push_back(<PyObject*>enum_string_bytes)
- buffer.write_int8(USE_CLASSNAME)
+ buffer.write_varint32(enum_string_bytes.length << 1)
buffer.write_int64(enum_string_bytes.hashcode)
- buffer.write_int16(enum_string_bytes.length)
buffer.write_bytes(enum_string_bytes.data)
else:
- buffer.write_int8(USE_CLASS_ID)
- buffer.write_int16(dynamic_class_id)
+ buffer.write_varint32(((dynamic_class_id + 1) << 1) | 1)
cdef inline MetaStringBytes _read_enum_string_bytes(self, Buffer buffer):
- if buffer.read_int8() != USE_CLASSNAME:
- return <MetaStringBytes>self._c_dynamic_id_to_enum_string_vec[
- buffer.read_int16()]
+ cdef int32_t header = buffer.read_varint32()
+ cdef int32_t length = header >> 1
+ if header & 0b1 != 0:
+ return
<MetaStringBytes>self._c_dynamic_id_to_enum_string_vec[length - 1]
cdef int64_t hashcode = buffer.read_int64()
- cdef int16_t length = buffer.read_int16()
cdef int32_t reader_index = buffer.reader_index
buffer.check_bound(reader_index, length)
buffer.reader_index = reader_index + length
@@ -947,19 +947,19 @@ cdef class Fury:
self, Buffer buffer, obj, ClassInfo classinfo=None):
cls = type(obj)
if cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(obj)
return
elif cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(obj)
return
elif cls is bool:
- buffer.write_int24(NOT_NULL_PYBOOL_FLAG)
+ buffer.write_int16(NOT_NULL_PYBOOL_FLAG)
buffer.write_bool(obj)
return
elif cls is float:
- buffer.write_int24(NOT_NULL_PYFLOAT_FLAG)
+ buffer.write_int16(NOT_NULL_PYFLOAT_FLAG)
buffer.write_double(obj)
return
if self.ref_resolver.write_ref_or_null(buffer, obj):
@@ -972,19 +972,19 @@ cdef class Fury:
cpdef inline serialize_nonref(self, Buffer buffer, obj):
cls = type(obj)
if cls is str:
- buffer.write_int16(STRING_CLASS_ID)
+ buffer.write_varint32(STRING_CLASS_ID << 1)
buffer.write_string(obj)
return
elif cls is int:
- buffer.write_int16(PYINT_CLASS_ID)
+ buffer.write_varint32(PYINT_CLASS_ID << 1)
buffer.write_varint64(obj)
return
elif cls is bool:
- buffer.write_int16(PYBOOL_CLASS_ID)
+ buffer.write_varint32(PYBOOL_CLASS_ID << 1)
buffer.write_bool(obj)
return
elif cls is float:
- buffer.write_int16(PYFLOAT_CLASS_ID)
+ buffer.write_varint32(PYFLOAT_CLASS_ID << 1)
buffer.write_double(obj)
return
cdef ClassInfo classinfo =
self.class_resolver.get_or_create_classinfo(cls)
@@ -1606,16 +1606,16 @@ cdef class CollectionSerializer(Serializer):
for s in value:
cls = type(s)
if cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(s)
elif cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(s)
elif cls is bool:
- buffer.write_int24(NOT_NULL_PYBOOL_FLAG)
+ buffer.write_int16(NOT_NULL_PYBOOL_FLAG)
buffer.write_bool(s)
elif cls is float:
- buffer.write_int24(NOT_NULL_PYFLOAT_FLAG)
+ buffer.write_int16(NOT_NULL_PYFLOAT_FLAG)
buffer.write_double(s)
else:
if not ref_resolver.write_ref_or_null(buffer, s):
@@ -1793,7 +1793,7 @@ cdef class MapSerializer(Serializer):
for k, v in value.items():
key_cls = type(k)
if key_cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(k)
else:
if not self.ref_resolver.write_ref_or_null(buffer, k):
@@ -1802,16 +1802,16 @@ cdef class MapSerializer(Serializer):
key_classinfo.serializer.write(buffer, k)
value_cls = type(v)
if value_cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(v)
elif value_cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(v)
elif value_cls is bool:
- buffer.write_int24(NOT_NULL_PYBOOL_FLAG)
+ buffer.write_int16(NOT_NULL_PYBOOL_FLAG)
buffer.write_bool(v)
elif value_cls is float:
- buffer.write_int24(NOT_NULL_PYFLOAT_FLAG)
+ buffer.write_int16(NOT_NULL_PYFLOAT_FLAG)
buffer.write_double(v)
else:
if not self.ref_resolver.write_ref_or_null(buffer, v):
@@ -1907,7 +1907,7 @@ cdef class SubMapSerializer(Serializer):
for k, v in value.items():
key_cls = type(k)
if key_cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(k)
else:
if not self.ref_resolver.write_ref_or_null(buffer, k):
@@ -1916,16 +1916,16 @@ cdef class SubMapSerializer(Serializer):
key_classinfo.serializer.write(buffer, k)
value_cls = type(v)
if value_cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(v)
elif value_cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(v)
elif value_cls is bool:
- buffer.write_int24(NOT_NULL_PYBOOL_FLAG)
+ buffer.write_int16(NOT_NULL_PYBOOL_FLAG)
buffer.write_bool(v)
elif value_cls is float:
- buffer.write_int24(NOT_NULL_PYFLOAT_FLAG)
+ buffer.write_int16(NOT_NULL_PYFLOAT_FLAG)
buffer.write_double(v)
else:
if not self.ref_resolver.write_ref_or_null(buffer, v):
@@ -2214,7 +2214,7 @@ cdef class SliceSerializer(Serializer):
start, stop, step = value.start, value.stop, value.step
if type(start) is int:
# TODO support varint128
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(start)
else:
if start is None:
@@ -2224,7 +2224,7 @@ cdef class SliceSerializer(Serializer):
self.fury.serialize_nonref(buffer, start)
if type(stop) is int:
# TODO support varint128
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(stop)
else:
if stop is None:
@@ -2234,7 +2234,7 @@ cdef class SliceSerializer(Serializer):
self.fury.serialize_nonref(buffer, stop)
if type(step) is int:
# TODO support varint128
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(step)
else:
if step is None:
diff --git a/python/pyfury/_serializer.py b/python/pyfury/_serializer.py
index 25a70014..e3c8dbea 100644
--- a/python/pyfury/_serializer.py
+++ b/python/pyfury/_serializer.py
@@ -54,11 +54,11 @@ STRING_CLASS_ID = 4
PICKLE_CLASS_ID = 5
PICKLE_STRONG_CACHE_CLASS_ID = 6
PICKLE_CACHE_CLASS_ID = 7
-# `NOT_NULL_VALUE_FLAG` + `CLASS_ID` in little-endian order
-NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYINT_CLASS_ID << 8)
-NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYFLOAT_CLASS_ID
<< 8)
-NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYBOOL_CLASS_ID <<
8)
-NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (STRING_CLASS_ID <<
8)
+# `NOT_NULL_VALUE_FLAG` + `CLASS_ID << 1` in little-endian order
+NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYINT_CLASS_ID << 9)
+NOT_NULL_PYFLOAT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYFLOAT_CLASS_ID
<< 9)
+NOT_NULL_PYBOOL_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (PYBOOL_CLASS_ID <<
9)
+NOT_NULL_STRING_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | (STRING_CLASS_ID <<
9)
class _PickleStub:
@@ -460,13 +460,13 @@ class CollectionSerializer(Serializer):
for s in value:
cls = type(s)
if cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(s)
elif cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(s)
elif cls is bool:
- buffer.write_int24(NOT_NULL_PYBOOL_FLAG)
+ buffer.write_int16(NOT_NULL_PYBOOL_FLAG)
buffer.write_bool(s)
else:
if not self.ref_resolver.write_ref_or_null(buffer, s):
@@ -578,7 +578,7 @@ class MapSerializer(Serializer):
for k, v in value.items():
key_cls = type(k)
if key_cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(k)
else:
if not self.ref_resolver.write_ref_or_null(buffer, k):
@@ -587,10 +587,10 @@ class MapSerializer(Serializer):
classinfo.serializer.write(buffer, k)
value_cls = type(v)
if value_cls is str:
- buffer.write_int24(NOT_NULL_STRING_FLAG)
+ buffer.write_int16(NOT_NULL_STRING_FLAG)
buffer.write_string(v)
elif value_cls is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(v)
else:
if not self.ref_resolver.write_ref_or_null(buffer, v):
@@ -652,7 +652,7 @@ class SliceSerializer(Serializer):
start, stop, step = value.start, value.stop, value.step
if type(start) is int:
# TODO support varint128
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(start)
else:
if start is None:
@@ -662,7 +662,7 @@ class SliceSerializer(Serializer):
self.fury.serialize_nonref(buffer, start)
if type(stop) is int:
# TODO support varint128
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(stop)
else:
if stop is None:
@@ -672,7 +672,7 @@ class SliceSerializer(Serializer):
self.fury.serialize_nonref(buffer, stop)
if type(step) is int:
# TODO support varint128
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(step)
else:
if step is None:
diff --git a/python/pyfury/serializer.py b/python/pyfury/serializer.py
index 100fe4b1..b348be83 100644
--- a/python/pyfury/serializer.py
+++ b/python/pyfury/serializer.py
@@ -216,7 +216,7 @@ class PandasRangeIndexSerializer(Serializer):
stop = value.stop
step = value.step
if type(start) is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(start)
else:
if start is None:
@@ -225,7 +225,7 @@ class PandasRangeIndexSerializer(Serializer):
buffer.write_int8(NOT_NULL_VALUE_FLAG)
fury.serialize_nonref(buffer, start)
if type(stop) is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(stop)
else:
if stop is None:
@@ -234,7 +234,7 @@ class PandasRangeIndexSerializer(Serializer):
buffer.write_int8(NOT_NULL_VALUE_FLAG)
fury.serialize_nonref(buffer, stop)
if type(step) is int:
- buffer.write_int24(NOT_NULL_PYINT_FLAG)
+ buffer.write_int16(NOT_NULL_PYINT_FLAG)
buffer.write_varint64(step)
else:
if step is None:
diff --git a/python/pyfury/tests/test_serializer.py b/python/pyfury/tests/test_serializer.py
index d91ea6d3..dadfe61f 100644
--- a/python/pyfury/tests/test_serializer.py
+++ b/python/pyfury/tests/test_serializer.py
@@ -432,6 +432,9 @@ def test_unsupported_callback():
def test_slice():
fury = Fury(language=Language.PYTHON, ref_tracking=True)
+ assert fury.deserialize(fury.serialize(slice(1, None, "10"))) == slice(
+ 1, None, "10"
+ )
assert fury.deserialize(fury.serialize(slice(1, 100, 10))) == slice(1, 100, 10)
assert fury.deserialize(fury.serialize(slice(1, None, 10))) == slice(1, None, 10)
assert fury.deserialize(fury.serialize(slice(10, 10, None))) == slice(10, 10, None)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]