(fury) branch main updated: perf(go): Optimize the type registration system && Reduce useless code (#2262)

pandalee Wed, 28 May 2025 10:42:31 -0700

This is an automated email from the ASF dual-hosted git repository.

pandalee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git



The following commit(s) were added to refs/heads/main by this push:
     new 9cdf84e7 perf(go): Optimize the type registration system && Reduce 
useless code  (#2262)
9cdf84e7 is described below

commit 9cdf84e7613bdae031757d45d699259b534d0e33
Author: PAN <[email protected]>
AuthorDate: Thu May 29 01:41:29 2025 +0800

    perf(go): Optimize the type registration system && Reduce useless code  
(#2262)
    
    <!--
    **Thanks for contributing to Fury.**
    
    **If this is your first time opening a PR on fury, you can refer to
    
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fury (incubating)** community has restrictions on the
    naming of pr titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
    
    - Fury has a strong focus on performance. If the PR you submit will have
    an impact on performance, please benchmark it first and provide the
    benchmark result here.
    -->
    
    ## What does this PR do?
    
    1. Optimize the type registration system: key the type-info cache by
    `reflect.Type` instead of the type's string name to reduce copying, and
    remove code that is no longer needed.
    
    2. Add buffer helper methods, to be used by follow-up work that continues
    optimizing MetaStringBytes.
    
    
    <!-- Describe the purpose of this PR. -->
    
    ## Related issues
    
    <!--
    Is there any related issue? Please attach here.
    
    - #xxxx0
    - #xxxx1
    - #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fury/issues/new/choose) describing the
    need to do so and update the document if necessary.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it will have impact on performance, the code reviewer will explain
    it), be sure to attach a benchmark data here.
    -->
---
 go/fury/buffer.go           | 168 ++++++++++++++++++++++++++++++++++++++++++++
 go/fury/meta/meta_string.go |  25 +++++++
 go/fury/type.go             |   9 ++-
 3 files changed, 197 insertions(+), 5 deletions(-)

diff --git a/go/fury/buffer.go b/go/fury/buffer.go
index 5b7d2b61..294d801a 100644
--- a/go/fury/buffer.go
+++ b/go/fury/buffer.go
@@ -568,3 +568,171 @@ func (b *ByteBuffer) readVarUint32Slow() uint32 {
 func (b *ByteBuffer) PutUint8(writerIndex int, value uint8) {
        b.data[writerIndex] = byte(value)
 }
+
+// WriteVarUint32Small7 appends value at the writer index in a variable-length
+// encoding: 7 payload bits per byte, lowest group first, with the high bit of
+// a byte set when another byte follows. Values below 0x80 are written here as
+// a single byte; anything larger is handed to continueWriteVarUint32Small7.
+// The return value is the number of bytes the writer index advanced by.
+func (b *ByteBuffer) WriteVarUint32Small7(value uint32) int {
+       // Request 8 bytes up front because the multi-byte path may store a whole
+       // 64-bit word at the writer index.
+       // NOTE(review): assumes grow(8) guarantees 8 writable bytes - confirm.
+       b.grow(8)
+       if value >= 0x80 {
+               return b.continueWriteVarUint32Small7(value)
+       }
+       b.data[b.writerIndex] = byte(value)
+       b.writerIndex++
+       return 1
+}
+
+// continueWriteVarUint32Small7 writes a value that needs at least two encoded
+// bytes. It packs the first two 7-bit groups into a word, stores it directly
+// when the value fits in 14 bits, and otherwise lets continuePutVarInt36 add
+// the remaining groups. It advances the writer index by, and returns, the
+// number of encoded bytes.
+func (b *ByteBuffer) continueWriteVarUint32Small7(value uint32) int {
+       start := b.writerIndex
+       // Byte 0: value bits 0-6 plus the continuation flag 0x80.
+       // Byte 1: value bits 7-13, moved up by one to skip over that flag.
+       word := uint64(value&0x7F) | 0x80 | uint64((value&0x3f80)<<1)
+       written := 2
+       if value>>14 == 0 {
+               // unsafePutInt32 stores four bytes, but only two belong to the
+               // encoding, so the writer index moves by 2.
+               b.unsafePutInt32(start, int32(word))
+       } else {
+               written = b.continuePutVarInt36(start, word, uint64(value))
+       }
+       b.writerIndex += written
+       return written
+}
+
+// continuePutVarInt36 completes an encoding whose first two bytes are already
+// packed into encoded, and stores the result at index. Each additional 7-bit
+// group of value is shifted past the continuation flags below it, and the
+// flag of the preceding byte is set. It returns the encoded length (3, 4 or
+// 5) and does not touch the writer index.
+func (b *ByteBuffer) continuePutVarInt36(index int, encoded, value uint64) int {
+       // Third byte: value bits 14-20; 0x8000 flags byte 1 as "more follows".
+       encoded |= 0x8000 | ((value & 0x1fc000) << 2)
+       if value>>21 == 0 {
+               b.unsafePutInt32(index, int32(encoded))
+               return 3
+       }
+
+       // Fourth byte: value bits 21-27; 0x800000 flags byte 2.
+       encoded |= 0x800000 | ((value & 0xfe00000) << 3)
+       if value>>28 == 0 {
+               b.unsafePutInt32(index, int32(encoded))
+               return 4
+       }
+
+       // Fifth byte: value bits 28 and up; 0x80000000 flags byte 3. The word no
+       // longer fits in 32 bits, so a 64-bit store is used.
+       encoded |= 0x80000000 | ((value & 0xff0000000) << 4)
+       b.unsafePutInt64(index, encoded)
+       return 5
+}
+
+// unsafePutInt32 stores v as four little-endian bytes starting at index.
+// It neither grows the buffer nor moves the writer index; the caller must
+// make sure four bytes are available at index.
+func (b *ByteBuffer) unsafePutInt32(index int, v int32) {
+       dst := b.data[index:]
+       binary.LittleEndian.PutUint32(dst, uint32(v))
+}
+
+// unsafePutInt64 stores v as eight little-endian bytes starting at index.
+// It neither grows the buffer nor moves the writer index; the caller must
+// make sure eight bytes are available at index.
+func (b *ByteBuffer) unsafePutInt64(index int, v uint64) {
+       dst := b.data[index:]
+       binary.LittleEndian.PutUint64(dst, v)
+}
+
+// ReadVarUint32Small7 decodes a variable-length unsigned integer at the reader
+// index. A value stored in a single byte (high bit clear) is returned
+// immediately and the reader index moves by one; every other case, including
+// an exhausted buffer, is delegated to readVarUint32Small14 with the reader
+// index left untouched.
+func (b *ByteBuffer) ReadVarUint32Small7() int {
+       pos := b.readerIndex
+       if pos < len(b.data) {
+               if first := b.data[pos]; first&0x80 == 0 {
+                       b.readerIndex = pos + 1
+                       return int(first)
+               }
+       }
+       return b.readVarUint32Small14()
+}
+
+// readVarUint32Small14 decodes a variable-length unsigned integer at the
+// reader index. When at least five bytes are unread (the longest encoding
+// handled here) it loads one 32-bit word and decodes the first two groups
+// from it, passing longer encodings to continueReadVarUint32. With fewer
+// bytes left it falls back to the byte-at-a-time readVarUint36Slow.
+func (b *ByteBuffer) readVarUint32Small14() int {
+       start := b.readerIndex
+       if len(b.data)-start < 5 {
+               return int(b.readVarUint36Slow())
+       }
+
+       word := b.unsafeGetInt32(start)
+       result := word & 0x7F
+       if word&0x80 == 0 {
+               // One-byte encoding.
+               b.readerIndex = start + 1
+               return result
+       }
+
+       // Second group sits one bit higher because of the flag in byte 0.
+       result |= (word >> 1) & 0x3f80
+       if word&0x8000 == 0 {
+               // Two-byte encoding.
+               b.readerIndex = start + 2
+               return result
+       }
+       return b.continueReadVarUint32(start+2, word, result)
+}
+
+// continueReadVarUint32 finishes decoding a varint whose first two bytes have
+// already been folded into value. readIdx is the index of the third encoded
+// byte and bulkRead is the little-endian 32-bit word loaded from the start of
+// the encoding. It stores the index just past the last consumed byte in
+// readerIndex and returns the decoded value.
+func (b *ByteBuffer) continueReadVarUint32(readIdx, bulkRead, value int) int {
+       readIdx++
+       value |= (bulkRead >> 2) & 0x1fc000
+       if bulkRead&0x800000 != 0 {
+               readIdx++
+               value |= (bulkRead >> 3) & 0xfe00000
+               // Test bit 31 through uint32: the untyped constant 0x80000000 does
+               // not fit in int where int is 32 bits wide, so masking bulkRead
+               // with it directly would not compile on those targets.
+               if uint32(bulkRead)&0x80000000 != 0 {
+                       // The fifth byte lies outside the 32-bit word; read it directly.
+                       v := b.data[readIdx]
+                       readIdx++
+                       value |= int(v&0x7F) << 28
+               }
+       }
+       b.readerIndex = readIdx
+       return value
+}
+
+// readVarUint36Slow decodes a varint of up to five bytes one ReadByte call at
+// a time. The first four bytes each contribute their low 7 bits and use the
+// high bit as a "more follows" flag; a fifth byte, when present, contributes
+// all 8 of its bits starting at bit 28. The sequence is written out rather
+// than looped.
+//
+// NOTE(review): errors returned by ReadByte are discarded, so a truncated
+// buffer decodes using whatever byte value ReadByte yields on failure -
+// confirm this is intended.
+func (b *ByteBuffer) readVarUint36Slow() uint64 {
+       first, _ := b.ReadByte()
+       result := uint64(first & 0x7F)
+       if first&0x80 == 0 {
+               return result
+       }
+
+       second, _ := b.ReadByte()
+       result |= uint64(second&0x7F) << 7
+       if second&0x80 == 0 {
+               return result
+       }
+
+       third, _ := b.ReadByte()
+       result |= uint64(third&0x7F) << 14
+       if third&0x80 == 0 {
+               return result
+       }
+
+       fourth, _ := b.ReadByte()
+       result |= uint64(fourth&0x7F) << 21
+       if fourth&0x80 == 0 {
+               return result
+       }
+
+       fifth, _ := b.ReadByte()
+       result |= uint64(fifth) << 28
+       return result
+}
+
+// unsafeGetInt32 loads four bytes starting at idx as a little-endian signed
+// 32-bit integer and returns it widened to int. The reader index is not
+// changed; the caller must make sure four bytes are available at idx.
+func (b *ByteBuffer) unsafeGetInt32(idx int) int {
+       raw := binary.LittleEndian.Uint32(b.data[idx:])
+       return int(int32(raw))
+}
+
+// IncreaseReaderIndex moves the reader index forward by n positions. No bounds
+// check is performed against the buffer contents.
+func (b *ByteBuffer) IncreaseReaderIndex(n int) {
+       b.readerIndex = b.readerIndex + n
+}
+
+// ReadBytesAsInt64 reads length bytes at the reader index, assembles them
+// little-endian (first byte in the least significant position) into a uint64
+// and advances the reader index by length.
+//
+// length is expected to be in [0, 8]. If fewer than length bytes remain, the
+// call panics with an out-of-range access.
+func (b *ByteBuffer) ReadBytesAsInt64(length int) uint64 {
+       readerIdx := b.readerIndex
+       remaining := len(b.data) - readerIdx
+       // The fast path loads a full 64-bit word, so it needs 8 readable bytes
+       // no matter how small length is; guarding on length alone would let
+       // binary.LittleEndian.Uint64 panic near the end of the buffer.
+       if remaining >= 8 {
+               v := binary.LittleEndian.Uint64(b.data[readerIdx:])
+               b.readerIndex = readerIdx + length
+               // Keep only the low length bytes of the loaded word.
+               mask := uint64(0xffffffffffffffff) >> uint((8-length)*8)
+               return v & mask
+       }
+       return b.slowReadBytesAsInt64(remaining, length)
+}
+
+// slowReadBytesAsInt64 assembles length bytes at the reader index into a
+// little-endian uint64 one byte at a time. The reader index is advanced by
+// length before the bytes are read. remaining is accepted but not used; no
+// refill is attempted, so the bytes must already be present in the buffer.
+func (b *ByteBuffer) slowReadBytesAsInt64(remaining, length int) uint64 {
+       start := b.readerIndex
+       b.readerIndex = start + length
+
+       var result uint64
+       for i, shift := 0, 0; i < length; i, shift = i+1, shift+8 {
+               result |= uint64(b.data[start+i]) << shift
+       }
+       return result
+}
+
+// ReadBytes returns the next n bytes at the reader index and advances the
+// reader index past them. The result is a sub-slice of the buffer's own
+// storage, not a copy, so it shares memory with the buffer. The slice
+// expression panics if the range exceeds the capacity of the underlying data.
+func (b *ByteBuffer) ReadBytes(n int) []byte {
+       start, end := b.readerIndex, b.readerIndex+n
+       out := b.data[start:end]
+       b.readerIndex = end
+       return out
+}
diff --git a/go/fury/meta/meta_string.go b/go/fury/meta/meta_string.go
index 429e6e58..3c72a64c 100644
--- a/go/fury/meta/meta_string.go
+++ b/go/fury/meta/meta_string.go
@@ -17,6 +17,8 @@
 
 package meta
 
+import (
+       "errors"
+       "strconv"
+)
+
 // Encoding Algorithms Flags
 type Encoding uint8
 
@@ -37,6 +39,20 @@ type MetaString struct {
        encodedBytes []byte // serialized data
 }
 
+// NewMetaString assembles a MetaString from its already-computed parts: the
+// original input text, the encoding used, the two special characters and the
+// encoded bytes. The encoded slice is stored as given, without copying.
+func NewMetaString(input string, encoding Encoding, special1, special2 byte, encoded []byte) *MetaString {
+       ms := new(MetaString)
+       ms.inputString = input
+       ms.encoding = encoding
+       ms.specialChar1 = special1
+       ms.specialChar2 = special2
+       ms.encodedBytes = encoded
+       return ms
+}
+
+// NewEmptyMetaString returns a MetaString for the empty string: UTF_8
+// encoding, zero special characters and an empty, non-nil encoded byte slice.
+func NewEmptyMetaString() *MetaString {
+       return &MetaString{
+               inputString: "",
+               encoding:    UTF_8,
+               // Deliberately non-nil: other code in this file compares
+               // encodedBytes against nil.
+               encodedBytes: []byte{},
+       }
+}
+
 func (ms *MetaString) GetInputString() string { return ms.inputString }
 
 func (ms *MetaString) GetEncoding() Encoding { return ms.encoding }
@@ -47,6 +63,15 @@ func (ms *MetaString) GetSpecialChar2() byte { return 
ms.specialChar2 }
 
 func (ms *MetaString) GetEncodedBytes() []byte { return ms.encodedBytes }
 
+// EncodingFromByte converts a raw flag byte into the Encoding it denotes.
+// It returns an error naming the numeric value of b when b matches none of
+// the encodings listed in the switch below.
+func EncodingFromByte(b byte) (Encoding, error) {
+       enc := Encoding(b)
+       switch enc {
+       case UTF_8, LOWER_SPECIAL, LOWER_UPPER_DIGIT_SPECIAL, FIRST_TO_LOWER_SPECIAL, ALL_TO_LOWER_SPECIAL:
+               return enc, nil
+       }
+       // strconv.Itoa renders the flag as decimal digits; string(b) would
+       // instead produce the single character with that code point.
+       return 0, errors.New("Encoding flag not recognized: " + strconv.Itoa(int(b)))
+}
+
 // StripLastChar return true if last char should be stripped
 func (ms *MetaString) StripLastChar() bool {
        if ms.encoding == UTF_8 || ms.encodedBytes == nil {
diff --git a/go/fury/type.go b/go/fury/type.go
index 36cfbd9b..f7d577cc 100644
--- a/go/fury/type.go
+++ b/go/fury/type.go
@@ -274,7 +274,7 @@ type typeResolver struct {
        dynamicWriteStringID  int32
 
        // Class registries
-       typesInfo           map[string]TypeInfo
+       typesInfo           map[reflect.Type]TypeInfo
        nsTypeToTypeInfo    map[nsTypeKey]TypeInfo
        namedTypeToTypeInfo map[namedTypeKey]TypeInfo
 
@@ -310,7 +310,7 @@ func newTypeResolver(fury *Fury) *typeResolver {
                typeIDCounter:         300,
                dynamicWriteStringID:  0,
 
-               typesInfo:           make(map[string]TypeInfo),
+               typesInfo:           make(map[reflect.Type]TypeInfo),
                nsTypeToTypeInfo:    make(map[nsTypeKey]TypeInfo),
                namedTypeToTypeInfo: make(map[namedTypeKey]TypeInfo),
 
@@ -453,7 +453,7 @@ func (r *typeResolver) getSerializerByTypeTag(typeTag 
string) (Serializer, error
 
 func (r *typeResolver) getTypeInfo(value reflect.Value, create bool) 
(TypeInfo, error) {
        // First check if type info exists in cache
-       typeString := value.Type().String()
+       typeString := value.Type()
        if info, ok := r.typesInfo[typeString]; ok {
                if info.Serializer == nil {
                        // Lazy initialize serializer if not created yet
@@ -578,8 +578,7 @@ func (r *typeResolver) registerType(
        }
 
        // Update resolver caches:
-       tname := typ.String()
-       r.typesInfo[tname] = typeInfo // Cache by type string
+       r.typesInfo[typ] = typeInfo // Cache by type string
 
        if typeName != "" {
                // Cache by namespace/name pair


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(fury) branch main updated: perf(go): Optimize the type registration system && Reduce useless code (#2262)

Reply via email to