This is an automated email from the ASF dual-hosted git repository.
pandalee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 9cdf84e7 perf(go): Optimize the type registration system && Reduce
useless code (#2262)
9cdf84e7 is described below
commit 9cdf84e7613bdae031757d45d699259b534d0e33
Author: PAN <[email protected]>
AuthorDate: Thu May 29 01:41:29 2025 +0800
perf(go): Optimize the type registration system && Reduce useless code
(#2262)
<!--
**Thanks for contributing to Fury.**
**If this is your first time opening a PR on fury, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fury (incubating)** community has restrictions on the
naming of pr titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
- Fury has a strong focus on performance. If the PR you submit will have
an impact on performance, please benchmark it first and provide the
benchmark result here.
-->
## What does this PR do?
1. Optimize the type registration system: key the type cache by
`reflect.Type` instead of its string form to reduce copying, and remove
useless code.
2. Add buffer utilities for later use in further optimizing
metastringbytes.
<!-- Describe the purpose of this PR. -->
## Related issues
<!--
Is there any related issue? Please attach here.
- #xxxx0
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
-->
---
go/fury/buffer.go | 168 ++++++++++++++++++++++++++++++++++++++++++++
go/fury/meta/meta_string.go | 25 +++++++
go/fury/type.go | 9 ++-
3 files changed, 197 insertions(+), 5 deletions(-)
diff --git a/go/fury/buffer.go b/go/fury/buffer.go
index 5b7d2b61..294d801a 100644
--- a/go/fury/buffer.go
+++ b/go/fury/buffer.go
@@ -568,3 +568,171 @@ func (b *ByteBuffer) readVarUint32Slow() uint32 {
func (b *ByteBuffer) PutUint8(writerIndex int, value uint8) {
b.data[writerIndex] = byte(value)
}
+
+// WriteVarUint32Small7 writes a uint32 in variable-length small-7 format
+func (b *ByteBuffer) WriteVarUint32Small7(value uint32) int {
+ b.grow(8)
+ if value>>7 == 0 {
+ b.data[b.writerIndex] = byte(value)
+ b.writerIndex++
+ return 1
+ }
+ return b.continueWriteVarUint32Small7(value)
+}
+
+func (b *ByteBuffer) continueWriteVarUint32Small7(value uint32) int {
+ encoded := uint64(value & 0x7F)
+ encoded |= uint64((value&0x3f80)<<1) | 0x80
+ idx := b.writerIndex
+ if value>>14 == 0 {
+ b.unsafePutInt32(idx, int32(encoded))
+ b.writerIndex += 2
+ return 2
+ }
+ d := b.continuePutVarInt36(idx, encoded, uint64(value))
+ b.writerIndex += d
+ return d
+}
+
+func (b *ByteBuffer) continuePutVarInt36(index int, encoded, value uint64) int
{
+ // bits 14
+ encoded |= ((value & 0x1fc000) << 2) | 0x8000
+ if value>>21 == 0 {
+ b.unsafePutInt32(index, int32(encoded))
+ return 3
+ }
+ // bits 21
+ encoded |= ((value & 0xfe00000) << 3) | 0x800000
+ if value>>28 == 0 {
+ b.unsafePutInt32(index, int32(encoded))
+ return 4
+ }
+ // bits 28
+ encoded |= ((value & 0xff0000000) << 4) | 0x80000000
+ b.unsafePutInt64(index, encoded)
+ return 5
+}
+
+func (b *ByteBuffer) unsafePutInt32(index int, v int32) {
+ binary.LittleEndian.PutUint32(b.data[index:], uint32(v))
+}
+
+func (b *ByteBuffer) unsafePutInt64(index int, v uint64) {
+ binary.LittleEndian.PutUint64(b.data[index:], v)
+}
+
+// ByteBuffer methods for variable-length integers
+func (b *ByteBuffer) ReadVarUint32Small7() int {
+ readIdx := b.readerIndex
+ if len(b.data)-readIdx > 0 {
+ v := b.data[readIdx]
+ readIdx++
+ if v&0x80 == 0 {
+ b.readerIndex = readIdx
+ return int(v)
+ }
+ }
+ return b.readVarUint32Small14()
+}
+
+func (b *ByteBuffer) readVarUint32Small14() int {
+ readIdx := b.readerIndex
+ if len(b.data)-readIdx >= 5 {
+ four := b.unsafeGetInt32(readIdx)
+ readIdx++
+ value := four & 0x7F
+ if four&0x80 != 0 {
+ readIdx++
+ value |= (four >> 1) & 0x3f80
+ if four&0x8000 != 0 {
+ return b.continueReadVarUint32(readIdx, four,
value)
+ }
+ }
+ b.readerIndex = readIdx
+ return value
+ }
+ return int(b.readVarUint36Slow())
+}
+
+func (b *ByteBuffer) continueReadVarUint32(readIdx, bulkRead, value int) int {
+ readIdx++
+ value |= (bulkRead >> 2) & 0x1fc000
+ if bulkRead&0x800000 != 0 {
+ readIdx++
+ value |= (bulkRead >> 3) & 0xfe00000
+ if bulkRead&0x80000000 != 0 {
+ v := b.data[readIdx]
+ readIdx++
+ value |= int(v&0x7F) << 28
+ }
+ }
+ b.readerIndex = readIdx
+ return value
+}
+
+func (b *ByteBuffer) readVarUint36Slow() uint64 {
+ // unrolled loop
+ b0, _ := b.ReadByte()
+ result := uint64(b0 & 0x7F)
+ if b0&0x80 != 0 {
+ b1, _ := b.ReadByte()
+ result |= uint64(b1&0x7F) << 7
+ if b1&0x80 != 0 {
+ b2, _ := b.ReadByte()
+ result |= uint64(b2&0x7F) << 14
+ if b2&0x80 != 0 {
+ b3, _ := b.ReadByte()
+ result |= uint64(b3&0x7F) << 21
+ if b3&0x80 != 0 {
+ b4, _ := b.ReadByte()
+ result |= uint64(b4) << 28
+ }
+ }
+ }
+ }
+ return result
+}
+
+// unsafeGetInt32 reads little-endian int32 at index
+func (b *ByteBuffer) unsafeGetInt32(idx int) int {
+ return int(int32(binary.LittleEndian.Uint32(b.data[idx:])))
+}
+
+// IncreaseReaderIndex advances readerIndex
+func (b *ByteBuffer) IncreaseReaderIndex(n int) {
+ b.readerIndex += n
+}
+
+// ReadBytesAsInt64 reads up to 8 bytes and returns as uint64
+// fast path using underlying 64-bit read
+func (b *ByteBuffer) ReadBytesAsInt64(length int) uint64 {
+ readerIdx := b.readerIndex
+ remaining := len(b.data) - readerIdx
+ if remaining >= length {
+ // fast: read full 8 bytes then mask
+ v := binary.LittleEndian.Uint64(b.data[readerIdx:])
+ b.readerIndex = readerIdx + length
+ // mask off unused high bytes
+ mask := uint64(0xffffffffffffffff) >> uint((8-length)*8)
+ return v & mask
+ }
+ return b.slowReadBytesAsInt64(remaining, length)
+}
+
+func (b *ByteBuffer) slowReadBytesAsInt64(remaining, length int) uint64 {
+ // fill buffer omitted: assume data available
+ readerIdx := b.readerIndex
+ b.readerIndex = readerIdx + length
+ var result uint64
+ for i := 0; i < length; i++ {
+ result |= uint64(b.data[readerIdx+i]&0xff) << (i * 8)
+ }
+ return result
+}
+
+// ReadBytes reads n bytes
+func (b *ByteBuffer) ReadBytes(n int) []byte {
+ p := b.data[b.readerIndex : b.readerIndex+n]
+ b.readerIndex += n
+ return p
+}
diff --git a/go/fury/meta/meta_string.go b/go/fury/meta/meta_string.go
index 429e6e58..3c72a64c 100644
--- a/go/fury/meta/meta_string.go
+++ b/go/fury/meta/meta_string.go
@@ -17,6 +17,8 @@
package meta
+import "errors"
+
// Encoding Algorithms Flags
type Encoding uint8
@@ -37,6 +39,20 @@ type MetaString struct {
encodedBytes []byte // serialized data
}
+func NewMetaString(input string, encoding Encoding, special1, special2 byte,
encoded []byte) *MetaString {
+ return &MetaString{
+ inputString: input,
+ encoding: encoding,
+ specialChar1: special1,
+ specialChar2: special2,
+ encodedBytes: encoded,
+ }
+}
+
+func NewEmptyMetaString() *MetaString {
+ return NewMetaString("", UTF_8, 0, 0, []byte{})
+}
+
func (ms *MetaString) GetInputString() string { return ms.inputString }
func (ms *MetaString) GetEncoding() Encoding { return ms.encoding }
@@ -47,6 +63,15 @@ func (ms *MetaString) GetSpecialChar2() byte { return
ms.specialChar2 }
func (ms *MetaString) GetEncodedBytes() []byte { return ms.encodedBytes }
+// EncodingFromByte maps a byte value to an Encoding
+func EncodingFromByte(b byte) (Encoding, error) {
+ switch Encoding(b) {
+ case UTF_8, LOWER_SPECIAL, LOWER_UPPER_DIGIT_SPECIAL,
FIRST_TO_LOWER_SPECIAL, ALL_TO_LOWER_SPECIAL:
+ return Encoding(b), nil
+ }
+ return 0, errors.New("Encoding flag not recognized: " + string(b))
+}
+
// StripLastChar return true if last char should be stripped
func (ms *MetaString) StripLastChar() bool {
if ms.encoding == UTF_8 || ms.encodedBytes == nil {
diff --git a/go/fury/type.go b/go/fury/type.go
index 36cfbd9b..f7d577cc 100644
--- a/go/fury/type.go
+++ b/go/fury/type.go
@@ -274,7 +274,7 @@ type typeResolver struct {
dynamicWriteStringID int32
// Class registries
- typesInfo map[string]TypeInfo
+ typesInfo map[reflect.Type]TypeInfo
nsTypeToTypeInfo map[nsTypeKey]TypeInfo
namedTypeToTypeInfo map[namedTypeKey]TypeInfo
@@ -310,7 +310,7 @@ func newTypeResolver(fury *Fury) *typeResolver {
typeIDCounter: 300,
dynamicWriteStringID: 0,
- typesInfo: make(map[string]TypeInfo),
+ typesInfo: make(map[reflect.Type]TypeInfo),
nsTypeToTypeInfo: make(map[nsTypeKey]TypeInfo),
namedTypeToTypeInfo: make(map[namedTypeKey]TypeInfo),
@@ -453,7 +453,7 @@ func (r *typeResolver) getSerializerByTypeTag(typeTag
string) (Serializer, error
func (r *typeResolver) getTypeInfo(value reflect.Value, create bool)
(TypeInfo, error) {
// First check if type info exists in cache
- typeString := value.Type().String()
+ typeString := value.Type()
if info, ok := r.typesInfo[typeString]; ok {
if info.Serializer == nil {
// Lazy initialize serializer if not created yet
@@ -578,8 +578,7 @@ func (r *typeResolver) registerType(
}
// Update resolver caches:
- tname := typ.String()
- r.typesInfo[tname] = typeInfo // Cache by type string
+ r.typesInfo[typ] = typeInfo // Cache by type string
if typeName != "" {
// Cache by namespace/name pair
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]