This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new a9902be19 fix(go): fix struct value reference tracking bug (#2991)
a9902be19 is described below
commit a9902be19e54a7aaabd764ee079666bb48fa9a87
Author: Shawn Yang <[email protected]>
AuthorDate: Thu Dec 4 17:39:42 2025 +0800
fix(go): fix struct value reference tracking bug (#2991)
## Summary
- Go: Fixed struct value reference tracking - struct values are value
types that should not participate in reference tracking (was incorrectly
using JSON marshaling for content-based deduplication)
- Python: Fixed `reference()` to handle empty `read_ref_ids` when
`NotNullValueFlag` is received (prevents segfault during cross-language
deserialization)
## What was the problem?
Go was incorrectly treating struct **values** (not pointers) as if they
were reference types:
1. It used `json.Marshal()` to create a content-based cache key for
struct values
2. This was semantically wrong - Go struct values are copied, not shared
by reference
3. This was also slow due to JSON marshaling overhead
## Related issues
Closes #2990
## Test plan
- [x] All Go tests pass including cross-language tests
- [x] All Python struct tests pass
- [x] Tested with both Cython and pure Python modes
---
go/fory/reference.go | 34 +++++-----------------------------
python/pyfory/resolver.py | 8 ++++++++
python/pyfory/serialization.pyx | 8 ++++++++
3 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/go/fory/reference.go b/go/fory/reference.go
index bb44e4ab6..36aaf3015 100644
--- a/go/fory/reference.go
+++ b/go/fory/reference.go
@@ -18,7 +18,6 @@
package fory
import (
- "encoding/json"
"fmt"
"reflect"
"unsafe"
@@ -42,9 +41,6 @@ type RefResolver struct {
readObjects []reflect.Value
readRefIds []int32
readObject reflect.Value // last read object which is not a
reference
-
- // basicValueCache caches boxed struct pointers keyed by their JSON
representation to support reference tracking
- basicValueCache map[interface{}]reflect.Value
}
type refKey struct {
@@ -54,9 +50,8 @@ type refKey struct {
func newRefResolver(refTracking bool) *RefResolver {
refResolver := &RefResolver{
- refTracking: refTracking,
- writtenObjects: map[refKey]int32{},
- basicValueCache: map[interface{}]reflect.Value{},
+ refTracking: refTracking,
+ writtenObjects: map[refKey]int32{},
}
return refResolver
}
@@ -98,28 +93,9 @@ func (r *RefResolver) WriteRefOrNull(buffer *ByteBuffer,
value reflect.Value) (r
case reflect.Invalid:
isNil = true
case reflect.Struct:
- // Adds reference tracking for struct types
- raw, _ := json.Marshal(value.Interface())
- key := string(raw)
- boxed, ok := r.basicValueCache[key]
- if !ok {
- boxed = reflect.New(value.Type())
- boxed.Elem().Set(value)
- r.basicValueCache[key] = boxed
- }
- ptr := unsafe.Pointer(boxed.Pointer())
- refKey := refKey{pointer: ptr, length: 0}
- if writtenId, ok := r.writtenObjects[refKey]; ok {
- buffer.WriteInt8(RefFlag)
- buffer.WriteVarInt32(writtenId)
- return true, nil
- }
- newWriteRefId := len(r.writtenObjects)
- if newWriteRefId >= MaxInt32 {
- return false, fmt.Errorf("too many objects execced %d
to serialize", MaxInt32)
- }
- r.writtenObjects[refKey] = int32(newWriteRefId)
- buffer.WriteInt8(RefValueFlag)
+ // Struct values are value types, not reference types.
+ // They should not participate in reference tracking.
+ buffer.WriteInt8(NotNullValueFlag)
return false, nil
default:
// The object is being written for the first time.
diff --git a/python/pyfory/resolver.py b/python/pyfory/resolver.py
index 710efb7bd..d7e0a94d4 100644
--- a/python/pyfory/resolver.py
+++ b/python/pyfory/resolver.py
@@ -192,6 +192,9 @@ class MapRefResolver(RefResolver):
self.read_object = None
if head_flag == REF_VALUE_FLAG:
return self.preserve_ref_id()
+ # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference()
knows
+ # this object is not referenceable (it's a value type, not a
reference type)
+ self.read_ref_ids.append(-1)
# `head_flag` except `REF_FLAG` can be used as stub reference id
because we use
# `refId >= NOT_NULL_VALUE_FLAG` to read data.
return head_flag
@@ -201,6 +204,11 @@ class MapRefResolver(RefResolver):
def reference(self, obj):
ref_id = self.read_ref_ids.pop()
+ # When NOT_NULL_VALUE_FLAG was read instead of REF_VALUE_FLAG,
+ # -1 is pushed to read_ref_ids. This means the object is a value type
+ # (not a reference type), so we skip reference tracking.
+ if ref_id < 0:
+ return
self.set_read_object(ref_id, obj)
def get_read_object(self, id_=None):
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 9b46e940a..47161509e 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -201,6 +201,9 @@ cdef class MapRefResolver:
self.read_object = None
if head_flag == REF_VALUE_FLAG:
return self.preserve_ref_id()
+ # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference()
knows
+ # this object is not referenceable (it's a value type, not a
reference type)
+ self.read_ref_ids.push_back(-1)
return head_flag
cpdef inline int32_t last_preserved_ref_id(self):
@@ -213,6 +216,11 @@ cdef class MapRefResolver:
return
cdef int32_t ref_id = self.read_ref_ids.back()
self.read_ref_ids.pop_back()
+ # When NOT_NULL_VALUE_FLAG was read instead of REF_VALUE_FLAG,
+ # -1 is pushed to read_ref_ids. This means the object is a value type
+ # (not a reference type), so we skip reference tracking.
+ if ref_id < 0:
+ return
cdef c_bool need_inc = self.read_objects[ref_id] == NULL
if need_inc:
Py_INCREF(obj)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]