This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new a9902be19 fix(go): fix struct value reference tracking bug (#2991)
a9902be19 is described below

commit a9902be19e54a7aaabd764ee079666bb48fa9a87
Author: Shawn Yang <[email protected]>
AuthorDate: Thu Dec 4 17:39:42 2025 +0800

    fix(go): fix struct value reference tracking bug (#2991)
    
    ## Summary
    - Go: Fixed struct value reference tracking - struct values are value
    types that should not participate in reference tracking (was incorrectly
    using JSON marshaling for content-based deduplication)
    - Python: Fixed `reference()` to handle empty `read_ref_ids` when
    `NotNullValueFlag` is received (prevents segfault during cross-language
    deserialization)
    
    ## What was the problem?
    
    Go was incorrectly treating struct **values** (not pointers) as if they
    were reference types:
    1. It used `json.Marshal()` to create a content-based cache key for
    struct values
    2. This was semantically wrong - Go struct values are copied, not shared
    by reference
    3. This was also slow due to JSON marshaling overhead
    
    ## Related issues
    Closes #2990
    
    ## Test plan
    - [x] All Go tests pass including cross-language tests
    - [x] All Python struct tests pass
    - [x] Tested with both Cython and pure Python modes
---
 go/fory/reference.go            | 34 +++++-----------------------------
 python/pyfory/resolver.py       |  8 ++++++++
 python/pyfory/serialization.pyx |  8 ++++++++
 3 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/go/fory/reference.go b/go/fory/reference.go
index bb44e4ab6..36aaf3015 100644
--- a/go/fory/reference.go
+++ b/go/fory/reference.go
@@ -18,7 +18,6 @@
 package fory
 
 import (
-       "encoding/json"
        "fmt"
        "reflect"
        "unsafe"
@@ -42,9 +41,6 @@ type RefResolver struct {
        readObjects    []reflect.Value
        readRefIds     []int32
        readObject     reflect.Value // last read object which is not a 
reference
-
-       // basicValueCache caches boxed struct pointers keyed by their JSON 
representation to support reference tracking
-       basicValueCache map[interface{}]reflect.Value
 }
 
 type refKey struct {
@@ -54,9 +50,8 @@ type refKey struct {
 
 func newRefResolver(refTracking bool) *RefResolver {
        refResolver := &RefResolver{
-               refTracking:     refTracking,
-               writtenObjects:  map[refKey]int32{},
-               basicValueCache: map[interface{}]reflect.Value{},
+               refTracking:    refTracking,
+               writtenObjects: map[refKey]int32{},
        }
        return refResolver
 }
@@ -98,28 +93,9 @@ func (r *RefResolver) WriteRefOrNull(buffer *ByteBuffer, 
value reflect.Value) (r
        case reflect.Invalid:
                isNil = true
        case reflect.Struct:
-               // Adds reference tracking for struct types
-               raw, _ := json.Marshal(value.Interface())
-               key := string(raw)
-               boxed, ok := r.basicValueCache[key]
-               if !ok {
-                       boxed = reflect.New(value.Type())
-                       boxed.Elem().Set(value)
-                       r.basicValueCache[key] = boxed
-               }
-               ptr := unsafe.Pointer(boxed.Pointer())
-               refKey := refKey{pointer: ptr, length: 0}
-               if writtenId, ok := r.writtenObjects[refKey]; ok {
-                       buffer.WriteInt8(RefFlag)
-                       buffer.WriteVarInt32(writtenId)
-                       return true, nil
-               }
-               newWriteRefId := len(r.writtenObjects)
-               if newWriteRefId >= MaxInt32 {
-                       return false, fmt.Errorf("too many objects execced %d 
to serialize", MaxInt32)
-               }
-               r.writtenObjects[refKey] = int32(newWriteRefId)
-               buffer.WriteInt8(RefValueFlag)
+               // Struct values are value types, not reference types.
+               // They should not participate in reference tracking.
+               buffer.WriteInt8(NotNullValueFlag)
                return false, nil
        default:
                // The object is being written for the first time.
diff --git a/python/pyfory/resolver.py b/python/pyfory/resolver.py
index 710efb7bd..d7e0a94d4 100644
--- a/python/pyfory/resolver.py
+++ b/python/pyfory/resolver.py
@@ -192,6 +192,9 @@ class MapRefResolver(RefResolver):
             self.read_object = None
             if head_flag == REF_VALUE_FLAG:
                 return self.preserve_ref_id()
+            # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference() 
knows
+            # this object is not referenceable (it's a value type, not a 
reference type)
+            self.read_ref_ids.append(-1)
         # `head_flag` except `REF_FLAG` can be used as stub reference id 
because we use
         # `refId >= NOT_NULL_VALUE_FLAG` to read data.
         return head_flag
@@ -201,6 +204,11 @@ class MapRefResolver(RefResolver):
 
     def reference(self, obj):
         ref_id = self.read_ref_ids.pop()
+        # When NOT_NULL_VALUE_FLAG was read instead of REF_VALUE_FLAG,
+        # -1 is pushed to read_ref_ids. This means the object is a value type
+        # (not a reference type), so we skip reference tracking.
+        if ref_id < 0:
+            return
         self.set_read_object(ref_id, obj)
 
     def get_read_object(self, id_=None):
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 9b46e940a..47161509e 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -201,6 +201,9 @@ cdef class MapRefResolver:
             self.read_object = None
             if head_flag == REF_VALUE_FLAG:
                 return self.preserve_ref_id()
+            # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference() 
knows
+            # this object is not referenceable (it's a value type, not a 
reference type)
+            self.read_ref_ids.push_back(-1)
             return head_flag
 
     cpdef inline int32_t last_preserved_ref_id(self):
@@ -213,6 +216,11 @@ cdef class MapRefResolver:
             return
         cdef int32_t ref_id = self.read_ref_ids.back()
         self.read_ref_ids.pop_back()
+        # When NOT_NULL_VALUE_FLAG was read instead of REF_VALUE_FLAG,
+        # -1 is pushed to read_ref_ids. This means the object is a value type
+        # (not a reference type), so we skip reference tracking.
+        if ref_id < 0:
+            return
         cdef c_bool need_inc = self.read_objects[ref_id] == NULL
         if need_inc:
             Py_INCREF(obj)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to