Re: [PR] Support merging tags with different types for stream [skywalking-banyandb]

via GitHub Wed, 07 Jan 2026 08:03:53 -0800


Copilot commented on code in PR #929:
URL: 
https://github.com/apache/skywalking-banyandb/pull/929#discussion_r2669050967



##########
banyand/internal/encoding/tag_encoder.go:
##########
@@ -358,3 +371,72 @@ func decodeDefaultTagValues(dst [][]byte, decoder 
*encoding.BytesBlockDecoder, b
        }
        return dst, nil
 }
+
+// TypedValue represents a value with its type information.
+type TypedValue struct {
+       Value []byte
+       Type  pbv1.ValueType
+}
+
+func encodeMixedTagValues(bb *bytes.Buffer, types []pbv1.ValueType, values 
[][]byte) {
+       if len(values) == 0 {
+               return
+       }
+
+       bb.Buf = append(bb.Buf[:0], byte(encoding.EncodeTypeTyped))
+       bb.Buf = encoding.VarUint64ToBytes(bb.Buf, uint64(len(values)))
+
+       typesBytes := make([]byte, len(types))
+       for i, t := range types {
+               typesBytes[i] = byte(t)
+       }
+       compressedTypes := encoding.EncodeBytesBlock(nil, [][]byte{typesBytes})
+       bb.Buf = encoding.VarUint64ToBytes(bb.Buf, uint64(len(compressedTypes)))
+       bb.Buf = append(bb.Buf, compressedTypes...)
+
+       bb.Buf = encoding.EncodeBytesBlock(bb.Buf, values)
+}

Review Comment:
   The encodeMixedTagValues function doesn't validate that the types slice has 
the same length as the values slice. If len(types) != len(values), the encoding 
will create a mismatch between the number of values and type information, which 
will cause decoding to fail with a panic. Consider adding a validation check or 
panic if the lengths don't match to catch bugs early.



##########
banyand/stream/block.go:
##########
@@ -704,94 +708,189 @@ func fastTagAppend(bi, b *blockPointer, offset int) 
error {
                                bi.tagFamilies[i].name, 
len(b.tagFamilies[i].tags), len(bi.tagFamilies[i].tags))
                }
                for j := range bi.tagFamilies[i].tags {
-                       if bi.tagFamilies[i].tags[j].name != 
b.tagFamilies[i].tags[j].name {
+                       existingTag, srcTag := &bi.tagFamilies[i].tags[j], 
&b.tagFamilies[i].tags[j]
+                       if existingTag.name != srcTag.name {
                                return fmt.Errorf("unexpected tag name for tag 
family %q: got %q; want %q",
-                                       bi.tagFamilies[i].name, 
b.tagFamilies[i].tags[j].name, bi.tagFamilies[i].tags[j].name)
+                                       bi.tagFamilies[i].name, srcTag.name, 
existingTag.name)
+                       }
+                       if existingTag.valueType != srcTag.valueType &&
+                               existingTag.valueType != pbv1.ValueTypeUnknown 
&&
+                               srcTag.valueType != pbv1.ValueTypeUnknown {
+                               return fmt.Errorf("type mismatch for tag %q: 
existing=%d, source=%d",
+                                       existingTag.name, 
existingTag.valueType, srcTag.valueType)
+                       }
+                       assertIdxAndOffset(srcTag.name, len(srcTag.values), 
b.idx, offset)
+                       if existingTag.valueType == pbv1.ValueTypeMixed || 
srcTag.valueType == pbv1.ValueTypeMixed {

Review Comment:
   The type mismatch check creates unnecessary fallbacks to fullTagAppend when 
types differ. The code checks if types don't match (line 716) and neither is 
Unknown (lines 717-718), then returns an error. However, line 723 immediately 
checks if either tag is Mixed to call appendAsMixed. This means when 
existingTag is ValueTypeStr and srcTag is ValueTypeInt64 (both non-Unknown, 
non-Mixed), the function returns an error on line 719 instead of converting to 
Mixed type. This forces an inefficient fallback to fullTagAppend even though 
appendAsMixed could handle this case. Consider removing or relaxing the type 
mismatch check since the code after it already handles type mismatches by 
converting to Mixed.
   ```suggestion
                        assertIdxAndOffset(srcTag.name, len(srcTag.values), 
b.idx, offset)
                        if shouldAppendAsMixed(existingTag, srcTag) {
   ```



##########
banyand/internal/encoding/tag_encoder.go:
##########
@@ -358,3 +371,72 @@ func decodeDefaultTagValues(dst [][]byte, decoder 
*encoding.BytesBlockDecoder, b
        }
        return dst, nil
 }
+
+// TypedValue represents a value with its type information.
+type TypedValue struct {
+       Value []byte
+       Type  pbv1.ValueType
+}
+
+func encodeMixedTagValues(bb *bytes.Buffer, types []pbv1.ValueType, values 
[][]byte) {
+       if len(values) == 0 {
+               return
+       }
+
+       bb.Buf = append(bb.Buf[:0], byte(encoding.EncodeTypeTyped))
+       bb.Buf = encoding.VarUint64ToBytes(bb.Buf, uint64(len(values)))
+
+       typesBytes := make([]byte, len(types))
+       for i, t := range types {
+               typesBytes[i] = byte(t)
+       }
+       compressedTypes := encoding.EncodeBytesBlock(nil, [][]byte{typesBytes})
+       bb.Buf = encoding.VarUint64ToBytes(bb.Buf, uint64(len(compressedTypes)))
+       bb.Buf = append(bb.Buf, compressedTypes...)
+
+       bb.Buf = encoding.EncodeBytesBlock(bb.Buf, values)
+}
+
+func decodeMixedTagValues(dst [][]byte, dstTypes []pbv1.ValueType, decoder 
*encoding.BytesBlockDecoder,
+       bb *bytes.Buffer, count uint64,
+) ([][]byte, []pbv1.ValueType, error) {

Review Comment:
   The function doesn't check if the buffer has enough bytes before accessing 
bb.Buf[0] on line 403. If bb.Buf is empty, this will cause a panic. While the 
caller is expected to check len(bb.Buf) before calling this function (as seen 
in DecodeTagValues line 116), defensive programming suggests adding a bounds 
check here for safety.
   ```suggestion
   ) ([][]byte, []pbv1.ValueType, error) {
        if bb == nil || len(bb.Buf) == 0 {
                logger.Panicf("buffer is nil or empty in decodeMixedTagValues")
        }
   ```



##########
banyand/internal/encoding/tag_encoder.go:
##########
@@ -100,25 +101,37 @@ func EncodeTagValues(bb *bytes.Buffer, values [][]byte, 
valueType pbv1.ValueType
                return encodeInt64TagValues(bb, values)
        case pbv1.ValueTypeFloat64:
                return encodeFloat64TagValues(bb, values)
+       case pbv1.ValueTypeMixed:
+               encodeMixedTagValues(bb, types, values)
+               return encoding.EncodeTypeTyped, nil
        default:
                return encodeDefaultTagValues(bb, values)
        }
 }
 
 // DecodeTagValues decodes tag values based on the value type.
-func DecodeTagValues(dst [][]byte, decoder *encoding.BytesBlockDecoder, bb 
*bytes.Buffer, valueType pbv1.ValueType, count int) ([][]byte, error) {
+func DecodeTagValues(dst [][]byte, dstTypes []pbv1.ValueType, decoder 
*encoding.BytesBlockDecoder,
+       bb *bytes.Buffer, valueType pbv1.ValueType, count int,
+) ([][]byte, []pbv1.ValueType, error) {
        if len(bb.Buf) == 0 {
-               return nil, nil
+               if valueType == pbv1.ValueTypeMixed {
+                       return nil, []pbv1.ValueType{pbv1.ValueTypeUnknown}, nil
+               }

Review Comment:
   The function returns an incorrect second value when the buffer is empty and 
valueType is ValueTypeMixed. It returns a slice containing a single 
ValueTypeUnknown element, but this could cause issues if the caller expects nil 
or an empty slice for no data. Consider returning nil or an empty slice instead 
to be consistent with the non-mixed type case.
   ```suggestion
   
   ```



##########
banyand/internal/encoding/tag_encoder.go:
##########
@@ -358,3 +371,72 @@ func decodeDefaultTagValues(dst [][]byte, decoder 
*encoding.BytesBlockDecoder, b
        }
        return dst, nil
 }
+
+// TypedValue represents a value with its type information.
+type TypedValue struct {
+       Value []byte
+       Type  pbv1.ValueType
+}
+

Review Comment:
   The TypedValue struct is defined but never used anywhere in the codebase. It 
appears to be dead code left over from development. Consider removing it to 
keep the code clean.
   ```suggestion
   
   ```



##########
banyand/cmd/dump/trace.go:
##########
@@ -992,7 +992,8 @@ func matchesCriteria(tags map[string][]byte, tagTypes 
map[string]pbv1.ValueType,
                }
 
                valueType := tagTypes[name]
-               tagValue := convertTagValue(value, valueType)
+               tagValue, _ := convertTagValue(value, valueType)
+               // TODO: handle the error

Review Comment:
   The TODO comment indicates that error handling is intentionally skipped. 
When convertTagValue returns an error (such as for ValueTypeMixed), it's 
silently ignored. This could lead to incorrect behavior where mixed-type tags 
are silently dropped from the criteria matching. Consider either handling the 
error properly by logging it or by implementing support for mixed types in the 
convertTagValue function.
   ```suggestion
                tagValue, err := convertTagValue(value, valueType)
                if err != nil {
                        // Report conversion failures instead of silently 
ignoring them.
                        fmt.Fprintf(os.Stderr, "failed to convert tag %s: 
%v\n", name, err)
                        continue
                }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Support merging tags with different types for stream [skywalking-banyandb]

Reply via email to