This is an automated email from the ASF dual-hosted git repository.

hanahmily pushed a commit to branch sidx/query
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
commit 0905e101f587ce3a1de2b5e13175da3c9741b4e4 Author: Gao Hongtao <[email protected]> AuthorDate: Wed Aug 27 10:21:53 2025 +0800 Refactor tag data handling: Update the writing and reading of tag data to eliminate compression, improving efficiency. Adjust related comments and tests to reflect the changes in data processing, ensuring clarity and robust functionality. --- banyand/internal/sidx/block.go | 7 +++---- banyand/internal/sidx/part.go | 11 +++-------- banyand/internal/sidx/query_result.go | 9 ++------- banyand/internal/sidx/tag.go | 6 ------ banyand/internal/sidx/tag_test.go | 7 ------- 5 files changed, 8 insertions(+), 32 deletions(-) diff --git a/banyand/internal/sidx/block.go b/banyand/internal/sidx/block.go index 24be89d9..ef09fe9f 100644 --- a/banyand/internal/sidx/block.go +++ b/banyand/internal/sidx/block.go @@ -288,11 +288,10 @@ func (b *block) mustWriteTag(tagName string, td *tagData, bm *blockMetadata, ww panic(fmt.Sprintf("failed to encode tag values: %v", err)) } - // Compress and write tag data - compressedData := zstd.Compress(nil, encodedData, 1) + // Write tag data without compression tm.dataBlock.offset = tdw.bytesWritten - tm.dataBlock.size = uint64(len(compressedData)) - tdw.MustWrite(compressedData) + tm.dataBlock.size = uint64(len(encodedData)) + tdw.MustWrite(encodedData) // Write bloom filter if indexed if td.indexed && td.filter != nil { diff --git a/banyand/internal/sidx/part.go b/banyand/internal/sidx/part.go index 5753a300..fa355988 100644 --- a/banyand/internal/sidx/part.go +++ b/banyand/internal/sidx/part.go @@ -400,17 +400,12 @@ func (p *part) readBlockTags(tagName string, bm *blockMetadata, elems *elements) } defer releaseTagMetadata(tm) - // Read and decompress tag data + // Read tag data tdData := make([]byte, tm.dataBlock.size) fs.MustReadData(tdReader, int64(tm.dataBlock.offset), tdData) - decompressedData, err := zstd.Decompress(nil, tdData) - if err != nil { - return fmt.Errorf("cannot decompress tag data: %w", err) - } 
- - // Decode tag values - tagValues, err := DecodeTagValues(decompressedData, tm.valueType, int(bm.count)) + // Decode tag values directly (no compression) + tagValues, err := DecodeTagValues(tdData, tm.valueType, int(bm.count)) if err != nil { return fmt.Errorf("cannot decode tag values: %w", err) } diff --git a/banyand/internal/sidx/query_result.go b/banyand/internal/sidx/query_result.go index dfe07d80..a8f5eaca 100644 --- a/banyand/internal/sidx/query_result.go +++ b/banyand/internal/sidx/query_result.go @@ -303,13 +303,8 @@ func (qr *queryResult) loadTagData(tmpBlock *block, p *part, tagName string, tag bb2.Buf = bytes.ResizeOver(bb2.Buf[:0], int(tm.dataBlock.size)) fs.MustReadData(tdReader, int64(tm.dataBlock.offset), bb2.Buf) - decompressedData, err := zstd.Decompress(nil, bb2.Buf) - if err != nil { - return false - } - - // Decode tag values - tagValues, err := DecodeTagValues(decompressedData, tm.valueType, count) + // Decode tag values directly (no compression) + tagValues, err := DecodeTagValues(bb2.Buf, tm.valueType, count) if err != nil { return false } diff --git a/banyand/internal/sidx/tag.go b/banyand/internal/sidx/tag.go index d4371992..6dc46bd8 100644 --- a/banyand/internal/sidx/tag.go +++ b/banyand/internal/sidx/tag.go @@ -44,7 +44,6 @@ type tagMetadata struct { filterBlock dataBlock // Offset/size in .tf file valueType pbv1.ValueType indexed bool - compressed bool } // tagData represents the runtime data for a tag with filtering capabilities. 
@@ -128,7 +127,6 @@ func (tm *tagMetadata) reset() { tm.name = "" tm.valueType = pbv1.ValueTypeUnknown tm.indexed = false - tm.compressed = false tm.dataBlock = dataBlock{} tm.filterBlock = dataBlock{} tm.min = nil @@ -310,9 +308,6 @@ func (tm *tagMetadata) marshal(dst []byte) []byte { if tm.indexed { flags |= 1 } - if tm.compressed { - flags |= 2 - } dst = append(dst, flags) dst = pkgencoding.EncodeBytes(dst, tm.min) @@ -348,7 +343,6 @@ func (tm *tagMetadata) unmarshal(src []byte) ([]byte, error) { flags := src[0] src = src[1:] tm.indexed = (flags & 1) != 0 - tm.compressed = (flags & 2) != 0 src, tm.min, err = pkgencoding.DecodeBytes(src) if err != nil { diff --git a/banyand/internal/sidx/tag_test.go b/banyand/internal/sidx/tag_test.go index a13c699e..ea70c558 100644 --- a/banyand/internal/sidx/tag_test.go +++ b/banyand/internal/sidx/tag_test.go @@ -300,7 +300,6 @@ func TestTagMetadataOperations(t *testing.T) { original.name = "test_tag" original.valueType = pbv1.ValueTypeInt64 original.indexed = true - original.compressed = false original.dataBlock = dataBlock{offset: 100, size: 500} original.filterBlock = dataBlock{offset: 600, size: 200} original.min = []byte{0x01, 0x02} @@ -319,7 +318,6 @@ func TestTagMetadataOperations(t *testing.T) { assert.Equal(t, original.name, unmarshaled.name) assert.Equal(t, original.valueType, unmarshaled.valueType) assert.Equal(t, original.indexed, unmarshaled.indexed) - assert.Equal(t, original.compressed, unmarshaled.compressed) assert.Equal(t, original.dataBlock, unmarshaled.dataBlock) assert.Equal(t, original.filterBlock, unmarshaled.filterBlock) assert.Equal(t, original.min, unmarshaled.min) @@ -342,7 +340,6 @@ func TestTagMetadataOperations(t *testing.T) { assert.Equal(t, tm.name, unmarshaled.name) assert.Equal(t, tm.valueType, unmarshaled.valueType) assert.False(t, unmarshaled.indexed) - assert.False(t, unmarshaled.compressed) assert.Nil(t, unmarshaled.min) assert.Nil(t, unmarshaled.max) }) @@ -354,7 +351,6 @@ func 
TestTagMetadataOperations(t *testing.T) { tm.name = "test_tag" tm.valueType = pbv1.ValueTypeInt64 tm.indexed = true - tm.compressed = true tm.dataBlock = dataBlock{offset: 100, size: 500} tm.filterBlock = dataBlock{offset: 600, size: 200} tm.min = []byte("min") @@ -367,7 +363,6 @@ func TestTagMetadataOperations(t *testing.T) { assert.Equal(t, "", tm.name) assert.Equal(t, pbv1.ValueTypeUnknown, tm.valueType) assert.False(t, tm.indexed) - assert.False(t, tm.compressed) assert.Equal(t, dataBlock{}, tm.dataBlock) assert.Equal(t, dataBlock{}, tm.filterBlock) assert.Nil(t, tm.min) @@ -502,7 +497,6 @@ func TestRoundTripIntegrity(t *testing.T) { original.name = "integration_tag" original.valueType = pbv1.ValueTypeInt64 original.indexed = true - original.compressed = true original.dataBlock = dataBlock{offset: 1000, size: 2000} original.filterBlock = dataBlock{offset: 3000, size: 500} @@ -535,7 +529,6 @@ func TestRoundTripIntegrity(t *testing.T) { assert.Equal(t, original.name, unmarshaledMetadata.name) assert.Equal(t, original.valueType, unmarshaledMetadata.valueType) assert.Equal(t, original.indexed, unmarshaledMetadata.indexed) - assert.Equal(t, original.compressed, unmarshaledMetadata.compressed) // Verify values integrity assert.Equal(t, len(encodedValues), len(unmarshaledValues))
