This is an automated email from the ASF dual-hosted git repository. hanahmily pushed a commit to branch sidx/query in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
commit ce83f369a23c3264cda59b79178f163f85335197 Author: Gao Hongtao <[email protected]> AuthorDate: Wed Aug 27 10:40:54 2025 +0800 Refactor tag data compression: Remove compression and decompression functions for tag data, streamlining the handling of tag metadata. Update related tests to reflect the removal of compression logic, ensuring clarity and maintaining functionality. --- banyand/internal/sidx/query_result.go | 2 +- banyand/internal/sidx/tag.go | 22 ------- banyand/internal/sidx/tag_test.go | 116 ---------------------------------- 3 files changed, 1 insertion(+), 139 deletions(-) diff --git a/banyand/internal/sidx/query_result.go b/banyand/internal/sidx/query_result.go index a8f5eaca..140e2012 100644 --- a/banyand/internal/sidx/query_result.go +++ b/banyand/internal/sidx/query_result.go @@ -211,7 +211,7 @@ func (qr *queryResult) loadBlockData(tmpBlock *block, p *part, bm *blockMetadata bb2.Buf = bytes.ResizeOver(bb2.Buf[:0], int(bm.dataBlock.size)) fs.MustReadData(p.data, int64(bm.dataBlock.offset), bb2.Buf) - dataBuf, err := zstd.Decompress(nil, bb2.Buf) + dataBuf, err := zstd.Decompress(bb.Buf[:0], bb2.Buf) if err != nil { return false } diff --git a/banyand/internal/sidx/tag.go b/banyand/internal/sidx/tag.go index 6dc46bd8..5cc2d333 100644 --- a/banyand/internal/sidx/tag.go +++ b/banyand/internal/sidx/tag.go @@ -22,7 +22,6 @@ import ( "fmt" "github.com/apache/skywalking-banyandb/banyand/internal/encoding" - "github.com/apache/skywalking-banyandb/pkg/compress/zstd" pkgencoding "github.com/apache/skywalking-banyandb/pkg/encoding" "github.com/apache/skywalking-banyandb/pkg/filter" pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1" @@ -133,27 +132,6 @@ func (tm *tagMetadata) reset() { tm.max = nil } -const ( - // defaultCompressionLevel for zstd compression. - defaultCompressionLevel = 3 -) - -// compressTagData compresses tag data using zstd compression. -func compressTagData(data []byte) []byte { - if len(data) == 0 { - return nil - } - return zstd.Compress(nil, data, defaultCompressionLevel) -} - -// decompressTagData decompresses tag data using zstd decompression. -func decompressTagData(compressedData []byte) ([]byte, error) { - if len(compressedData) == 0 { - return nil, nil - } - return zstd.Decompress(nil, compressedData) -} - // generateBloomFilter gets a bloom filter from pool or creates new. func generateBloomFilter(expectedElements int) *filter.BloomFilter { v := bloomFilterPool.Get() diff --git a/banyand/internal/sidx/tag_test.go b/banyand/internal/sidx/tag_test.go index ea70c558..85e940a9 100644 --- a/banyand/internal/sidx/tag_test.go +++ b/banyand/internal/sidx/tag_test.go @@ -544,122 +544,6 @@ func TestRoundTripIntegrity(t *testing.T) { }) } -func TestTagCompression(t *testing.T) { - t.Run("compress and decompress tag data", func(t *testing.T) { - originalData := []byte("this is some test data that should be compressed and decompressed properly") - - // Compress - compressed := compressTagData(originalData) - assert.NotNil(t, compressed) - assert.NotEqual(t, originalData, compressed) - - // Decompress - decompressed, err := decompressTagData(compressed) - require.NoError(t, err) - assert.Equal(t, originalData, decompressed) - }) - - t.Run("compress empty data", func(t *testing.T) { - compressed := compressTagData(nil) - assert.Nil(t, compressed) - - compressed = compressTagData([]byte{}) - assert.Nil(t, compressed) - }) - - t.Run("decompress empty data", func(t *testing.T) { - decompressed, err := decompressTagData(nil) - require.NoError(t, err) - assert.Nil(t, decompressed) - - decompressed, err = decompressTagData([]byte{}) - require.NoError(t, err) - assert.Nil(t, decompressed) - }) -} - -func TestTagValuesCompression(t *testing.T) { - t.Run("encode and decode compressed values", func(t *testing.T) { - values := [][]byte{ - []byte("this is a longer string that should compress well"), - []byte("another long string with repeated words repeated words"), - []byte("yet another string for compression testing purposes"), - } - - // Encode with automatic compression for string data - compressed, err := EncodeTagValues(values, pbv1.ValueTypeStr) - require.NoError(t, err) - assert.NotNil(t, compressed) - - // Decode compressed data - decompressed, err := DecodeTagValues(compressed, pbv1.ValueTypeStr, len(values)) - require.NoError(t, err) - assert.Equal(t, len(values), len(decompressed)) - - for i, expected := range values { - assert.Equal(t, expected, decompressed[i]) - } - }) - - t.Run("compression works for repetitive data", func(t *testing.T) { - // Create repetitive data that should compress well - repetitiveData := make([][]byte, 100) - for i := range repetitiveData { - repetitiveData[i] = []byte("repeated_data_pattern_that_compresses_well") - } - - // Encode with automatic compression - compressed, err := EncodeTagValues(repetitiveData, pbv1.ValueTypeStr) - require.NoError(t, err) - - // Verify decompression works - decompressed, err := DecodeTagValues(compressed, pbv1.ValueTypeStr, len(repetitiveData)) - require.NoError(t, err) - assert.Equal(t, repetitiveData, decompressed) - }) -} - -func TestCompressionRoundTrip(t *testing.T) { - testCases := []struct { - name string - values [][]byte - }{ - { - name: "small values", - values: [][]byte{[]byte("a"), []byte("b"), []byte("c")}, - }, - { - name: "large values", - values: [][]byte{ - []byte("this is a very long string that contains a lot of data and should compress well when using zstd compression algorithm"), - []byte("another long string with different content but still should benefit from compression due to common patterns"), - }, - }, - { - name: "mixed size values", - values: [][]byte{ - []byte("short"), - []byte("this is a medium length string"), - []byte("this is a very very very long string that goes on and on with lots of repeated words and patterns that compression algorithms love to work with"), - []byte("x"), - }, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - // Test round trip: values -> encoded -> decoded - encoded, err := EncodeTagValues(tc.values, pbv1.ValueTypeStr) - require.NoError(t, err) - - decoded, err := DecodeTagValues(encoded, pbv1.ValueTypeStr, len(tc.values)) - require.NoError(t, err) - - assert.Equal(t, tc.values, decoded) - }) - } -} - func TestBloomFilterEncoding(t *testing.T) { t.Run("encode and decode bloom filter", func(t *testing.T) { // Create a bloom filter and add some data
