This is an automated email from the ASF dual-hosted git repository. hanahmily pushed a commit to branch feat/bloom-having in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
commit d7f0d561479867bb9b1c351735ec1a32619f2ce9 Author: Gao Hongtao <[email protected]> AuthorDate: Wed Sep 24 11:26:08 2025 +0800 Add Having method to tagFilterOp and tagFamilyFilters for bloom filter checks - Implemented the Having method in tagFilterOp to check if any provided tag values might exist in the bloom filter, returning true if at least one value is found. - Added corresponding Having method in tagFamilyFilters to handle multiple tag family filters. - Included comprehensive unit tests for both methods to ensure correct functionality across various scenarios, including edge cases with empty lists and large input sizes. - Updated the index.Filter interface to include the Having method for consistency in filter operations. - Enhanced trace query handling with a new traceHavingFilter to support HAVING operations in trace queries. --- banyand/internal/sidx/tag_filter_op.go | 35 +++ banyand/internal/sidx/tag_filter_op_test.go | 326 +++++++++++++++++++++++++++ banyand/stream/tag_filter.go | 22 ++ banyand/stream/tag_filter_test.go | 311 +++++++++++++++++++++++++ pkg/index/index.go | 1 + pkg/query/logical/trace/index_filter.go | 30 ++- pkg/query/logical/trace/index_filter_test.go | 281 +++++++++++++++++++++++ 7 files changed, 1005 insertions(+), 1 deletion(-) diff --git a/banyand/internal/sidx/tag_filter_op.go b/banyand/internal/sidx/tag_filter_op.go index 8ed186cc..620f8aff 100644 --- a/banyand/internal/sidx/tag_filter_op.go +++ b/banyand/internal/sidx/tag_filter_op.go @@ -73,6 +73,41 @@ func (tfo *tagFilterOp) Eq(tagName string, tagValue string) bool { return true } +// Having checks if any of the provided tag values might exist in the bloom filter. +// It returns true if at least one value might be contained in the bloom filter. +func (tfo *tagFilterOp) Having(tagName string, tagValues []string) bool { + if tfo.blockMetadata == nil || tfo.part == nil { + return false + } + + // Check if the tag exists in the block + tagBlock, exists := tfo.blockMetadata.tagsBlocks[tagName] + if !exists { + return false + } + + // Get or create cached tag filter data + cache, err := tfo.getTagFilterCache(tagName, tagBlock) + if err != nil { + logger.Errorf("failed to get tag filter cache for %s: %v", tagName, err) + return true // Conservative approach - don't filter out + } + + // Use bloom filter to check if any value might exist + if cache.bloomFilter != nil { + for _, tagValue := range tagValues { + if cache.bloomFilter.MightContain([]byte(tagValue)) { + return true // Return true as soon as we find a potential match + } + } + // None of the values might exist in the bloom filter + return false + } + + // If no bloom filter, conservatively return true + return true +} + // Range checks if a tag is within a specific range using min/max metadata. func (tfo *tagFilterOp) Range(tagName string, rangeOpts index.RangeOpts) (bool, error) { if tfo.blockMetadata == nil || tfo.part == nil { diff --git a/banyand/internal/sidx/tag_filter_op_test.go b/banyand/internal/sidx/tag_filter_op_test.go index 1f5eff87..611c900b 100644 --- a/banyand/internal/sidx/tag_filter_op_test.go +++ b/banyand/internal/sidx/tag_filter_op_test.go @@ -19,6 +19,7 @@ package sidx import ( "bytes" + "fmt" "testing" "github.com/stretchr/testify/assert" @@ -491,6 +492,331 @@ func BenchmarkDecodeBloomFilterFromBytes(b *testing.B) { } } +func BenchmarkTagFilterOpHaving(b *testing.B) { + // Setup bloom filter with test data + bf := filter.NewBloomFilter(1000) + for i := 0; i < 500; i++ { + bf.Add([]byte(fmt.Sprintf("service-%d", i))) + } + + cache := &tagFilterCache{ + bloomFilter: bf, + valueType: pbv1.ValueTypeStr, + } + + tfo := &tagFilterOp{ + blockMetadata: &blockMetadata{ + tagsBlocks: map[string]dataBlock{ + "service": {offset: 0, size: 100}, + }, + }, + part: &part{}, + tagCache: map[string]*tagFilterCache{ + "service": cache, + }, + } + + // Test different sizes of input lists + b.Run("small list (5 items)", func(b *testing.B) { + testValues := []string{"service-1", "service-2", "service-3", "unknown-1", "unknown-2"} + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfo.Having("service", testValues) + } + }) + + b.Run("medium list (50 items)", func(b *testing.B) { + testValues := make([]string, 50) + for i := 0; i < 50; i++ { + if i < 25 { + testValues[i] = fmt.Sprintf("service-%d", i) + } else { + testValues[i] = fmt.Sprintf("unknown-%d", i) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfo.Having("service", testValues) + } + }) + + b.Run("large list (500 items)", func(b *testing.B) { + testValues := make([]string, 500) + for i := 0; i < 500; i++ { + if i < 250 { + testValues[i] = fmt.Sprintf("service-%d", i) + } else { + testValues[i] = fmt.Sprintf("unknown-%d", i) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfo.Having("service", testValues) + } + }) + + b.Run("early exit (match first)", func(b *testing.B) { + testValues := make([]string, 100) + testValues[0] = "service-1" // This will match + for i := 1; i < 100; i++ { + testValues[i] = fmt.Sprintf("unknown-%d", i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfo.Having("service", testValues) + } + }) + + b.Run("no matches", func(b *testing.B) { + testValues := make([]string, 100) + for i := 0; i < 100; i++ { + testValues[i] = fmt.Sprintf("definitely-not-there-%d", i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfo.Having("service", testValues) + } + }) +} + +func TestTagFilterOpHaving(t *testing.T) { + tests := []struct { + blockMetadata *blockMetadata + part *part + name string + tagName string + description string + tagValues []string + expectedResult bool + }{ + { + name: "nil block metadata", + blockMetadata: nil, + part: &part{}, + tagName: "service", + tagValues: []string{"order-service", "user-service"}, + expectedResult: false, + description: "should return false when blockMetadata is nil", + }, + { + name: "nil part", + blockMetadata: &blockMetadata{tagsBlocks: make(map[string]dataBlock)}, + part: nil, + tagName: "service", + tagValues: []string{"order-service", "user-service"}, + expectedResult: false, + description: "should return false when part is nil", + }, + { + name: "tag not in block", + blockMetadata: &blockMetadata{ + tagsBlocks: map[string]dataBlock{ + "other_tag": {offset: 0, size: 100}, + }, + }, + part: &part{}, + tagName: "service", + tagValues: []string{"order-service", "user-service"}, + expectedResult: false, + description: "should return false when tag doesn't exist in block", + }, + { + name: "tag exists but no cache data", + blockMetadata: &blockMetadata{ + tagsBlocks: map[string]dataBlock{ + "service": {offset: 0, size: 100}, + }, + }, + part: &part{}, + tagName: "service", + tagValues: []string{"order-service", "user-service"}, + expectedResult: true, + description: "should return true (conservative) when tag exists but cache fails", + }, + { + name: "empty tag values list", + blockMetadata: &blockMetadata{tagsBlocks: make(map[string]dataBlock)}, + part: &part{}, + tagName: "service", + tagValues: []string{}, + expectedResult: false, + description: "should return false when no tag values provided", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tfo := &tagFilterOp{ + blockMetadata: tt.blockMetadata, + part: tt.part, + tagCache: make(map[string]*tagFilterCache), + } + + result := tfo.Having(tt.tagName, tt.tagValues) + assert.Equal(t, tt.expectedResult, result, tt.description) + }) + } +} + +func TestTagFilterOpHavingWithBloomFilter(t *testing.T) { + // Create a bloom filter and add some test values + bf := filter.NewBloomFilter(100) + bf.Add([]byte("order-service")) + bf.Add([]byte("payment-service")) + bf.Add([]byte("inventory-service")) + + // Create cache with bloom filter + cache := &tagFilterCache{ + bloomFilter: bf, + valueType: pbv1.ValueTypeStr, + } + + tfo := &tagFilterOp{ + blockMetadata: &blockMetadata{ + tagsBlocks: map[string]dataBlock{ + "service": {offset: 0, size: 100}, + }, + }, + part: &part{}, + tagCache: map[string]*tagFilterCache{ + "service": cache, + }, + } + + tests := []struct { + name string + description string + tagValues []string + expectedResult bool + }{ + { + name: "all values might exist", + tagValues: []string{"order-service", "payment-service"}, + expectedResult: true, + description: "should return true when all values might exist in bloom filter", + }, + { + name: "some values might exist", + tagValues: []string{"order-service", "non-existent-service"}, + expectedResult: true, + description: "should return true when at least one value might exist in bloom filter", + }, + { + name: "no values might exist", + tagValues: []string{"non-existent-service-1", "non-existent-service-2"}, + expectedResult: false, + description: "should return false when no values might exist in bloom filter", + }, + { + name: "single value exists", + tagValues: []string{"inventory-service"}, + expectedResult: true, + description: "should return true when single value might exist in bloom filter", + }, + { + name: "single value does not exist", + tagValues: []string{"definitely-not-there"}, + expectedResult: false, + description: "should return false when single value doesn't exist in bloom filter", + }, + { + name: "empty values list", + tagValues: []string{}, + expectedResult: false, + description: "should return false when no values provided", + }, + { + name: "mixed case with first match", + tagValues: []string{"order-service", "unknown-1", "unknown-2"}, + expectedResult: true, + description: "should return true immediately when first value matches", + }, + { + name: "mixed case with last match", + tagValues: []string{"unknown-1", "unknown-2", "payment-service"}, + expectedResult: true, + description: "should return true when last value matches", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tfo.Having("service", tt.tagValues) + assert.Equal(t, tt.expectedResult, result, tt.description) + }) + } +} + +func TestTagFilterOpHavingWithoutBloomFilter(t *testing.T) { + // Create cache without bloom filter + cache := &tagFilterCache{ + bloomFilter: nil, + valueType: pbv1.ValueTypeStr, + } + + tfo := &tagFilterOp{ + blockMetadata: &blockMetadata{ + tagsBlocks: map[string]dataBlock{ + "service": {offset: 0, size: 100}, + }, + }, + part: &part{}, + tagCache: map[string]*tagFilterCache{ + "service": cache, + }, + } + + // When no bloom filter is available, should always return true (conservative) + result := tfo.Having("service", []string{"any-service", "another-service"}) + assert.True(t, result, "should return true when no bloom filter is available (conservative approach)") + + // Test with empty list too + result = tfo.Having("service", []string{}) + assert.True(t, result, "should return true even with empty list when no bloom filter (conservative approach)") +} + +func TestTagFilterOpHavingLargeList(t *testing.T) { + // Create a bloom filter with some values + bf := filter.NewBloomFilter(1000) + bf.Add([]byte("target-service")) + + cache := &tagFilterCache{ + bloomFilter: bf, + valueType: pbv1.ValueTypeStr, + } + + tfo := &tagFilterOp{ + blockMetadata: &blockMetadata{ + tagsBlocks: map[string]dataBlock{ + "service": {offset: 0, size: 100}, + }, + }, + part: &part{}, + tagCache: map[string]*tagFilterCache{ + "service": cache, + }, + } + + // Create a large list with the target at the end + largeList := make([]string, 1000) + for i := 0; i < 999; i++ { + largeList[i] = fmt.Sprintf("non-existent-service-%d", i) + } + largeList[999] = "target-service" + + result := tfo.Having("service", largeList) + assert.True(t, result, "should handle large lists and find target value") + + // Test with large list that has no matches + noMatchList := make([]string, 1000) + for i := 0; i < 1000; i++ { + noMatchList[i] = fmt.Sprintf("definitely-not-there-%d", i) + } + + result = tfo.Having("service", noMatchList) + assert.False(t, result, "should return false for large list with no matches") +} + func TestTagFilterOpErrorHandling(t *testing.T) { t.Run("invalid range bounds", func(t *testing.T) { cache := &tagFilterCache{ diff --git a/banyand/stream/tag_filter.go b/banyand/stream/tag_filter.go index 39fd3d8e..1f7925be 100644 --- a/banyand/stream/tag_filter.go +++ b/banyand/stream/tag_filter.go @@ -203,6 +203,28 @@ func (tfs *tagFamilyFilters) Range(tagName string, rangeOpts index.RangeOpts) (b return true, nil } +// Having checks if any of the provided tag values might exist in the bloom filter. +// It returns true if at least one value might be contained in any tag family filter. +func (tfs *tagFamilyFilters) Having(tagName string, tagValues []string) bool { + for _, tff := range tfs.tagFamilyFilters { + if tf, ok := (*tff)[tagName]; ok { + if tf.filter != nil { + for _, tagValue := range tagValues { + if tf.filter.MightContain([]byte(tagValue)) { + return true // Return true as soon as we find a potential match + } + } + // None of the values might exist in this tag family filter + return false + } + // If no bloom filter, conservatively return true + return true + } + } + // If tag is not found in any tag family filter, return true (conservative) + return true +} + func generateTagFamilyFilters() *tagFamilyFilters { v := tagFamilyFiltersPool.Get() if v == nil { diff --git a/banyand/stream/tag_filter_test.go b/banyand/stream/tag_filter_test.go index b381ed78..fbc57642 100644 --- a/banyand/stream/tag_filter_test.go +++ b/banyand/stream/tag_filter_test.go @@ -122,6 +122,317 @@ func generateMetaAndFilter(tagCount int, itemsPerTag int) ([]byte, []byte) { return metaBuf, filterBuf.Bytes() } +func TestTagFamilyFiltersHaving(t *testing.T) { + assert := assert.New(t) + + // Create a tag family filter with test data + bf := filter.NewBloomFilter(100) + bf.Add([]byte("service-1")) + bf.Add([]byte("service-2")) + bf.Add([]byte("service-3")) + + // Create tag filter + tf := &tagFilter{ + filter: bf, + min: []byte{}, + max: []byte{}, + } + + // Create tag family filter map + tff := &tagFamilyFilter{ + "service": tf, + } + + // Create tag family filters + tfs := &tagFamilyFilters{ + tagFamilyFilters: []*tagFamilyFilter{tff}, + } + + tests := []struct { + name string + tagName string + description string + tagValues []string + expectedResult bool + }{ + { + name: "all values might exist", + tagName: "service", + tagValues: []string{"service-1", "service-2"}, + expectedResult: true, + description: "should return true when all values might exist in bloom filter", + }, + { + name: "some values might exist", + tagName: "service", + tagValues: []string{"service-1", "unknown-service"}, + expectedResult: true, + description: "should return true when at least one value might exist in bloom filter", + }, + { + name: "no values might exist", + tagName: "service", + tagValues: []string{"unknown-1", "unknown-2"}, + expectedResult: false, + description: "should return false when no values might exist in bloom filter", + }, + { + name: "single value exists", + tagName: "service", + tagValues: []string{"service-3"}, + expectedResult: true, + description: "should return true when single value might exist in bloom filter", + }, + { + name: "single value does not exist", + tagName: "service", + tagValues: []string{"definitely-not-there"}, + expectedResult: false, + description: "should return false when single value doesn't exist in bloom filter", + }, + { + name: "empty values list", + tagName: "service", + tagValues: []string{}, + expectedResult: false, + description: "should return false when no values provided", + }, + { + name: "tag not found", + tagName: "non-existent-tag", + tagValues: []string{"service-1"}, + expectedResult: true, + description: "should return true (conservative) when tag is not found", + }, + { + name: "early exit - first match", + tagName: "service", + tagValues: []string{"service-1", "unknown-1", "unknown-2"}, + expectedResult: true, + description: "should return true immediately when first value matches", + }, + { + name: "late match", + tagName: "service", + tagValues: []string{"unknown-1", "unknown-2", "service-2"}, + expectedResult: true, + description: "should return true when last value matches", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tfs.Having(tt.tagName, tt.tagValues) + assert.Equal(t, tt.expectedResult, result, tt.description) + }) + } +} + +func TestTagFamilyFiltersHavingWithoutBloomFilter(t *testing.T) { + assert := assert.New(t) + + // Create tag filter without bloom filter + tf := &tagFilter{ + filter: nil, // No bloom filter + min: []byte{}, + max: []byte{}, + } + + // Create tag family filter map + tff := &tagFamilyFilter{ + "service": tf, + } + + // Create tag family filters + tfs := &tagFamilyFilters{ + tagFamilyFilters: []*tagFamilyFilter{tff}, + } + + // When no bloom filter is available, should always return true (conservative) + result := tfs.Having("service", []string{"any-service", "another-service"}) + assert.True(result, "should return true when no bloom filter is available (conservative approach)") + + // Test with empty list too + result = tfs.Having("service", []string{}) + assert.True(result, "should return true even with empty list when no bloom filter (conservative approach)") +} + +func TestTagFamilyFiltersHavingMultipleTagFamilies(t *testing.T) { + assert := assert.New(t) + + // Create first tag family filter + bf1 := filter.NewBloomFilter(50) + bf1.Add([]byte("service-1")) + bf1.Add([]byte("service-2")) + + tf1 := &tagFilter{ + filter: bf1, + min: []byte{}, + max: []byte{}, + } + + tff1 := &tagFamilyFilter{ + "service": tf1, + } + + // Create second tag family filter + bf2 := filter.NewBloomFilter(50) + bf2.Add([]byte("user-1")) + bf2.Add([]byte("user-2")) + + tf2 := &tagFilter{ + filter: bf2, + min: []byte{}, + max: []byte{}, + } + + tff2 := &tagFamilyFilter{ + "user": tf2, + } + + // Create tag family filters with multiple families + tfs := &tagFamilyFilters{ + tagFamilyFilters: []*tagFamilyFilter{tff1, tff2}, + } + + // Test service tag (should find in first family) + result := tfs.Having("service", []string{"service-1", "unknown"}) + assert.True(result, "should find service-1 in first tag family") + + // Test user tag (should find in second family) + result = tfs.Having("user", []string{"user-2", "unknown"}) + assert.True(result, "should find user-2 in second tag family") + + // Test non-existent values + result = tfs.Having("service", []string{"unknown-1", "unknown-2"}) + assert.False(result, "should not find unknown values") +} + +func TestTagFamilyFiltersHavingLargeList(t *testing.T) { + assert := assert.New(t) + + // Create a bloom filter with one target value + bf := filter.NewBloomFilter(1000) + bf.Add([]byte("target-service")) + + tf := &tagFilter{ + filter: bf, + min: []byte{}, + max: []byte{}, + } + + tff := &tagFamilyFilter{ + "service": tf, + } + + tfs := &tagFamilyFilters{ + tagFamilyFilters: []*tagFamilyFilter{tff}, + } + + // Create a large list with the target at the end + largeList := make([]string, 1000) + for i := 0; i < 999; i++ { + largeList[i] = fmt.Sprintf("non-existent-service-%d", i) + } + largeList[999] = "target-service" + + result := tfs.Having("service", largeList) + assert.True(result, "should handle large lists and find target value") + + // Test with large list that has no matches + noMatchList := make([]string, 1000) + for i := 0; i < 1000; i++ { + noMatchList[i] = fmt.Sprintf("definitely-not-there-%d", i) + } + + result = tfs.Having("service", noMatchList) + assert.False(result, "should return false for large list with no matches") +} + +func BenchmarkTagFamilyFiltersHaving(b *testing.B) { + // Setup bloom filter with test data + bf := filter.NewBloomFilter(1000) + for i := 0; i < 500; i++ { + bf.Add([]byte(fmt.Sprintf("service-%d", i))) + } + + tf := &tagFilter{ + filter: bf, + min: []byte{}, + max: []byte{}, + } + + tff := &tagFamilyFilter{ + "service": tf, + } + + tfs := &tagFamilyFilters{ + tagFamilyFilters: []*tagFamilyFilter{tff}, + } + + // Test different sizes of input lists + b.Run("small list (5 items)", func(b *testing.B) { + testValues := []string{"service-1", "service-2", "service-3", "unknown-1", "unknown-2"} + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfs.Having("service", testValues) + } + }) + + b.Run("medium list (50 items)", func(b *testing.B) { + testValues := make([]string, 50) + for i := 0; i < 50; i++ { + if i < 25 { + testValues[i] = fmt.Sprintf("service-%d", i) + } else { + testValues[i] = fmt.Sprintf("unknown-%d", i) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfs.Having("service", testValues) + } + }) + + b.Run("large list (500 items)", func(b *testing.B) { + testValues := make([]string, 500) + for i := 0; i < 500; i++ { + if i < 250 { + testValues[i] = fmt.Sprintf("service-%d", i) + } else { + testValues[i] = fmt.Sprintf("unknown-%d", i) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfs.Having("service", testValues) + } + }) + + b.Run("early exit (match first)", func(b *testing.B) { + testValues := make([]string, 100) + testValues[0] = "service-1" // This will match + for i := 1; i < 100; i++ { + testValues[i] = fmt.Sprintf("unknown-%d", i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfs.Having("service", testValues) + } + }) + + b.Run("no matches", func(b *testing.B) { + testValues := make([]string, 100) + for i := 0; i < 100; i++ { + testValues[i] = fmt.Sprintf("definitely-not-there-%d", i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + tfs.Having("service", testValues) + } + }) +} + func BenchmarkTagFamilyFiltersUnmarshal(b *testing.B) { testCases := []struct { tagFamilyCount int diff --git a/pkg/index/index.go b/pkg/index/index.go index ad8673c7..6e77f821 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -941,4 +941,5 @@ type Filter interface { type FilterOp interface { Eq(tagName string, tagValue string) bool Range(tagName string, rangeOpts RangeOpts) (bool, error) + Having(tagName string, tagValues []string) bool } diff --git a/pkg/query/logical/trace/index_filter.go b/pkg/query/logical/trace/index_filter.go index 6faa950d..d99a0253 100644 --- a/pkg/query/logical/trace/index_filter.go +++ b/pkg/query/logical/trace/index_filter.go @@ -71,7 +71,7 @@ func parseConditionToFilter(cond *modelv1.Condition, schema logical.Schema, case modelv1.Condition_BINARY_OP_MATCH: return &traceMatchFilter{op: "match", tagName: cond.Name}, [][]*modelv1.TagValue{entity}, nil case modelv1.Condition_BINARY_OP_HAVING: - return &traceFilter{op: "having", tagName: cond.Name}, [][]*modelv1.TagValue{entity}, nil + return &traceHavingFilter{op: "having", tagName: cond.Name, expr: expr}, [][]*modelv1.TagValue{entity}, nil case modelv1.Condition_BINARY_OP_NOT_HAVING: return &traceFilter{op: "not_having", tagName: cond.Name}, [][]*modelv1.TagValue{entity}, nil case modelv1.Condition_BINARY_OP_IN: @@ -250,6 +250,34 @@ func (tmf *traceMatchFilter) String() string { return tmf.op + ":" + tmf.tagName } +// traceHavingFilter implements index.Filter for HAVING operations in trace queries. +type traceHavingFilter struct { + expr logical.LiteralExpr + op string + tagName string +} + +func (thf *traceHavingFilter) Execute(_ index.GetSearcher, _ common.SeriesID, _ *index.RangeOpts) (posting.List, posting.List, error) { + panic("traceHavingFilter.Execute should not be invoked") +} + +func (thf *traceHavingFilter) ShouldSkip(tagFilters index.FilterOp) (bool, error) { + // Use the parsed expression to get the tag values and invoke tagFilters.Having + if thf.expr != nil { + subExprs := thf.expr.SubExprs() + tagValues := make([]string, len(subExprs)) + for i, subExpr := range subExprs { + tagValues[i] = subExpr.String() + } + return !tagFilters.Having(thf.tagName, tagValues), nil + } + return false, nil +} + +func (thf *traceHavingFilter) String() string { + return thf.op + ":" + thf.tagName +} + // extractTraceIDsFromCondition extracts trace IDs from equal and in conditions. func extractTraceIDsFromCondition(cond *modelv1.Condition) []string { var traceIDs []string diff --git a/pkg/query/logical/trace/index_filter_test.go b/pkg/query/logical/trace/index_filter_test.go new file mode 100644 index 00000000..b8f3a186 --- /dev/null +++ b/pkg/query/logical/trace/index_filter_test.go @@ -0,0 +1,281 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package trace + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/apache/skywalking-banyandb/pkg/index" + "github.com/apache/skywalking-banyandb/pkg/query/logical" +) + +type MockFilterOp struct { + eqResults map[string]map[string]bool + havingResults map[string]map[string]bool +} + +func NewMockFilterOp() *MockFilterOp { + return &MockFilterOp{ + eqResults: make(map[string]map[string]bool), + havingResults: make(map[string]map[string]bool), + } +} + +func (m *MockFilterOp) Eq(tagName string, tagValue string) bool { + if tagResults, ok := m.eqResults[tagName]; ok { + if result, ok := tagResults[tagValue]; ok { + return result + } + } + return false +} + +func (m *MockFilterOp) Range(_ string, _ index.RangeOpts) (bool, error) { + return false, nil +} + +func (m *MockFilterOp) Having(tagName string, tagValues []string) bool { + if tagResults, ok := m.havingResults[tagName]; ok { + key := "" + for i, val := range tagValues { + if i > 0 { + key += "," + } + key += val + } + if result, ok := tagResults[key]; ok { + return result + } + } + return false +} + +func (m *MockFilterOp) SetHavingResult(tagName string, tagValues []string, result bool) { + if m.havingResults[tagName] == nil { + m.havingResults[tagName] = make(map[string]bool) + } + key := "" + for i, val := range tagValues { + if i > 0 { + key += "," + } + key += val + } + m.havingResults[tagName][key] = result +} + +type MockLiteralExpr struct { + stringValue string + subExprs []logical.LiteralExpr + elements []string +} + +func NewMockLiteralExpr(stringValue string) *MockLiteralExpr { + return &MockLiteralExpr{ + stringValue: stringValue, + elements: []string{stringValue}, + } +} + +func NewMockLiteralExprWithSubExprs(subExprs []logical.LiteralExpr) *MockLiteralExpr { + elements := make([]string, len(subExprs)) + for i, expr := range subExprs { + elements[i] = expr.String() + } + return &MockLiteralExpr{ + subExprs: subExprs, + elements: elements, + } +} + +func (m *MockLiteralExpr) String() string { + return m.stringValue +} + +func (m *MockLiteralExpr) Elements() []string { + return m.elements +} + +func (m *MockLiteralExpr) Equal(other logical.Expr) bool { + if o, ok := other.(*MockLiteralExpr); ok { + return m.stringValue == o.stringValue + } + return false +} + +func (m *MockLiteralExpr) SubExprs() []logical.LiteralExpr { + return m.subExprs +} + +func (m *MockLiteralExpr) Bytes() [][]byte { + result := make([][]byte, len(m.elements)) + for i, elem := range m.elements { + result[i] = []byte(elem) + } + return result +} + +func (m *MockLiteralExpr) Field(_ index.FieldKey) index.Field { + return index.Field{} +} + +func (m *MockLiteralExpr) RangeOpts(_ bool, _ bool, _ bool) index.RangeOpts { + return index.RangeOpts{} +} + +func TestTraceHavingFilterShouldSkip(t *testing.T) { + tests := []struct { + name string + tagName string + description string + subExprStrings []string + havingResult bool + expectedSkip bool + }{ + { + name: "having matches - should not skip", + tagName: "service", + subExprStrings: []string{"service-1", "service-2"}, + havingResult: true, + expectedSkip: false, + description: "should not skip when Having returns true", + }, + { + name: "having doesn't match - should skip", + tagName: "service", + subExprStrings: []string{"service-1", "service-2"}, + havingResult: false, + expectedSkip: true, + description: "should skip when Having returns false", + }, + { + name: "single value matches - should not skip", + tagName: "endpoint", + subExprStrings: []string{"/api/v1/users"}, + havingResult: true, + expectedSkip: false, + description: "should not skip when single value Having returns true", + }, + { + name: "multiple values no match - should skip", + tagName: "endpoint", + subExprStrings: []string{"/api/v1/unknown", "/api/v2/unknown"}, + havingResult: false, + expectedSkip: true, + description: "should skip when multiple values Having returns false", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create mock sub-expressions + subExprs := make([]logical.LiteralExpr, len(tt.subExprStrings)) + for i, str := range tt.subExprStrings { + subExprs[i] = NewMockLiteralExpr(str) + } + + // Create mock expression + mockExpr := NewMockLiteralExprWithSubExprs(subExprs) + + // Create trace having filter + filter := &traceHavingFilter{ + expr: mockExpr, + op: "having", + tagName: tt.tagName, + } + + // Create mock FilterOp + mockFilterOp := NewMockFilterOp() + mockFilterOp.SetHavingResult(tt.tagName, tt.subExprStrings, tt.havingResult) + + // Test ShouldSkip + result, err := filter.ShouldSkip(mockFilterOp) + + assert.NoError(t, err, tt.description) + assert.Equal(t, tt.expectedSkip, result, tt.description) + }) + } +} + +func TestTraceHavingFilterShouldSkipNilExpr(t *testing.T) { + filter := &traceHavingFilter{ + expr: nil, + op: "having", + tagName: "service", + } + + mockFilterOp := NewMockFilterOp() + + result, err := filter.ShouldSkip(mockFilterOp) + + assert.NoError(t, err) + assert.False(t, result, "should not skip when expr is nil") +} + +func TestTraceHavingFilterExecutePanics(t *testing.T) { + filter := &traceHavingFilter{ + expr: nil, + op: "having", + tagName: "service", + } + + assert.Panics(t, func() { + filter.Execute(nil, 0, nil) + }, "Execute should panic") +} + +func TestTraceHavingFilterString(t *testing.T) { + filter := &traceHavingFilter{ + expr: nil, + op: "having", + tagName: "service", + } + + result := filter.String() + assert.Equal(t, "having:service", result) +} + +func TestTraceHavingFilterIntegration(t *testing.T) { + // Integration test to ensure the filter works with realistic data + + // Create sub-expressions + subExpr1 := NewMockLiteralExpr("order-service") + subExpr2 := NewMockLiteralExpr("user-service") + + // Create main expression + mockExpr := NewMockLiteralExprWithSubExprs([]logical.LiteralExpr{subExpr1, subExpr2}) + + // Create filter + filter := &traceHavingFilter{ + expr: mockExpr, + op: "having", + tagName: "service_name", + } + + // Create mock FilterOp that simulates bloom filter behavior + mockFilterOp := NewMockFilterOp() + mockFilterOp.SetHavingResult("service_name", []string{"order-service", "user-service"}, true) + + // Test + shouldSkip, err := filter.ShouldSkip(mockFilterOp) + + assert.NoError(t, err) + assert.False(t, shouldSkip, "should not skip when services are found") +}
