(skywalking-banyandb) 01/02: Add Having method to tagFilterOp and tagFamilyFilters for bloom filter checks

hanahmily Fri, 17 Oct 2025 21:08:03 -0700

This is an automated email from the ASF dual-hosted git repository.

hanahmily pushed a commit to branch feat/bloom-having
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git


commit d7f0d561479867bb9b1c351735ec1a32619f2ce9
Author: Gao Hongtao <[email protected]>
AuthorDate: Wed Sep 24 11:26:08 2025 +0800

    Add Having method to tagFilterOp and tagFamilyFilters for bloom filter 
checks
    
    - Implemented the Having method in tagFilterOp to check if any provided tag 
values might exist in the bloom filter, returning true if at least one value is 
found.
    - Added corresponding Having method in tagFamilyFilters to handle multiple 
tag family filters.
    - Included comprehensive unit tests for both methods to ensure correct 
functionality across various scenarios, including edge cases with empty lists 
and large input sizes.
    - Updated the index.Filter interface to include the Having method for 
consistency in filter operations.
    - Enhanced trace query handling with a new traceHavingFilter to support 
HAVING operations in trace queries.
---
 banyand/internal/sidx/tag_filter_op.go       |  35 +++
 banyand/internal/sidx/tag_filter_op_test.go  | 326 +++++++++++++++++++++++++++
 banyand/stream/tag_filter.go                 |  22 ++
 banyand/stream/tag_filter_test.go            | 311 +++++++++++++++++++++++++
 pkg/index/index.go                           |   1 +
 pkg/query/logical/trace/index_filter.go      |  30 ++-
 pkg/query/logical/trace/index_filter_test.go | 281 +++++++++++++++++++++++
 7 files changed, 1005 insertions(+), 1 deletion(-)

diff --git a/banyand/internal/sidx/tag_filter_op.go 
b/banyand/internal/sidx/tag_filter_op.go
index 8ed186cc..620f8aff 100644
--- a/banyand/internal/sidx/tag_filter_op.go
+++ b/banyand/internal/sidx/tag_filter_op.go
@@ -73,6 +73,41 @@ func (tfo *tagFilterOp) Eq(tagName string, tagValue string) 
bool {
        return true
 }
 
+// Having checks if any of the provided tag values might exist in the bloom 
filter.
+// It returns true if at least one value might be contained in the bloom 
filter.
+func (tfo *tagFilterOp) Having(tagName string, tagValues []string) bool {
+       if tfo.blockMetadata == nil || tfo.part == nil {
+               return false
+       }
+
+       // Check if the tag exists in the block
+       tagBlock, exists := tfo.blockMetadata.tagsBlocks[tagName]
+       if !exists {
+               return false
+       }
+
+       // Get or create cached tag filter data
+       cache, err := tfo.getTagFilterCache(tagName, tagBlock)
+       if err != nil {
+               logger.Errorf("failed to get tag filter cache for %s: %v", 
tagName, err)
+               return true // Conservative approach - don't filter out
+       }
+
+       // Use bloom filter to check if any value might exist
+       if cache.bloomFilter != nil {
+               for _, tagValue := range tagValues {
+                       if cache.bloomFilter.MightContain([]byte(tagValue)) {
+                               return true // Return true as soon as we find a 
potential match
+                       }
+               }
+               // None of the values might exist in the bloom filter
+               return false
+       }
+
+       // If no bloom filter, conservatively return true
+       return true
+}
+
 // Range checks if a tag is within a specific range using min/max metadata.
 func (tfo *tagFilterOp) Range(tagName string, rangeOpts index.RangeOpts) 
(bool, error) {
        if tfo.blockMetadata == nil || tfo.part == nil {
diff --git a/banyand/internal/sidx/tag_filter_op_test.go 
b/banyand/internal/sidx/tag_filter_op_test.go
index 1f5eff87..611c900b 100644
--- a/banyand/internal/sidx/tag_filter_op_test.go
+++ b/banyand/internal/sidx/tag_filter_op_test.go
@@ -19,6 +19,7 @@ package sidx
 
 import (
        "bytes"
+       "fmt"
        "testing"
 
        "github.com/stretchr/testify/assert"
@@ -491,6 +492,331 @@ func BenchmarkDecodeBloomFilterFromBytes(b *testing.B) {
        }
 }
 
+func BenchmarkTagFilterOpHaving(b *testing.B) {
+       // Setup bloom filter with test data
+       bf := filter.NewBloomFilter(1000)
+       for i := 0; i < 500; i++ {
+               bf.Add([]byte(fmt.Sprintf("service-%d", i)))
+       }
+
+       cache := &tagFilterCache{
+               bloomFilter: bf,
+               valueType:   pbv1.ValueTypeStr,
+       }
+
+       tfo := &tagFilterOp{
+               blockMetadata: &blockMetadata{
+                       tagsBlocks: map[string]dataBlock{
+                               "service": {offset: 0, size: 100},
+                       },
+               },
+               part: &part{},
+               tagCache: map[string]*tagFilterCache{
+                       "service": cache,
+               },
+       }
+
+       // Test different sizes of input lists
+       b.Run("small list (5 items)", func(b *testing.B) {
+               testValues := []string{"service-1", "service-2", "service-3", 
"unknown-1", "unknown-2"}
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfo.Having("service", testValues)
+               }
+       })
+
+       b.Run("medium list (50 items)", func(b *testing.B) {
+               testValues := make([]string, 50)
+               for i := 0; i < 50; i++ {
+                       if i < 25 {
+                               testValues[i] = fmt.Sprintf("service-%d", i)
+                       } else {
+                               testValues[i] = fmt.Sprintf("unknown-%d", i)
+                       }
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfo.Having("service", testValues)
+               }
+       })
+
+       b.Run("large list (500 items)", func(b *testing.B) {
+               testValues := make([]string, 500)
+               for i := 0; i < 500; i++ {
+                       if i < 250 {
+                               testValues[i] = fmt.Sprintf("service-%d", i)
+                       } else {
+                               testValues[i] = fmt.Sprintf("unknown-%d", i)
+                       }
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfo.Having("service", testValues)
+               }
+       })
+
+       b.Run("early exit (match first)", func(b *testing.B) {
+               testValues := make([]string, 100)
+               testValues[0] = "service-1" // This will match
+               for i := 1; i < 100; i++ {
+                       testValues[i] = fmt.Sprintf("unknown-%d", i)
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfo.Having("service", testValues)
+               }
+       })
+
+       b.Run("no matches", func(b *testing.B) {
+               testValues := make([]string, 100)
+               for i := 0; i < 100; i++ {
+                       testValues[i] = fmt.Sprintf("definitely-not-there-%d", 
i)
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfo.Having("service", testValues)
+               }
+       })
+}
+
+func TestTagFilterOpHaving(t *testing.T) {
+       tests := []struct {
+               blockMetadata  *blockMetadata
+               part           *part
+               name           string
+               tagName        string
+               description    string
+               tagValues      []string
+               expectedResult bool
+       }{
+               {
+                       name:           "nil block metadata",
+                       blockMetadata:  nil,
+                       part:           &part{},
+                       tagName:        "service",
+                       tagValues:      []string{"order-service", 
"user-service"},
+                       expectedResult: false,
+                       description:    "should return false when blockMetadata 
is nil",
+               },
+               {
+                       name:           "nil part",
+                       blockMetadata:  &blockMetadata{tagsBlocks: 
make(map[string]dataBlock)},
+                       part:           nil,
+                       tagName:        "service",
+                       tagValues:      []string{"order-service", 
"user-service"},
+                       expectedResult: false,
+                       description:    "should return false when part is nil",
+               },
+               {
+                       name: "tag not in block",
+                       blockMetadata: &blockMetadata{
+                               tagsBlocks: map[string]dataBlock{
+                                       "other_tag": {offset: 0, size: 100},
+                               },
+                       },
+                       part:           &part{},
+                       tagName:        "service",
+                       tagValues:      []string{"order-service", 
"user-service"},
+                       expectedResult: false,
+                       description:    "should return false when tag doesn't 
exist in block",
+               },
+               {
+                       name: "tag exists but no cache data",
+                       blockMetadata: &blockMetadata{
+                               tagsBlocks: map[string]dataBlock{
+                                       "service": {offset: 0, size: 100},
+                               },
+                       },
+                       part:           &part{},
+                       tagName:        "service",
+                       tagValues:      []string{"order-service", 
"user-service"},
+                       expectedResult: true,
+                       description:    "should return true (conservative) when 
tag exists but cache fails",
+               },
+               {
+                       name:           "empty tag values list",
+                       blockMetadata:  &blockMetadata{tagsBlocks: 
make(map[string]dataBlock)},
+                       part:           &part{},
+                       tagName:        "service",
+                       tagValues:      []string{},
+                       expectedResult: false,
+                       description:    "should return false when no tag values 
provided",
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       tfo := &tagFilterOp{
+                               blockMetadata: tt.blockMetadata,
+                               part:          tt.part,
+                               tagCache:      make(map[string]*tagFilterCache),
+                       }
+
+                       result := tfo.Having(tt.tagName, tt.tagValues)
+                       assert.Equal(t, tt.expectedResult, result, 
tt.description)
+               })
+       }
+}
+
+func TestTagFilterOpHavingWithBloomFilter(t *testing.T) {
+       // Create a bloom filter and add some test values
+       bf := filter.NewBloomFilter(100)
+       bf.Add([]byte("order-service"))
+       bf.Add([]byte("payment-service"))
+       bf.Add([]byte("inventory-service"))
+
+       // Create cache with bloom filter
+       cache := &tagFilterCache{
+               bloomFilter: bf,
+               valueType:   pbv1.ValueTypeStr,
+       }
+
+       tfo := &tagFilterOp{
+               blockMetadata: &blockMetadata{
+                       tagsBlocks: map[string]dataBlock{
+                               "service": {offset: 0, size: 100},
+                       },
+               },
+               part: &part{},
+               tagCache: map[string]*tagFilterCache{
+                       "service": cache,
+               },
+       }
+
+       tests := []struct {
+               name           string
+               description    string
+               tagValues      []string
+               expectedResult bool
+       }{
+               {
+                       name:           "all values might exist",
+                       tagValues:      []string{"order-service", 
"payment-service"},
+                       expectedResult: true,
+                       description:    "should return true when all values 
might exist in bloom filter",
+               },
+               {
+                       name:           "some values might exist",
+                       tagValues:      []string{"order-service", 
"non-existent-service"},
+                       expectedResult: true,
+                       description:    "should return true when at least one 
value might exist in bloom filter",
+               },
+               {
+                       name:           "no values might exist",
+                       tagValues:      []string{"non-existent-service-1", 
"non-existent-service-2"},
+                       expectedResult: false,
+                       description:    "should return false when no values 
might exist in bloom filter",
+               },
+               {
+                       name:           "single value exists",
+                       tagValues:      []string{"inventory-service"},
+                       expectedResult: true,
+                       description:    "should return true when single value 
might exist in bloom filter",
+               },
+               {
+                       name:           "single value does not exist",
+                       tagValues:      []string{"definitely-not-there"},
+                       expectedResult: false,
+                       description:    "should return false when single value 
doesn't exist in bloom filter",
+               },
+               {
+                       name:           "empty values list",
+                       tagValues:      []string{},
+                       expectedResult: false,
+                       description:    "should return false when no values 
provided",
+               },
+               {
+                       name:           "mixed case with first match",
+                       tagValues:      []string{"order-service", "unknown-1", 
"unknown-2"},
+                       expectedResult: true,
+                       description:    "should return true immediately when 
first value matches",
+               },
+               {
+                       name:           "mixed case with last match",
+                       tagValues:      []string{"unknown-1", "unknown-2", 
"payment-service"},
+                       expectedResult: true,
+                       description:    "should return true when last value 
matches",
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       result := tfo.Having("service", tt.tagValues)
+                       assert.Equal(t, tt.expectedResult, result, 
tt.description)
+               })
+       }
+}
+
+func TestTagFilterOpHavingWithoutBloomFilter(t *testing.T) {
+       // Create cache without bloom filter
+       cache := &tagFilterCache{
+               bloomFilter: nil,
+               valueType:   pbv1.ValueTypeStr,
+       }
+
+       tfo := &tagFilterOp{
+               blockMetadata: &blockMetadata{
+                       tagsBlocks: map[string]dataBlock{
+                               "service": {offset: 0, size: 100},
+                       },
+               },
+               part: &part{},
+               tagCache: map[string]*tagFilterCache{
+                       "service": cache,
+               },
+       }
+
+       // When no bloom filter is available, should always return true 
(conservative)
+       result := tfo.Having("service", []string{"any-service", 
"another-service"})
+       assert.True(t, result, "should return true when no bloom filter is 
available (conservative approach)")
+
+       // Test with empty list too
+       result = tfo.Having("service", []string{})
+       assert.True(t, result, "should return true even with empty list when no 
bloom filter (conservative approach)")
+}
+
+func TestTagFilterOpHavingLargeList(t *testing.T) {
+       // Create a bloom filter with some values
+       bf := filter.NewBloomFilter(1000)
+       bf.Add([]byte("target-service"))
+
+       cache := &tagFilterCache{
+               bloomFilter: bf,
+               valueType:   pbv1.ValueTypeStr,
+       }
+
+       tfo := &tagFilterOp{
+               blockMetadata: &blockMetadata{
+                       tagsBlocks: map[string]dataBlock{
+                               "service": {offset: 0, size: 100},
+                       },
+               },
+               part: &part{},
+               tagCache: map[string]*tagFilterCache{
+                       "service": cache,
+               },
+       }
+
+       // Create a large list with the target at the end
+       largeList := make([]string, 1000)
+       for i := 0; i < 999; i++ {
+               largeList[i] = fmt.Sprintf("non-existent-service-%d", i)
+       }
+       largeList[999] = "target-service"
+
+       result := tfo.Having("service", largeList)
+       assert.True(t, result, "should handle large lists and find target 
value")
+
+       // Test with large list that has no matches
+       noMatchList := make([]string, 1000)
+       for i := 0; i < 1000; i++ {
+               noMatchList[i] = fmt.Sprintf("definitely-not-there-%d", i)
+       }
+
+       result = tfo.Having("service", noMatchList)
+       assert.False(t, result, "should return false for large list with no 
matches")
+}
+
 func TestTagFilterOpErrorHandling(t *testing.T) {
        t.Run("invalid range bounds", func(t *testing.T) {
                cache := &tagFilterCache{
diff --git a/banyand/stream/tag_filter.go b/banyand/stream/tag_filter.go
index 39fd3d8e..1f7925be 100644
--- a/banyand/stream/tag_filter.go
+++ b/banyand/stream/tag_filter.go
@@ -203,6 +203,28 @@ func (tfs *tagFamilyFilters) Range(tagName string, 
rangeOpts index.RangeOpts) (b
        return true, nil
 }
 
+// Having checks if any of the provided tag values might exist in the bloom 
filter.
+// It returns true if at least one value might be contained in any tag family 
filter.
+func (tfs *tagFamilyFilters) Having(tagName string, tagValues []string) bool {
+       for _, tff := range tfs.tagFamilyFilters {
+               if tf, ok := (*tff)[tagName]; ok {
+                       if tf.filter != nil {
+                               for _, tagValue := range tagValues {
+                                       if 
tf.filter.MightContain([]byte(tagValue)) {
+                                               return true // Return true as 
soon as we find a potential match
+                                       }
+                               }
+                               // None of the values might exist in this tag 
family filter
+                               return false
+                       }
+                       // If no bloom filter, conservatively return true
+                       return true
+               }
+       }
+       // If tag is not found in any tag family filter, return true 
(conservative)
+       return true
+}
+
 func generateTagFamilyFilters() *tagFamilyFilters {
        v := tagFamilyFiltersPool.Get()
        if v == nil {
diff --git a/banyand/stream/tag_filter_test.go 
b/banyand/stream/tag_filter_test.go
index b381ed78..fbc57642 100644
--- a/banyand/stream/tag_filter_test.go
+++ b/banyand/stream/tag_filter_test.go
@@ -122,6 +122,317 @@ func generateMetaAndFilter(tagCount int, itemsPerTag int) 
([]byte, []byte) {
        return metaBuf, filterBuf.Bytes()
 }
 
+func TestTagFamilyFiltersHaving(t *testing.T) {
+       assert := assert.New(t)
+
+       // Create a tag family filter with test data
+       bf := filter.NewBloomFilter(100)
+       bf.Add([]byte("service-1"))
+       bf.Add([]byte("service-2"))
+       bf.Add([]byte("service-3"))
+
+       // Create tag filter
+       tf := &tagFilter{
+               filter: bf,
+               min:    []byte{},
+               max:    []byte{},
+       }
+
+       // Create tag family filter map
+       tff := &tagFamilyFilter{
+               "service": tf,
+       }
+
+       // Create tag family filters
+       tfs := &tagFamilyFilters{
+               tagFamilyFilters: []*tagFamilyFilter{tff},
+       }
+
+       tests := []struct {
+               name           string
+               tagName        string
+               description    string
+               tagValues      []string
+               expectedResult bool
+       }{
+               {
+                       name:           "all values might exist",
+                       tagName:        "service",
+                       tagValues:      []string{"service-1", "service-2"},
+                       expectedResult: true,
+                       description:    "should return true when all values 
might exist in bloom filter",
+               },
+               {
+                       name:           "some values might exist",
+                       tagName:        "service",
+                       tagValues:      []string{"service-1", 
"unknown-service"},
+                       expectedResult: true,
+                       description:    "should return true when at least one 
value might exist in bloom filter",
+               },
+               {
+                       name:           "no values might exist",
+                       tagName:        "service",
+                       tagValues:      []string{"unknown-1", "unknown-2"},
+                       expectedResult: false,
+                       description:    "should return false when no values 
might exist in bloom filter",
+               },
+               {
+                       name:           "single value exists",
+                       tagName:        "service",
+                       tagValues:      []string{"service-3"},
+                       expectedResult: true,
+                       description:    "should return true when single value 
might exist in bloom filter",
+               },
+               {
+                       name:           "single value does not exist",
+                       tagName:        "service",
+                       tagValues:      []string{"definitely-not-there"},
+                       expectedResult: false,
+                       description:    "should return false when single value 
doesn't exist in bloom filter",
+               },
+               {
+                       name:           "empty values list",
+                       tagName:        "service",
+                       tagValues:      []string{},
+                       expectedResult: false,
+                       description:    "should return false when no values 
provided",
+               },
+               {
+                       name:           "tag not found",
+                       tagName:        "non-existent-tag",
+                       tagValues:      []string{"service-1"},
+                       expectedResult: true,
+                       description:    "should return true (conservative) when 
tag is not found",
+               },
+               {
+                       name:           "early exit - first match",
+                       tagName:        "service",
+                       tagValues:      []string{"service-1", "unknown-1", 
"unknown-2"},
+                       expectedResult: true,
+                       description:    "should return true immediately when 
first value matches",
+               },
+               {
+                       name:           "late match",
+                       tagName:        "service",
+                       tagValues:      []string{"unknown-1", "unknown-2", 
"service-2"},
+                       expectedResult: true,
+                       description:    "should return true when last value 
matches",
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       result := tfs.Having(tt.tagName, tt.tagValues)
+                       assert.Equal(t, tt.expectedResult, result, 
tt.description)
+               })
+       }
+}
+
+func TestTagFamilyFiltersHavingWithoutBloomFilter(t *testing.T) {
+       assert := assert.New(t)
+
+       // Create tag filter without bloom filter
+       tf := &tagFilter{
+               filter: nil, // No bloom filter
+               min:    []byte{},
+               max:    []byte{},
+       }
+
+       // Create tag family filter map
+       tff := &tagFamilyFilter{
+               "service": tf,
+       }
+
+       // Create tag family filters
+       tfs := &tagFamilyFilters{
+               tagFamilyFilters: []*tagFamilyFilter{tff},
+       }
+
+       // When no bloom filter is available, should always return true 
(conservative)
+       result := tfs.Having("service", []string{"any-service", 
"another-service"})
+       assert.True(result, "should return true when no bloom filter is 
available (conservative approach)")
+
+       // Test with empty list too
+       result = tfs.Having("service", []string{})
+       assert.True(result, "should return true even with empty list when no 
bloom filter (conservative approach)")
+}
+
+func TestTagFamilyFiltersHavingMultipleTagFamilies(t *testing.T) {
+       assert := assert.New(t)
+
+       // Create first tag family filter
+       bf1 := filter.NewBloomFilter(50)
+       bf1.Add([]byte("service-1"))
+       bf1.Add([]byte("service-2"))
+
+       tf1 := &tagFilter{
+               filter: bf1,
+               min:    []byte{},
+               max:    []byte{},
+       }
+
+       tff1 := &tagFamilyFilter{
+               "service": tf1,
+       }
+
+       // Create second tag family filter
+       bf2 := filter.NewBloomFilter(50)
+       bf2.Add([]byte("user-1"))
+       bf2.Add([]byte("user-2"))
+
+       tf2 := &tagFilter{
+               filter: bf2,
+               min:    []byte{},
+               max:    []byte{},
+       }
+
+       tff2 := &tagFamilyFilter{
+               "user": tf2,
+       }
+
+       // Create tag family filters with multiple families
+       tfs := &tagFamilyFilters{
+               tagFamilyFilters: []*tagFamilyFilter{tff1, tff2},
+       }
+
+       // Test service tag (should find in first family)
+       result := tfs.Having("service", []string{"service-1", "unknown"})
+       assert.True(result, "should find service-1 in first tag family")
+
+       // Test user tag (should find in second family)
+       result = tfs.Having("user", []string{"user-2", "unknown"})
+       assert.True(result, "should find user-2 in second tag family")
+
+       // Test non-existent values
+       result = tfs.Having("service", []string{"unknown-1", "unknown-2"})
+       assert.False(result, "should not find unknown values")
+}
+
+func TestTagFamilyFiltersHavingLargeList(t *testing.T) {
+       assert := assert.New(t)
+
+       // Create a bloom filter with one target value
+       bf := filter.NewBloomFilter(1000)
+       bf.Add([]byte("target-service"))
+
+       tf := &tagFilter{
+               filter: bf,
+               min:    []byte{},
+               max:    []byte{},
+       }
+
+       tff := &tagFamilyFilter{
+               "service": tf,
+       }
+
+       tfs := &tagFamilyFilters{
+               tagFamilyFilters: []*tagFamilyFilter{tff},
+       }
+
+       // Create a large list with the target at the end
+       largeList := make([]string, 1000)
+       for i := 0; i < 999; i++ {
+               largeList[i] = fmt.Sprintf("non-existent-service-%d", i)
+       }
+       largeList[999] = "target-service"
+
+       result := tfs.Having("service", largeList)
+       assert.True(result, "should handle large lists and find target value")
+
+       // Test with large list that has no matches
+       noMatchList := make([]string, 1000)
+       for i := 0; i < 1000; i++ {
+               noMatchList[i] = fmt.Sprintf("definitely-not-there-%d", i)
+       }
+
+       result = tfs.Having("service", noMatchList)
+       assert.False(result, "should return false for large list with no 
matches")
+}
+
+func BenchmarkTagFamilyFiltersHaving(b *testing.B) {
+       // Setup bloom filter with test data
+       bf := filter.NewBloomFilter(1000)
+       for i := 0; i < 500; i++ {
+               bf.Add([]byte(fmt.Sprintf("service-%d", i)))
+       }
+
+       tf := &tagFilter{
+               filter: bf,
+               min:    []byte{},
+               max:    []byte{},
+       }
+
+       tff := &tagFamilyFilter{
+               "service": tf,
+       }
+
+       tfs := &tagFamilyFilters{
+               tagFamilyFilters: []*tagFamilyFilter{tff},
+       }
+
+       // Test different sizes of input lists
+       b.Run("small list (5 items)", func(b *testing.B) {
+               testValues := []string{"service-1", "service-2", "service-3", 
"unknown-1", "unknown-2"}
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfs.Having("service", testValues)
+               }
+       })
+
+       b.Run("medium list (50 items)", func(b *testing.B) {
+               testValues := make([]string, 50)
+               for i := 0; i < 50; i++ {
+                       if i < 25 {
+                               testValues[i] = fmt.Sprintf("service-%d", i)
+                       } else {
+                               testValues[i] = fmt.Sprintf("unknown-%d", i)
+                       }
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfs.Having("service", testValues)
+               }
+       })
+
+       b.Run("large list (500 items)", func(b *testing.B) {
+               testValues := make([]string, 500)
+               for i := 0; i < 500; i++ {
+                       if i < 250 {
+                               testValues[i] = fmt.Sprintf("service-%d", i)
+                       } else {
+                               testValues[i] = fmt.Sprintf("unknown-%d", i)
+                       }
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfs.Having("service", testValues)
+               }
+       })
+
+       b.Run("early exit (match first)", func(b *testing.B) {
+               testValues := make([]string, 100)
+               testValues[0] = "service-1" // This will match
+               for i := 1; i < 100; i++ {
+                       testValues[i] = fmt.Sprintf("unknown-%d", i)
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfs.Having("service", testValues)
+               }
+       })
+
+       b.Run("no matches", func(b *testing.B) {
+               testValues := make([]string, 100)
+               for i := 0; i < 100; i++ {
+                       testValues[i] = fmt.Sprintf("definitely-not-there-%d", 
i)
+               }
+               b.ResetTimer()
+               for i := 0; i < b.N; i++ {
+                       tfs.Having("service", testValues)
+               }
+       })
+}
+
 func BenchmarkTagFamilyFiltersUnmarshal(b *testing.B) {
        testCases := []struct {
                tagFamilyCount int
diff --git a/pkg/index/index.go b/pkg/index/index.go
index ad8673c7..6e77f821 100644
--- a/pkg/index/index.go
+++ b/pkg/index/index.go
@@ -941,4 +941,5 @@ type Filter interface {
 type FilterOp interface {
        Eq(tagName string, tagValue string) bool
        Range(tagName string, rangeOpts RangeOpts) (bool, error)
+       Having(tagName string, tagValues []string) bool
 }
diff --git a/pkg/query/logical/trace/index_filter.go 
b/pkg/query/logical/trace/index_filter.go
index 6faa950d..d99a0253 100644
--- a/pkg/query/logical/trace/index_filter.go
+++ b/pkg/query/logical/trace/index_filter.go
@@ -71,7 +71,7 @@ func parseConditionToFilter(cond *modelv1.Condition, schema 
logical.Schema,
        case modelv1.Condition_BINARY_OP_MATCH:
                return &traceMatchFilter{op: "match", tagName: cond.Name}, 
[][]*modelv1.TagValue{entity}, nil
        case modelv1.Condition_BINARY_OP_HAVING:
-               return &traceFilter{op: "having", tagName: cond.Name}, 
[][]*modelv1.TagValue{entity}, nil
+               return &traceHavingFilter{op: "having", tagName: cond.Name, 
expr: expr}, [][]*modelv1.TagValue{entity}, nil
        case modelv1.Condition_BINARY_OP_NOT_HAVING:
                return &traceFilter{op: "not_having", tagName: cond.Name}, 
[][]*modelv1.TagValue{entity}, nil
        case modelv1.Condition_BINARY_OP_IN:
@@ -250,6 +250,34 @@ func (tmf *traceMatchFilter) String() string {
        return tmf.op + ":" + tmf.tagName
 }
 
+// traceHavingFilter implements index.Filter for HAVING operations in trace 
queries.
+type traceHavingFilter struct {
+       expr    logical.LiteralExpr
+       op      string
+       tagName string
+}
+
+func (thf *traceHavingFilter) Execute(_ index.GetSearcher, _ common.SeriesID, 
_ *index.RangeOpts) (posting.List, posting.List, error) {
+       panic("traceHavingFilter.Execute should not be invoked")
+}
+
+func (thf *traceHavingFilter) ShouldSkip(tagFilters index.FilterOp) (bool, 
error) {
+       // Use the parsed expression to get the tag values and invoke 
tagFilters.Having
+       if thf.expr != nil {
+               subExprs := thf.expr.SubExprs()
+               tagValues := make([]string, len(subExprs))
+               for i, subExpr := range subExprs {
+                       tagValues[i] = subExpr.String()
+               }
+               return !tagFilters.Having(thf.tagName, tagValues), nil
+       }
+       return false, nil
+}
+
+func (thf *traceHavingFilter) String() string {
+       return thf.op + ":" + thf.tagName
+}
+
 // extractTraceIDsFromCondition extracts trace IDs from equal and in 
conditions.
 func extractTraceIDsFromCondition(cond *modelv1.Condition) []string {
        var traceIDs []string
diff --git a/pkg/query/logical/trace/index_filter_test.go 
b/pkg/query/logical/trace/index_filter_test.go
new file mode 100644
index 00000000..b8f3a186
--- /dev/null
+++ b/pkg/query/logical/trace/index_filter_test.go
@@ -0,0 +1,281 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package trace
+
+import (
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+
+       "github.com/apache/skywalking-banyandb/pkg/index"
+       "github.com/apache/skywalking-banyandb/pkg/query/logical"
+)
+
+type MockFilterOp struct {
+       eqResults     map[string]map[string]bool
+       havingResults map[string]map[string]bool
+}
+
+func NewMockFilterOp() *MockFilterOp {
+       return &MockFilterOp{
+               eqResults:     make(map[string]map[string]bool),
+               havingResults: make(map[string]map[string]bool),
+       }
+}
+
+func (m *MockFilterOp) Eq(tagName string, tagValue string) bool {
+       if tagResults, ok := m.eqResults[tagName]; ok {
+               if result, ok := tagResults[tagValue]; ok {
+                       return result
+               }
+       }
+       return false
+}
+
+func (m *MockFilterOp) Range(_ string, _ index.RangeOpts) (bool, error) {
+       return false, nil
+}
+
+func (m *MockFilterOp) Having(tagName string, tagValues []string) bool {
+       if tagResults, ok := m.havingResults[tagName]; ok {
+               key := ""
+               for i, val := range tagValues {
+                       if i > 0 {
+                               key += ","
+                       }
+                       key += val
+               }
+               if result, ok := tagResults[key]; ok {
+                       return result
+               }
+       }
+       return false
+}
+
+func (m *MockFilterOp) SetHavingResult(tagName string, tagValues []string, 
result bool) {
+       if m.havingResults[tagName] == nil {
+               m.havingResults[tagName] = make(map[string]bool)
+       }
+       key := ""
+       for i, val := range tagValues {
+               if i > 0 {
+                       key += ","
+               }
+               key += val
+       }
+       m.havingResults[tagName][key] = result
+}
+
+type MockLiteralExpr struct {
+       stringValue string
+       subExprs    []logical.LiteralExpr
+       elements    []string
+}
+
+func NewMockLiteralExpr(stringValue string) *MockLiteralExpr {
+       return &MockLiteralExpr{
+               stringValue: stringValue,
+               elements:    []string{stringValue},
+       }
+}
+
+func NewMockLiteralExprWithSubExprs(subExprs []logical.LiteralExpr) 
*MockLiteralExpr {
+       elements := make([]string, len(subExprs))
+       for i, expr := range subExprs {
+               elements[i] = expr.String()
+       }
+       return &MockLiteralExpr{
+               subExprs: subExprs,
+               elements: elements,
+       }
+}
+
+func (m *MockLiteralExpr) String() string {
+       return m.stringValue
+}
+
+func (m *MockLiteralExpr) Elements() []string {
+       return m.elements
+}
+
+func (m *MockLiteralExpr) Equal(other logical.Expr) bool {
+       if o, ok := other.(*MockLiteralExpr); ok {
+               return m.stringValue == o.stringValue
+       }
+       return false
+}
+
+func (m *MockLiteralExpr) SubExprs() []logical.LiteralExpr {
+       return m.subExprs
+}
+
+func (m *MockLiteralExpr) Bytes() [][]byte {
+       result := make([][]byte, len(m.elements))
+       for i, elem := range m.elements {
+               result[i] = []byte(elem)
+       }
+       return result
+}
+
+func (m *MockLiteralExpr) Field(_ index.FieldKey) index.Field {
+       return index.Field{}
+}
+
+func (m *MockLiteralExpr) RangeOpts(_ bool, _ bool, _ bool) index.RangeOpts {
+       return index.RangeOpts{}
+}
+
+func TestTraceHavingFilterShouldSkip(t *testing.T) {
+       tests := []struct {
+               name           string
+               tagName        string
+               description    string
+               subExprStrings []string
+               havingResult   bool
+               expectedSkip   bool
+       }{
+               {
+                       name:           "having matches - should not skip",
+                       tagName:        "service",
+                       subExprStrings: []string{"service-1", "service-2"},
+                       havingResult:   true,
+                       expectedSkip:   false,
+                       description:    "should not skip when Having returns 
true",
+               },
+               {
+                       name:           "having doesn't match - should skip",
+                       tagName:        "service",
+                       subExprStrings: []string{"service-1", "service-2"},
+                       havingResult:   false,
+                       expectedSkip:   true,
+                       description:    "should skip when Having returns false",
+               },
+               {
+                       name:           "single value matches - should not 
skip",
+                       tagName:        "endpoint",
+                       subExprStrings: []string{"/api/v1/users"},
+                       havingResult:   true,
+                       expectedSkip:   false,
+                       description:    "should not skip when single value 
Having returns true",
+               },
+               {
+                       name:           "multiple values no match - should 
skip",
+                       tagName:        "endpoint",
+                       subExprStrings: []string{"/api/v1/unknown", 
"/api/v2/unknown"},
+                       havingResult:   false,
+                       expectedSkip:   true,
+                       description:    "should skip when multiple values 
Having returns false",
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       // Create mock sub-expressions
+                       subExprs := make([]logical.LiteralExpr, 
len(tt.subExprStrings))
+                       for i, str := range tt.subExprStrings {
+                               subExprs[i] = NewMockLiteralExpr(str)
+                       }
+
+                       // Create mock expression
+                       mockExpr := NewMockLiteralExprWithSubExprs(subExprs)
+
+                       // Create trace having filter
+                       filter := &traceHavingFilter{
+                               expr:    mockExpr,
+                               op:      "having",
+                               tagName: tt.tagName,
+                       }
+
+                       // Create mock FilterOp
+                       mockFilterOp := NewMockFilterOp()
+                       mockFilterOp.SetHavingResult(tt.tagName, 
tt.subExprStrings, tt.havingResult)
+
+                       // Test ShouldSkip
+                       result, err := filter.ShouldSkip(mockFilterOp)
+
+                       assert.NoError(t, err, tt.description)
+                       assert.Equal(t, tt.expectedSkip, result, tt.description)
+               })
+       }
+}
+
+func TestTraceHavingFilterShouldSkipNilExpr(t *testing.T) {
+       filter := &traceHavingFilter{
+               expr:    nil,
+               op:      "having",
+               tagName: "service",
+       }
+
+       mockFilterOp := NewMockFilterOp()
+
+       result, err := filter.ShouldSkip(mockFilterOp)
+
+       assert.NoError(t, err)
+       assert.False(t, result, "should not skip when expr is nil")
+}
+
+func TestTraceHavingFilterExecutePanics(t *testing.T) {
+       filter := &traceHavingFilter{
+               expr:    nil,
+               op:      "having",
+               tagName: "service",
+       }
+
+       assert.Panics(t, func() {
+               filter.Execute(nil, 0, nil)
+       }, "Execute should panic")
+}
+
+func TestTraceHavingFilterString(t *testing.T) {
+       filter := &traceHavingFilter{
+               expr:    nil,
+               op:      "having",
+               tagName: "service",
+       }
+
+       result := filter.String()
+       assert.Equal(t, "having:service", result)
+}
+
+func TestTraceHavingFilterIntegration(t *testing.T) {
+       // Integration test to ensure the filter works with realistic data
+
+       // Create sub-expressions
+       subExpr1 := NewMockLiteralExpr("order-service")
+       subExpr2 := NewMockLiteralExpr("user-service")
+
+       // Create main expression
+       mockExpr := 
NewMockLiteralExprWithSubExprs([]logical.LiteralExpr{subExpr1, subExpr2})
+
+       // Create filter
+       filter := &traceHavingFilter{
+               expr:    mockExpr,
+               op:      "having",
+               tagName: "service_name",
+       }
+
+       // Create mock FilterOp that simulates bloom filter behavior
+       mockFilterOp := NewMockFilterOp()
+       mockFilterOp.SetHavingResult("service_name", []string{"order-service", 
"user-service"}, true)
+
+       // Test
+       shouldSkip, err := filter.ShouldSkip(mockFilterOp)
+
+       assert.NoError(t, err)
+       assert.False(t, shouldSkip, "should not skip when services are found")
+}

(skywalking-banyandb) 01/02: Add Having method to tagFilterOp and tagFamilyFilters for bloom filter checks

Reply via email to