This is an automated email from the ASF dual-hosted git repository. hanahmily pushed a commit to branch bug/trace-range-query in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
commit 517f33943e18b7e187dd389fae7d4013f151ce7a Author: Hongtao Gao <[email protected]> AuthorDate: Sat Feb 28 09:22:48 2026 +0800 fix: deduplicate tag names in trace queries with range conditions --- CHANGES.md | 1 + pkg/query/logical/tag_filter.go | 9 ++- pkg/query/logical/trace/index_filter.go | 3 +- pkg/query/logical/trace/index_filter_test.go | 87 ++++++++++++++++++++++ .../data/input/duration_range_order_timestamp.ql | 23 ++++++ .../data/input/duration_range_order_timestamp.yml | 40 ++++++++++ .../data/want/duration_range_order_timestamp.yml | 34 +++++++++ test/cases/trace/trace.go | 2 + 8 files changed, 195 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index b9c8ff244..35ca8ce87 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -36,6 +36,7 @@ Release Notes. - Fix data written to the wrong shard and related stream queries. - Fix the lifecycle panic when the trace has no sidx. - Fix panic in sidx merge and flush operations when part counts don't match expectations. +- Fix trace queries with range conditions on the same tag (e.g., duration) combined with ORDER BY by deduplicating tag names when merging logical expression branches. ### Document diff --git a/pkg/query/logical/tag_filter.go b/pkg/query/logical/tag_filter.go index 7f767fec7..103e3b90d 100644 --- a/pkg/query/logical/tag_filter.go +++ b/pkg/query/logical/tag_filter.go @@ -532,12 +532,15 @@ func (r *rangeTag) String() string { } func tagExpr(accessor TagValueIndexAccessor, registry TagSpecRegistry, tagName string, analyzer *analysis.Analyzer) (ComparableExpr, error) { - if tagSpec := registry.FindTagSpecByName(tagName); tagSpec != nil { - if tagVal := accessor.GetTagValue(tagSpec.TagFamilyIdx, tagSpec.TagIdx); tagVal != nil { + tagSpec := registry.FindTagSpecByName(tagName) + if tagSpec != nil { + tagVal := accessor.GetTagValue(tagSpec.TagFamilyIdx, tagSpec.TagIdx) + if tagVal != nil { return parseExpr(tagVal, analyzer) } + return nil, errors.WithMessagef(ErrTagNotDefined, "tag value is nil for tag %q, tagSpec: %+v", tagName, tagSpec) } - return nil, ErrTagNotDefined + return nil, errors.WithMessagef(ErrTagNotDefined, "tag %q does not exist in the current schema", tagName) } type matchTag struct { diff --git a/pkg/query/logical/trace/index_filter.go b/pkg/query/logical/trace/index_filter.go index db1b2e1c4..2a606df2f 100644 --- a/pkg/query/logical/trace/index_filter.go +++ b/pkg/query/logical/trace/index_filter.go @@ -400,9 +400,10 @@ func buildFilterFromLogicalExpression(le *modelv1.LogicalExpression, schema logi return nil, nil, append(leftTagNames, rightTagNames...), append(leftTraceIDs, rightTraceIDs...), minVal, maxVal, err } - // Merge tag names from both sides + // Merge tag names from both sides (deduplicate since same tag can appear in multiple conditions) collectedTagNames = append(collectedTagNames, leftTagNames...) collectedTagNames = append(collectedTagNames, rightTagNames...) + collectedTagNames = deduplicateStrings(collectedTagNames) // Merge trace IDs from both sides traceIDs = append(traceIDs, leftTraceIDs...) diff --git a/pkg/query/logical/trace/index_filter_test.go b/pkg/query/logical/trace/index_filter_test.go index b8f3a1864..fac72009b 100644 --- a/pkg/query/logical/trace/index_filter_test.go +++ b/pkg/query/logical/trace/index_filter_test.go @@ -22,6 +22,9 @@ import ( "github.com/stretchr/testify/assert" + commonv1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/common/v1" + databasev1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1" + modelv1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/model/v1" "github.com/apache/skywalking-banyandb/pkg/index" "github.com/apache/skywalking-banyandb/pkg/query/logical" ) @@ -279,3 +282,87 @@ func TestTraceHavingFilterIntegration(t *testing.T) { assert.NoError(t, err) assert.False(t, shouldSkip, "should not skip when services are found") } + +// TestBuildFilterDeduplicatesCollectedTagNames verifies that buildFilter deduplicates tag names +// when criteria has multiple conditions on the same tag (e.g. duration > 100 AND duration < 200). +func TestBuildFilterDeduplicatesCollectedTagNames(t *testing.T) { + trace := &databasev1.Trace{ + Metadata: &commonv1.Metadata{Name: "test", Group: "default"}, + Tags: []*databasev1.TraceTagSpec{ + {Name: "trace_id", Type: databasev1.TagType_TAG_TYPE_STRING}, + {Name: "span_id", Type: databasev1.TagType_TAG_TYPE_STRING}, + {Name: "timestamp", Type: databasev1.TagType_TAG_TYPE_TIMESTAMP}, + {Name: "duration", Type: databasev1.TagType_TAG_TYPE_INT}, + {Name: "service_id", Type: databasev1.TagType_TAG_TYPE_STRING}, + }, + TraceIdTagName: "trace_id", + SpanIdTagName: "span_id", + TimestampTagName: "timestamp", + } + schema, err := BuildSchema(trace, nil) + assert.NoError(t, err) + entityDict := make(map[string]int) + entity := []*modelv1.TagValue{} + + // Case 1: duration > 100 AND duration < 200 (same tag in both conditions) + criteria := &modelv1.Criteria{ + Exp: &modelv1.Criteria_Le{ + Le: &modelv1.LogicalExpression{ + Op: modelv1.LogicalExpression_LOGICAL_OP_AND, + Left: &modelv1.Criteria{ + Exp: &modelv1.Criteria_Condition{ + Condition: &modelv1.Condition{ + Name: "duration", + Op: modelv1.Condition_BINARY_OP_GT, + Value: &modelv1.TagValue{Value: &modelv1.TagValue_Int{Int: &modelv1.Int{Value: 100}}}, + }, + }, + }, + Right: &modelv1.Criteria{ + Exp: &modelv1.Criteria_Condition{ + Condition: &modelv1.Condition{ + Name: "duration", + Op: modelv1.Condition_BINARY_OP_LT, + Value: &modelv1.TagValue{Value: &modelv1.TagValue_Int{Int: &modelv1.Int{Value: 200}}}, + }, + }, + }, + }, + }, + } + _, _, collectedTagNames, _, _, _, err := buildTraceFilter( //nolint:dogsled + criteria, schema, entityDict, entity, "trace_id", "span_id", "") + assert.NoError(t, err) + assert.Equal(t, []string{"duration"}, collectedTagNames, "duration should appear only once") + + // Case 2: service_id = "a" OR service_id = "b" (same tag in both conditions) + criteriaOR := &modelv1.Criteria{ + Exp: &modelv1.Criteria_Le{ + Le: &modelv1.LogicalExpression{ + Op: modelv1.LogicalExpression_LOGICAL_OP_OR, + Left: &modelv1.Criteria{ + Exp: &modelv1.Criteria_Condition{ + Condition: &modelv1.Condition{ + Name: "service_id", + Op: modelv1.Condition_BINARY_OP_EQ, + Value: &modelv1.TagValue{Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: "a"}}}, + }, + }, + }, + Right: &modelv1.Criteria{ + Exp: &modelv1.Criteria_Condition{ + Condition: &modelv1.Condition{ + Name: "service_id", + Op: modelv1.Condition_BINARY_OP_EQ, + Value: &modelv1.TagValue{Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: "b"}}}, + }, + }, + }, + }, + }, + } + _, _, collectedTagNamesOR, _, _, _, err := buildTraceFilter( //nolint:dogsled + criteriaOR, schema, entityDict, entity, "trace_id", "span_id", "") + assert.NoError(t, err) + assert.Equal(t, []string{"service_id"}, collectedTagNamesOR, "service_id should appear only once") +} diff --git a/test/cases/trace/data/input/duration_range_order_timestamp.ql b/test/cases/trace/data/input/duration_range_order_timestamp.ql new file mode 100644 index 000000000..e0a4a6214 --- /dev/null +++ b/test/cases/trace/data/input/duration_range_order_timestamp.ql @@ -0,0 +1,23 @@ +# Licensed to Apache Software Foundation (ASF) under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Apache Software Foundation (ASF) licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +SELECT () FROM TRACE zipkin IN zipkinTrace +TIME > '-1h' +WHERE duration >= 10 AND duration <= 1000 +ORDER BY zipkin-timestamp DESC +LIMIT 10 diff --git a/test/cases/trace/data/input/duration_range_order_timestamp.yml b/test/cases/trace/data/input/duration_range_order_timestamp.yml new file mode 100644 index 000000000..a37f54044 --- /dev/null +++ b/test/cases/trace/data/input/duration_range_order_timestamp.yml @@ -0,0 +1,40 @@ +# Licensed to Apache Software Foundation (ASF) under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Apache Software Foundation (ASF) licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: "zipkin" +groups: ["zipkinTrace"] +limit: 10 +order_by: + index_rule_name: "zipkin-timestamp" + sort: "SORT_DESC" +criteria: + le: + op: "LOGICAL_OP_AND" + left: + condition: + name: "duration" + op: "BINARY_OP_GE" + value: + int: + value: 10 + right: + condition: + name: "duration" + op: "BINARY_OP_LE" + value: + int: + value: 1000 diff --git a/test/cases/trace/data/want/duration_range_order_timestamp.yml b/test/cases/trace/data/want/duration_range_order_timestamp.yml new file mode 100644 index 000000000..ce49281d1 --- /dev/null +++ b/test/cases/trace/data/want/duration_range_order_timestamp.yml @@ -0,0 +1,34 @@ +# Licensed to Apache Software Foundation (ASF) under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Apache Software Foundation (ASF) licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +traces: + - spans: + - span: zipkin_trace_003_span_005 + spanId: span_005 + traceId: zipkin_trace_003 + - spans: + - span: zipkin_trace_002_span_003 + spanId: span_003 + - span: zipkin_trace_002_span_004 + spanId: span_004 + traceId: zipkin_trace_002 + - spans: + - span: zipkin_trace_001_span_001 + spanId: span_001 + - span: zipkin_trace_001_span_002 + spanId: span_002 + traceId: zipkin_trace_001 diff --git a/test/cases/trace/trace.go b/test/cases/trace/trace.go index 5a51c5c30..cbe61d32c 100644 --- a/test/cases/trace/trace.go +++ b/test/cases/trace/trace.go @@ -47,6 +47,8 @@ var _ = g.DescribeTable("Scanning Traces", func(args helpers.Args) { g.Entry("query by empty span ids", helpers.Args{Input: "in_empty_span_ids", Duration: 1 * time.Hour, WantEmpty: true}), g.Entry("order by timestamp", helpers.Args{Input: "order_timestamp_desc", Duration: 1 * time.Hour}), g.Entry("order by duration", helpers.Args{Input: "order_duration_desc", Duration: 1 * time.Hour}), + g.Entry("duration range 10-1000 order by timestamp", + helpers.Args{Input: "duration_range_order_timestamp", Duration: 1 * time.Hour}), g.Entry("filter by service id", helpers.Args{Input: "eq_service_order_timestamp_desc", Duration: 1 * time.Hour}), g.Entry("filter by service instance id", helpers.Args{Input: "eq_service_instance_order_time_asc", Duration: 1 * time.Hour}), g.Entry("filter by endpoint", helpers.Args{Input: "eq_endpoint_order_duration_asc", Duration: 1 * time.Hour}),
