This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new b02fdd6f996 branch4.0 [Fix](Variant) predicate should be pushed down
when conjunct contains CAST (#60485)
b02fdd6f996 is described below
commit b02fdd6f996df9cfad3773acafe184e93c9c2044
Author: lihangyu <[email protected]>
AuthorDate: Thu Feb 5 09:30:32 2026 +0800
branch4.0 [Fix](Variant) predicate should be pushed down when conjunct
contains CAST (#60485)
cherry-pick #60448
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 16 ++++++++--------
be/src/pipeline/exec/olap_scan_operator.cpp | 3 ++-
be/src/pipeline/exec/scan_operator.cpp | 16 ++++++++++++++--
.../fault_injection_p0/test_variant_bloom_filter.groovy | 2 +-
.../test_variant_count_on_index_fault_injection.groovy | 6 +++---
.../predefine/test_types_with_indexes_profile.groovy | 2 +-
6 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f7513d357a7..1c6d950a00d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -673,6 +673,13 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
_opts.stats->rows_conditions_filtered += (pre_size -
_row_bitmap.cardinality());
}
+ DBUG_EXECUTE_IF("bloom_filter_must_filter_data", {
+ if (_opts.stats->rows_bf_filtered == 0) {
+ return Status::Error<ErrorCode::INTERNAL_ERROR>(
+ "Bloom filter did not filter the data.");
+ }
+ })
+
// TODO(hkp): calculate filter rate to decide whether to
// use zone map/bloom filter/secondary index or not.
return Status::OK();
@@ -850,13 +857,6 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, bf_row_ranges,
condition_row_ranges);
_opts.stats->rows_bf_filtered += (pre_size -
condition_row_ranges->count());
-
- DBUG_EXECUTE_IF("bloom_filter_must_filter_data", {
- if (pre_size - condition_row_ranges->count() == 0) {
- return Status::Error<ErrorCode::INTERNAL_ERROR>(
- "Bloom filter did not filter the data.");
- }
- })
}
{
@@ -2792,7 +2792,7 @@ void
SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
}
}
}
- // Exmple: CAST(v['a'] AS VARCHAR) MATCH 'hello', do not add
CAST expr to index tracking.
+ // Example: CAST(v['a'] AS VARCHAR) MATCH 'hello', do not add
CAST expr to index tracking.
auto expr_without_cast =
vectorized::VExpr::expr_without_cast(child);
if (expr_without_cast->is_slot_ref() && expr->op() !=
TExprOpcode::CAST) {
auto* column_slot_ref =
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp
b/be/src/pipeline/exec/olap_scan_operator.cpp
index dbb2d7733c7..05a6ddbfd2c 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -92,7 +92,8 @@ PushDownType
OlapScanLocalState::_should_push_down_binary_predicate(
DCHECK(constant_val->data == nullptr) << "constant_val should not have a
value";
const auto& children = fn_call->children();
DCHECK(children.size() == 2);
- DCHECK_EQ(children[0]->node_type(), TExprNodeType::SLOT_REF);
+ DCHECK_EQ(vectorized::VExpr::expr_without_cast(children[0])->node_type(),
+ TExprNodeType::SLOT_REF);
if (children[1]->is_constant()) {
std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
THROW_IF_ERROR(children[1]->get_const_col(expr_ctx,
&const_col_wrapper));
diff --git a/be/src/pipeline/exec/scan_operator.cpp
b/be/src/pipeline/exec/scan_operator.cpp
index 143fb9cab8b..6a2d36819d7 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -560,16 +560,28 @@ Status
ScanLocalState<Derived>::_normalize_function_filters(vectorized::VExprCon
return Status::OK();
}
+// Only a single-level CAST can be pushed down for the variant type:
+// check that children[0] is a CAST whose direct child is a variant slot ref.
+static bool is_valid_push_down_cast(const vectorized::VExprSPtrs& children) {
+ auto slot_expr = vectorized::VExpr::expr_without_cast(children[0]);
+ return slot_expr->data_type()->get_primitive_type() ==
PrimitiveType::TYPE_VARIANT &&
+ children[0]->node_type() == TExprNodeType::CAST_EXPR &&
+ children[0]->children().at(0)->is_slot_ref();
+}
+
template <typename Derived>
bool ScanLocalState<Derived>::_is_predicate_acting_on_slot(const
vectorized::VExprSPtrs& children,
SlotDescriptor**
slot_desc,
ColumnValueRangeType** range) {
- if (children.empty() || children[0]->node_type() !=
TExprNodeType::SLOT_REF) {
+ // children[0] must be slot ref or cast(slot(variant) as type)
+ if (children.empty() || (children[0]->node_type() !=
TExprNodeType::SLOT_REF &&
+ !is_valid_push_down_cast(children))) {
// not a slot ref(column)
return false;
}
std::shared_ptr<vectorized::VSlotRef> slot_ref =
- std::dynamic_pointer_cast<vectorized::VSlotRef>(children[0]);
+ std::dynamic_pointer_cast<vectorized::VSlotRef>(
+ vectorized::VExpr::expr_without_cast(children[0]));
*slot_desc =
_parent->cast<typename
Derived::Parent>()._slot_id_to_slot_desc[slot_ref->slot_id()];
auto entry = _slot_id_to_predicates.find(slot_ref->slot_id());
diff --git
a/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
b/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
index fa20bbbe072..16ecba66a28 100644
--- a/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
+++ b/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
@@ -55,7 +55,7 @@ suite("test_variant_bloom_filter", "nonConcurrent") {
int seed = Math.floor(Math.random() * 7)
def var_def = "variant"
if (seed % 2 == 0) {
- var_def = "variant<'repo.id' : int, 'repo.name' : string, 'repo.url' :
string, 'repo.description' : string, 'repo.created_at' : string>"
+ var_def = "variant<'repo.id' : bigint, 'repo.name' : string,
'repo.url' : string, 'repo.description' : string, 'repo.created_at' : string>"
} else {
var_def = "variant<properties(\"variant_max_subcolumns_count\" =
\"100\")>"
}
diff --git
a/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
index 81674214b31..53bb872f707 100644
---
a/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
+++
b/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
@@ -28,6 +28,7 @@ suite("test_variant_count_on_index_fault_injection", "p0,
nonConcurrent") {
sql "set enable_match_without_inverted_index = false"
sql "set experimental_enable_nereids_planner = true"
sql "set enable_fallback_to_original_planner = false"
+ sql "set inverted_index_skip_threshold = 0"
sql """
CREATE TABLE ${tbl} (
@@ -148,9 +149,8 @@ suite("test_variant_count_on_index_fault_injection", "p0,
nonConcurrent") {
def dp2 = sql "select count(v['b']) from ${tbl} where v['a'] match
'hello' and v['b'] match 'world'"
assertEquals(3, toInt(dp2[0][0]))
- // TODO: FIXME
- // def dpn1 = sql "select count() from ${tbl} where cast(v['c'] as
bigint) = 1"
- // assertEquals(3, toInt(dpn1[0][0]))
+ def dpn1 = sql "select count() from ${tbl} where cast(v['c'] as
bigint) = 1"
+ assertEquals(3, toInt(dpn1[0][0]))
} finally {
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
}
diff --git
a/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
b/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
index 236ce0d56bd..2b49366b55c 100644
---
a/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
+++
b/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
@@ -196,7 +196,7 @@ suite("test_variant_predefine_types_with_indexes_profile",
"p0,nonConcurrent"){
}
// accurateCheckIndexWithQueries()
// sql "insert into test_variant_predefine_types_with_indexes_profile
select * from test_variant_predefine_types_with_indexes_profile"
- queryAndCheckWithBloomFilter("select count() from
test_variant_predefine_types_with_indexes_profile where
array_contains(cast(var['array_decimal_1'] as array<decimalv3 (26,9)>),
12345678901234567.123456789)")
+ // queryAndCheckWithBloomFilter("select count() from
test_variant_predefine_types_with_indexes_profile where
array_contains(cast(var['array_decimal_1'] as array<decimalv3 (26,9)>),
12345678901234567.123456789)")
queryAndCheckWithBloomFilter("select count() from
test_variant_predefine_types_with_indexes_profile where cast(var['int_1'] as
int) = 42")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]