This is an automated email from the ASF dual-hosted git repository.

BiteTheDDDDt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f7b7d9ce005 [feature](runtime-filter) Support bloom pruning for list partitions (#64318)
f7b7d9ce005 is described below

commit f7b7d9ce005c176673a6ea3c83af4379029c4277
Author: Pxl <[email protected]>
AuthorDate: Thu Jun 18 14:18:05 2026 +0800

    [feature](runtime-filter) Support bloom pruning for list partitions (#64318)
    
    The RANGE-partition script provided by the reviewer was used as a shape
    reference, but the measured case is intentionally LIST-partitioned. Pure
    Bloom runtime-filter pruning for RANGE partitions remains disabled by
    design in this PR; using `IN_OR_BLOOM_FILTER` on a tiny build side can
    measure the pre-existing IN-set RANGE pruning path instead of the new
    Bloom LIST path.
---
 .../runtime_filter_partition_pruner.cpp            |  86 ++++++++++++++++-
 .../runtime_filter_partition_pruner.h              |   4 +
 .../runtime_filter_partition_pruner_test.cpp       | 107 +++++++++++++++++++++
 .../RuntimeFilterPartitionPruneClassifier.java     |   7 +-
 .../glue/translator/RuntimeFilterTranslator.java   |   4 +-
 .../RuntimeFilterPartitionPruneClassifierTest.java |  78 +++++++++++++++
 .../runtime_filter/rf_partition_pruning.out        |  15 +++
 .../runtime_filter/rf_partition_pruning.groovy     |  46 +++++++++
 8 files changed, 340 insertions(+), 7 deletions(-)

diff --git a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp 
b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
index a25059be8b7..6e6d43472c5 100644
--- a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
+++ b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp
@@ -23,10 +23,14 @@
 #include <optional>
 #include <unordered_set>
 #include <utility>
+#include <vector>
 
 #include "core/block/block.h"
 #include "core/column/column.h"
+#include "core/column/column_decimal.h"
 #include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
 #include "core/data_type/data_type_nullable.h"
 #include "core/field.h"
 #include "exprs/bloom_filter_func.h"
@@ -40,6 +44,39 @@
 
 namespace doris {
 
+namespace {
+
+template <PrimitiveType PT>
+bool bloom_may_match_fixed_values(const ColumnValueRange<PT>& cvr,
+                                  BloomFilterFuncBase* bloom_filter) {
+    if (cvr.get_fixed_value_size() == 0) {
+        return false;
+    }
+
+    using CppType = typename PrimitiveTypeTraits<PT>::CppType;
+    using ColumnType = typename PrimitiveTypeTraits<PT>::ColumnType;
+
+    MutableColumnPtr values_column;
+    if constexpr (IsDecimalNumber<CppType>) {
+        values_column = ColumnType::create(0, cvr.scale());
+    } else {
+        values_column = ColumnType::create();
+    }
+    auto* typed_column = static_cast<ColumnType*>(values_column.get());
+    for (const auto& value : cvr.get_fixed_value_set()) {
+        typed_column->insert_value(value);
+    }
+    const size_t row_count = values_column->size();
+
+    std::vector<uint8_t> results(row_count, 0);
+    ColumnPtr values_column_ptr = std::move(values_column);
+    bloom_filter->find_fixed_len(values_column_ptr, results.data());
+    return std::any_of(results.begin(), results.end(),
+                       [](uint8_t matched) { return matched != 0; });
+}
+
+} // namespace
+
 // 
NOLINTBEGIN(readability-function-cognitive-complexity,readability-function-size)
 // Complexity is inflated by macro expansion for each PrimitiveType case.
 Status ParsedPartitionBoundaries::parse(
@@ -79,6 +116,7 @@ Status ParsedPartitionBoundaries::parse(
         bool is_list = tb.__isset.list_values && !tb.list_values.empty();      
                   \
         bool is_range = tb.__isset.range_start || tb.__isset.range_end;        
                   \
         DORIS_CHECK(is_list || is_range);                                      
                   \
+        boundary.is_list_boundary = is_list;                                   
                   \
         ColumnValueRange<TYPE_##NAME> cvr(slot->col_name(), is_nullable, 
precision, scale);       \
         /* Returns nullopt if `node` is a NULL literal; the caller then sets 
contain_null  */     \
         /* on the CVR instead of trying to extract a typed value (which would 
dereference  */     \
@@ -381,8 +419,9 @@ Status 
ParsedPartitionBoundaries::get_or_compute_projected_boundaries(
             DCHECK(cvr != nullptr);                                            
                  \
             DCHECK(!boundary.only_null);                                       
                  \
             DCHECK(!boundary.contains_null);                                   
                  \
-            boundary_is_list[i] = cvr->is_fixed_value_range();                 
                  \
+            boundary_is_list[i] = boundary.is_list_boundary;                   
                  \
             if (boundary_is_list[i]) {                                         
                  \
+                DORIS_CHECK(cvr->is_fixed_value_range());                      
                  \
                 list_result_begin[i] = list_row_count;                         
                  \
                 for (const auto& value : cvr->get_fixed_value_set()) {         
                  \
                     list_inner->insert_value(value);                           
                  \
@@ -516,6 +555,7 @@ Status 
ParsedPartitionBoundaries::get_or_compute_projected_boundaries(
                 projected_boundary.partition_id = orig_boundary.partition_id;  
                 \
                 projected_boundary.slot_id = leaf_slot_id;                     
                 \
                 projected_boundary.is_nullable = out_nullable;                 
                 \
+                projected_boundary.is_list_boundary = true;                    
                 \
                 projected_boundary.contains_null = list_has_null;              
                 \
                 projected_boundary.only_null = list_has_null && 
!list_has_value;                \
                 projected_boundary.boundary_cvr = std::move(cvr);              
                 \
@@ -552,6 +592,7 @@ Status 
ParsedPartitionBoundaries::get_or_compute_projected_boundaries(
             projected_boundary.partition_id = orig_boundary.partition_id;      
                 \
             projected_boundary.slot_id = leaf_slot_id;                         
                 \
             projected_boundary.is_nullable = out_nullable;                     
                 \
+            projected_boundary.is_list_boundary = false;                       
                 \
             projected_boundary.only_null = orig_boundary.only_null;            
                 \
             projected_boundary.contains_null = orig_boundary.contains_null;    
                 \
             projected_boundary.boundary_cvr = std::move(cvr);                  
                 \
@@ -627,12 +668,49 @@ void 
RuntimeFilterPartitionPruner::_try_prune_by_single_rf(
     // FilterBase::contain_null() already folds in `_null_aware`, so we only
     // get a true result when the build side is actually null-aware AND
     // produced a NULL value.
+    if (impl->node_type() == TExprNodeType::BLOOM_PRED) {
+        auto bloom = impl->get_bloom_filter_func();
+        DORIS_CHECK(bloom != nullptr);
+        bool rf_contains_null = bloom->contain_null();
+
+        for (const auto& pb : boundaries) {
+            if (_pruned_partition_ids.contains(pb.partition_id) ||
+                newly_pruned.contains(pb.partition_id)) {
+                continue;
+            }
+
+            if (pb.only_null) {
+                if (!rf_contains_null) {
+                    newly_pruned.insert(pb.partition_id);
+                }
+                continue;
+            }
+            if (pb.contains_null && rf_contains_null) {
+                continue;
+            }
+            if (!pb.is_list_boundary) {
+                continue;
+            }
+
+            bool may_match = true;
+            std::visit(
+                    [&](const auto& boundary_cvr) {
+                        if (!boundary_cvr.is_fixed_value_range()) {
+                            return;
+                        }
+                        may_match = bloom_may_match_fixed_values(boundary_cvr, 
bloom.get());
+                    },
+                    pb.boundary_cvr);
+            if (!may_match) {
+                newly_pruned.insert(pb.partition_id);
+            }
+        }
+        return;
+    }
+
     bool rf_contains_null = false;
     if (auto hybrid_set = impl->get_set_func()) {
         rf_contains_null = hybrid_set->contain_null();
-    } else if (impl->node_type() == TExprNodeType::BLOOM_PRED) {
-        auto bloom = impl->get_bloom_filter_func();
-        rf_contains_null = bloom && bloom->contain_null();
     } else if (impl->node_type() == TExprNodeType::NULL_AWARE_BINARY_PRED) {
         // Min/Max RF built on a null-safe equal join. The literal child holds
         // the min or max bound; the NULL semantic is conveyed by the node
diff --git a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h 
b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
index f718fe7e418..9e3d0150db1 100644
--- a/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
+++ b/be/src/exec/runtime_filter/runtime_filter_partition_pruner.h
@@ -43,6 +43,10 @@ struct ParsedBoundary {
     int64_t partition_id = 0;
     SlotId slot_id = 0;
     bool is_nullable = false;
+    // True only when the original/projection boundary is a finite LIST value 
set.
+    // Bloom RF pruning relies on complete value enumeration and must not use 
RANGE
+    // boundaries, even when a RANGE projection degenerates to a single point.
+    bool is_list_boundary = false;
     ColumnValueRangeType boundary_cvr;
     // True if the partition's value set is exactly {NULL} (e.g. LIST
     // partition whose only key is NULL). The CVR alone cannot encode
diff --git 
a/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp 
b/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
index c4193061f0c..6267c172c98 100644
--- a/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
+++ b/be/test/exec/runtime_filter/runtime_filter_partition_pruner_test.cpp
@@ -28,9 +28,11 @@
 #include "core/data_type/data_type_factory.hpp"
 #include "core/string_ref.h"
 #include "core/types.h"
+#include "exec/runtime_filter/runtime_filter_definitions.h"
 #include "exec/runtime_filter/utils.h"
 #include "exprs/create_predicate_function.h"
 #include "exprs/runtime_filter_expr.h"
+#include "exprs/vbloom_predicate.h"
 #include "exprs/vdirect_in_predicate.h"
 #include "exprs/vexpr.h"
 #include "exprs/vexpr_context.h"
@@ -164,6 +166,42 @@ protected:
         return VDirectInPredicate::create_shared(node, in_filter<PT>(value, 
contain_null));
     }
 
+    template <PrimitiveType PT>
+    VExprSPtr bloom_predicate(const std::vector<CppType<PT>>& values, bool 
contain_null = false) {
+        std::shared_ptr<BloomFilterFuncBase> filter(create_bloom_filter(PT, 
contain_null));
+        RuntimeFilterParams params;
+        params.filter_type = RuntimeFilterType::BLOOM_FILTER;
+        params.column_return_type = PT;
+        params.null_aware = contain_null;
+        params.bloom_filter_size = 1024;
+        filter->init_params(&params);
+        EXPECT_TRUE(filter->init_with_fixed_length(1024).ok());
+
+        using ColumnType = typename PrimitiveTypeTraits<PT>::ColumnType;
+        MutableColumnPtr values_column = ColumnType::create();
+        auto* typed_column = assert_cast<ColumnType*>(values_column.get());
+        for (const auto& value : values) {
+            typed_column->insert_value(value);
+        }
+        ColumnPtr values_column_ptr = std::move(values_column);
+        filter->insert_fixed_len(values_column_ptr, 0);
+
+        if (contain_null) {
+            std::shared_ptr<HybridSetBase> null_set(create_set(PT, 
contain_null));
+            null_set->insert(static_cast<const void*>(nullptr));
+            filter->insert_set(null_set);
+        }
+
+        TExprNode node;
+        node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+        node.__set_node_type(TExprNodeType::BLOOM_PRED);
+        node.__set_opcode(TExprOpcode::RT_FILTER);
+        node.__set_is_nullable(false);
+        auto bloom_pred = VBloomPredicate::create_shared(node);
+        bloom_pred->set_filter(filter);
+        return bloom_pred;
+    }
+
     template <PrimitiveType PT>
     VExprSPtr minmax_predicate_le(const CppType<PT>& value, const DataTypePtr& 
type) {
         VExprSPtr pred;
@@ -226,6 +264,7 @@ protected:
         ASSERT_FALSE(parsed->empty());
         ASSERT_EQ(parsed->total_partitions(), 2);
         const auto& parsed_boundaries = 
parsed->slot_to_boundaries().at(SLOT_ID);
+        EXPECT_TRUE(parsed_boundaries[0].is_list_boundary);
 
         RuntimeFilterPartitionPruner in_pruner;
         phmap::flat_hash_set<int64_t> in_pruned;
@@ -247,6 +286,7 @@ protected:
         auto parsed_range = parse_boundaries(PT, range_boundaries, false, 
precision, scale);
         EXPECT_FALSE(parsed_range->empty());
         EXPECT_EQ(parsed_range->total_partitions(), 1);
+        
EXPECT_FALSE(parsed_range->slot_to_boundaries().at(SLOT_ID)[0].is_list_boundary);
     }
 
     DateV2Value<DateV2ValueType> date_v2(uint16_t year, uint8_t month, uint8_t 
day) {
@@ -346,6 +386,8 @@ TEST_F(RuntimeFilterPartitionPrunerTest, 
ProjectedBoundariesSupportListValues) {
                         .ok());
     ASSERT_EQ(projected->size(), 2);
 
+    EXPECT_TRUE(projected->at(0).is_list_boundary);
+    EXPECT_TRUE(projected->at(1).is_list_boundary);
     const auto& first = 
std::get<ColumnValueRange<TYPE_INT>>(projected->at(0).boundary_cvr);
     EXPECT_TRUE(first.is_fixed_value_range());
     EXPECT_TRUE(first.get_fixed_value_set().contains(one));
@@ -361,6 +403,71 @@ TEST_F(RuntimeFilterPartitionPrunerTest, 
ProjectedBoundariesSupportListValues) {
     EXPECT_TRUE(pruned.contains(2));
 }
 
+TEST_F(RuntimeFilterPartitionPrunerTest, BloomPrunesListPartitionFixedValues) {
+    int32_t one = 1;
+    int32_t two = 2;
+    int32_t three = 3;
+    int32_t four = 4;
+    int32_t five = 5;
+    std::vector<TPartitionBoundary> boundaries {
+            list_boundary<TYPE_INT>(1, {literal_node<TYPE_INT>(one)}),
+            list_boundary<TYPE_INT>(2,
+                                    {literal_node<TYPE_INT>(two), 
literal_node<TYPE_INT>(three)}),
+            list_boundary<TYPE_INT>(3,
+                                    {literal_node<TYPE_INT>(four), 
literal_node<TYPE_INT>(five)})};
+    auto parsed = parse_boundaries(TYPE_INT, boundaries);
+    const auto& parsed_boundaries = parsed->slot_to_boundaries().at(SLOT_ID);
+
+    RuntimeFilterPartitionPruner pruner;
+    phmap::flat_hash_set<int64_t> pruned;
+    pruner._try_prune_by_single_rf(parsed_boundaries, 
bloom_predicate<TYPE_INT>({two}), pruned);
+    EXPECT_TRUE(pruned.contains(1));
+    EXPECT_FALSE(pruned.contains(2));
+    EXPECT_TRUE(pruned.contains(3));
+}
+
+TEST_F(RuntimeFilterPartitionPrunerTest, BloomPreservesListNullSemantics) {
+    int32_t one = 1;
+    int32_t two = 2;
+    std::vector<TPartitionBoundary> boundaries {
+            list_boundary<TYPE_INT>(1, {null_node(TYPE_INT)}),
+            list_boundary<TYPE_INT>(2, {null_node(TYPE_INT), 
literal_node<TYPE_INT>(one)}),
+            list_boundary<TYPE_INT>(3, {literal_node<TYPE_INT>(two)})};
+    auto parsed = parse_boundaries(TYPE_INT, boundaries, true);
+    const auto& parsed_boundaries = parsed->slot_to_boundaries().at(SLOT_ID);
+
+    RuntimeFilterPartitionPruner non_null_pruner;
+    phmap::flat_hash_set<int64_t> non_null_pruned;
+    non_null_pruner._try_prune_by_single_rf(parsed_boundaries, 
bloom_predicate<TYPE_INT>({one}),
+                                            non_null_pruned);
+    EXPECT_TRUE(non_null_pruned.contains(1));
+    EXPECT_FALSE(non_null_pruned.contains(2));
+    EXPECT_TRUE(non_null_pruned.contains(3));
+
+    RuntimeFilterPartitionPruner null_aware_pruner;
+    phmap::flat_hash_set<int64_t> null_aware_pruned;
+    null_aware_pruner._try_prune_by_single_rf(
+            parsed_boundaries, bloom_predicate<TYPE_INT>({one}, true), 
null_aware_pruned);
+    EXPECT_FALSE(null_aware_pruned.contains(1));
+    EXPECT_FALSE(null_aware_pruned.contains(2));
+    EXPECT_TRUE(null_aware_pruned.contains(3));
+}
+
+TEST_F(RuntimeFilterPartitionPrunerTest, BloomDoesNotPruneRangePartition) {
+    int32_t one = 1;
+    int32_t two = 2;
+    int32_t miss = 100;
+    std::vector<TPartitionBoundary> boundaries {range_boundary<TYPE_INT>(1, 
one, two)};
+    auto parsed = parse_boundaries(TYPE_INT, boundaries);
+    const auto& parsed_boundaries = parsed->slot_to_boundaries().at(SLOT_ID);
+    ASSERT_FALSE(parsed_boundaries[0].is_list_boundary);
+
+    RuntimeFilterPartitionPruner pruner;
+    phmap::flat_hash_set<int64_t> pruned;
+    pruner._try_prune_by_single_rf(parsed_boundaries, 
bloom_predicate<TYPE_INT>({miss}), pruned);
+    EXPECT_TRUE(pruned.empty());
+}
+
 TEST_F(RuntimeFilterPartitionPrunerTest, InvalidPartitionBoundaryRejected) {
     TPartitionBoundary boundary;
     boundary.__set_partition_id(1);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
index da2aa99092f..b1dd098324e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifier.java
@@ -36,6 +36,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.Monotonic;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.planner.OlapScanNode;
 import org.apache.doris.planner.PlanNode;
+import org.apache.doris.thrift.TRuntimeFilterType;
 import org.apache.doris.thrift.TTargetExprMonotonicity;
 
 import com.google.common.collect.Range;
@@ -55,7 +56,8 @@ final class RuntimeFilterPartitionPruneClassifier {
     private RuntimeFilterPartitionPruneClassifier() {
     }
 
-    static Classification classify(Expr targetExpr, Expression 
nereidsTargetExpr, PlanNode scanNode) {
+    static Classification classify(TRuntimeFilterType filterType, Expr 
targetExpr,
+            Expression nereidsTargetExpr, PlanNode scanNode) {
         if (!(scanNode instanceof OlapScanNode)) {
             return Classification.unsupported("target scan is not an 
OlapScanNode");
         }
@@ -71,6 +73,9 @@ final class RuntimeFilterPartitionPruneClassifier {
         if (partType != PartitionType.RANGE && partType != PartitionType.LIST) 
{
             return Classification.unsupported("partition type is not RANGE or 
LIST");
         }
+        if (filterType == TRuntimeFilterType.BLOOM && partType == 
PartitionType.RANGE) {
+            return Classification.unsupported("BLOOM runtime filter does not 
support RANGE partition pruning");
+        }
         if (hasUnsupportedAutomaticPartitionExpression(partitionInfo)) {
             return Classification.unsupported("automatic partition expression 
boundary is not modeled");
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
index b0be2b9ec0e..286330d79ea 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
@@ -246,7 +246,7 @@ public class RuntimeFilterTranslator {
                     }
                     RuntimeFilterPartitionPruneClassifier.Classification 
classification =
                             RuntimeFilterPartitionPruneClassifier.classify(
-                                    targetExpr, nereidsTargetExprList.get(i), 
scanNode);
+                                    head.getType(), targetExpr, 
nereidsTargetExprList.get(i), scanNode);
                     if (classification.canPrunePartitions()) {
                         
origFilter.markTargetCanPrunePartitions(scanNode.getId());
                     }
@@ -353,7 +353,7 @@ public class RuntimeFilterTranslator {
                     }
                     RuntimeFilterPartitionPruneClassifier.Classification 
classification =
                             RuntimeFilterPartitionPruneClassifier.classify(
-                                    targetExpr, 
filter.getTargetExpressions().get(i), scanNode);
+                                    filter.getType(), targetExpr, 
filter.getTargetExpressions().get(i), scanNode);
                     if (classification.canPrunePartitions()) {
                         
origFilter.markTargetCanPrunePartitions(scanNode.getId());
                     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
index d8982dbd147..34de78c7a76 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/RuntimeFilterPartitionPruneClassifierTest.java
@@ -17,14 +17,34 @@
 
 package org.apache.doris.nereids.glue.translator;
 
+import org.apache.doris.analysis.SlotDescriptor;
+import org.apache.doris.analysis.SlotId;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.analysis.TupleId;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.ListPartitionItem;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionInfo;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.RangePartitionItem;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.functions.Monotonic;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.DateTrunc;
 import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
 import org.apache.doris.nereids.types.DateTimeV2Type;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.planner.OlapScanNode;
+import org.apache.doris.thrift.TRuntimeFilterType;
+import org.apache.doris.thrift.TTargetExprMonotonicity;
 
+import com.google.common.collect.ImmutableList;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import java.util.Map;
 
 class RuntimeFilterPartitionPruneClassifierTest {
     @Test
@@ -47,4 +67,62 @@ class RuntimeFilterPartitionPruneClassifierTest {
         
Assertions.assertTrue(RuntimeFilterPartitionPruneClassifier.hasInputSlotOnlyInMonotonicChild(
                 dateTrunc, monotonic.getMonotonicFunctionChildIndex()));
     }
+
+    @Test
+    void testBloomRangePartitionUnsupported() {
+        RuntimeFilterPartitionPruneClassifier.Classification classification = 
classify(
+                TRuntimeFilterType.BLOOM, PartitionType.RANGE, 
RangePartitionItem.DUMMY_ITEM);
+
+        Assertions.assertFalse(classification.canPrunePartitions());
+        
Assertions.assertTrue(classification.getUnsupportedReason().contains("BLOOM"));
+        
Assertions.assertTrue(classification.getPartitionMonotonicity().isEmpty());
+    }
+
+    @Test
+    void testBloomListPartitionSupported() {
+        RuntimeFilterPartitionPruneClassifier.Classification classification = 
classify(
+                TRuntimeFilterType.BLOOM, PartitionType.LIST, 
ListPartitionItem.DUMMY_ITEM);
+
+        assertSupportedIncreasingPartitions(classification);
+    }
+
+    @Test
+    void testInOrBloomRangePartitionStillSupported() {
+        RuntimeFilterPartitionPruneClassifier.Classification classification = 
classify(
+                TRuntimeFilterType.IN_OR_BLOOM, PartitionType.RANGE, 
RangePartitionItem.DUMMY_ITEM);
+
+        assertSupportedIncreasingPartitions(classification);
+    }
+
+    private RuntimeFilterPartitionPruneClassifier.Classification classify(
+            TRuntimeFilterType filterType, PartitionType partitionType, 
PartitionItem partitionItem) {
+        Column partitionColumn = new Column("part_col", PrimitiveType.INT);
+        SlotDescriptor slotDescriptor = new SlotDescriptor(new SlotId(1), new 
TupleId(1));
+        slotDescriptor.setColumn(partitionColumn);
+        slotDescriptor.setType(partitionColumn.getType());
+        SlotRef targetSlot = new SlotRef(slotDescriptor);
+        SlotReference nereidsTarget = new SlotReference("part_col", 
IntegerType.INSTANCE);
+
+        OlapTable table = Mockito.mock(OlapTable.class);
+        PartitionInfo partitionInfo = Mockito.mock(PartitionInfo.class);
+        OlapScanNode scanNode = Mockito.mock(OlapScanNode.class);
+        Mockito.when(scanNode.getOlapTable()).thenReturn(table);
+        
Mockito.when(scanNode.getSelectedPartitionIds()).thenReturn(ImmutableList.of(1L,
 2L));
+        Mockito.when(table.getPartitionInfo()).thenReturn(partitionInfo);
+        Mockito.when(partitionInfo.getType()).thenReturn(partitionType);
+        
Mockito.when(partitionInfo.getPartitionColumns()).thenReturn(ImmutableList.of(partitionColumn));
+        Mockito.when(partitionInfo.getItem(1L)).thenReturn(partitionItem);
+        Mockito.when(partitionInfo.getItem(2L)).thenReturn(partitionItem);
+
+        return RuntimeFilterPartitionPruneClassifier.classify(filterType, 
targetSlot, nereidsTarget, scanNode);
+    }
+
+    private void assertSupportedIncreasingPartitions(
+            RuntimeFilterPartitionPruneClassifier.Classification 
classification) {
+        Assertions.assertTrue(classification.canPrunePartitions());
+        Map<Long, TTargetExprMonotonicity> monotonicity = 
classification.getPartitionMonotonicity();
+        Assertions.assertEquals(2, monotonicity.size());
+        Assertions.assertEquals(TTargetExprMonotonicity.MONOTONIC_INCREASING, 
monotonicity.get(1L));
+        Assertions.assertEquals(TTargetExprMonotonicity.MONOTONIC_INCREASING, 
monotonicity.get(2L));
+    }
 }
diff --git 
a/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out 
b/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
index 09a100d695b..44295ef30a1 100644
--- a/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
+++ b/regression-test/data/query_p0/runtime_filter/rf_partition_pruning.out
@@ -30,6 +30,12 @@
 5      3       e
 6      3       f
 
+-- !list_int_bloom --
+1      1       a
+2      1       b
+5      3       e
+6      3       f
+
 -- !no_pruning --
 13     250     m
 18     350     r
@@ -123,3 +129,12 @@ Beijing    3
 2      1       4       b
 3      1       5       c
 4      1       9       d
+
+-- !list_expr_bloom --
+1      1       a
+2      1       b
+5      3       e
+6      3       f
+
+-- !list_str_bloom --
+3      c
diff --git 
a/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy 
b/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
index 37453ecfbd7..1b76f964aac 100644
--- a/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
+++ b/regression-test/suites/query_p0/runtime_filter/rf_partition_pruning.groovy
@@ -442,6 +442,24 @@ suite("rf_partition_pruning", "nonConcurrent") {
         "* FROM rf_prune_list_int f JOIN rf_prune_dim_region d ON f.region_id 
= d.dim_region",
         "IN_OR_BLOOM_FILTER", 5, 3)
 
+    // Test 6b: List partition (INT) - Bloom filter prune.
+    // Regions {1, 3} keep two LIST partitions and prune the other three.
+    order_qt_list_int_bloom """
+        SELECT /*+ SET_VAR(runtime_filter_type='BLOOM_FILTER') */
+            f.id, f.region_id, f.value
+        FROM rf_prune_list_int f
+        JOIN rf_prune_dim_region d ON f.region_id = d.dim_region
+    """
+    assertPruningProfile(
+        "* FROM rf_prune_list_int f JOIN rf_prune_dim_region d ON f.region_id 
= d.dim_region",
+        "BLOOM_FILTER", 5, 3)
+
+    // Test 6c: Range partition (INT) - Bloom filter must not register RF 
partition pruning.
+    // A Bloom filter can disprove individual values, not an arbitrary [a, b) 
range.
+    assertNoPartitionPruningProfile(
+        "* FROM rf_prune_range_int f JOIN rf_prune_dim_int d ON f.part_col = 
d.dim_key",
+        "BLOOM_FILTER")
+
     // Test 7: No pruning - dim matches all partitions
     sql "drop table if exists rf_prune_dim_all"
     sql """
@@ -1070,6 +1088,9 @@ suite("rf_partition_pruning", "nonConcurrent") {
     assertPruningProfile(
         "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_five d ON f.part_col = 
d.dim_key",
         "IN_OR_BLOOM_FILTER", 3, 2)
+    assertPruningProfile(
+        "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_five d ON f.part_col = 
d.dim_key",
+        "BLOOM_FILTER", 3, 2)
 
     // Test 31: Mixed partition {NULL,5} + RF {7} (no value match, RF 
non-null-aware)
     //   p_a still pruned (NULL row can't match non-null RF; concrete 5 != 7)
@@ -1096,6 +1117,9 @@ suite("rf_partition_pruning", "nonConcurrent") {
     assertPruningProfile(
         "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_seven d ON f.part_col 
= d.dim_key",
         "IN_OR_BLOOM_FILTER", 3, 3)
+    assertPruningProfile(
+        "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_seven d ON f.part_col 
= d.dim_key",
+        "BLOOM_FILTER", 3, 3)
 
     // Test 32: Null-safe equal join (<=>) on mixed partition. RF is 
null_aware AND
     //   contains NULL (build side has NULL key), so p_a (which contains NULL) 
MUST
@@ -1117,6 +1141,9 @@ suite("rf_partition_pruning", "nonConcurrent") {
         FROM rf_prune_list_mixed f
         JOIN rf_prune_dim_null d ON f.part_col <=> d.dim_key
     """
+    assertPruningProfile(
+        "* FROM rf_prune_list_mixed f JOIN rf_prune_dim_null d ON f.part_col 
<=> d.dim_key",
+        "BLOOM_FILTER", 3, 2)
 
     // Test 32b: Nullable RANGE partition columns can store NULL rows in the
     // MINVALUE-side first partition. A null-aware RF containing only NULL must
@@ -1441,6 +1468,16 @@ suite("rf_partition_pruning", "nonConcurrent") {
         "count(*) FROM rf_prune_list_int f JOIN rf_prune_dim_region_twice d "
                 + "ON f.region_id + f.region_id = d.dim_region",
         "IN_OR_BLOOM_FILTER", 5, 3)
+    order_qt_list_expr_bloom """
+        SELECT /*+ SET_VAR(runtime_filter_type='BLOOM_FILTER') */
+            f.id, f.region_id, f.value
+        FROM rf_prune_list_int f
+        JOIN rf_prune_dim_region_twice d ON f.region_id + f.region_id = 
d.dim_region
+    """
+    assertPruningProfile(
+        "count(*) FROM rf_prune_list_int f JOIN rf_prune_dim_region_twice d "
+                + "ON f.region_id + f.region_id = d.dim_region",
+        "BLOOM_FILTER", 5, 3)
 
     // ============================================================
     // Test 51: String partition column (LIST partition on VARCHAR).
@@ -1485,6 +1522,15 @@ suite("rf_partition_pruning", "nonConcurrent") {
     assertPruningProfile(
         "* FROM rf_prune_list_str f JOIN rf_prune_dim_str d ON f.part_col = 
d.dim_key",
         "IN_OR_BLOOM_FILTER", 4, 3)
+    order_qt_list_str_bloom """
+        SELECT /*+ SET_VAR(runtime_filter_type='BLOOM_FILTER') */
+            f.id, f.part_col
+        FROM rf_prune_list_str f
+        JOIN rf_prune_dim_str d ON f.part_col = d.dim_key
+    """
+    assertPruningProfile(
+        "* FROM rf_prune_list_str f JOIN rf_prune_dim_str d ON f.part_col = 
d.dim_key",
+        "BLOOM_FILTER", 4, 3)
 
     // ============================================================
     // Test 52: Grouped RF with multiple targets.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to