This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 77c88204fdd0eaf37427f3736acd69c553609bd2 Author: Mryange <[email protected]> AuthorDate: Thu Aug 31 21:12:09 2023 +0800 [fix](filter) fix error id in bloomfilter (#23564) 1. "set" may overwrite the original ID. 2. A bloom filter may not necessarily be an IN_OR_BLOOM_FILTER. Before this fix, the profile may show: RuntimeFilterInfo id -1: [type = BF, input = 25, filtered = 0] After this fix, it shows: RuntimeFilterInfo id 0: [type = BF, input = 25, filtered = 0] --- be/src/exprs/bitmapfilter_predicate.h | 7 ++----- be/src/exprs/bloom_filter_func.h | 11 +---------- be/src/exprs/hybrid_set.h | 6 ++---- be/src/exprs/runtime_filter.cpp | 3 +-- be/src/exprs/runtime_filter.h | 12 ++++++++++++ be/src/olap/bitmap_filter_predicate.h | 1 + be/src/olap/bloom_filter_predicate.h | 7 ++++++- be/src/olap/column_predicate.h | 3 ++- be/src/olap/in_list_predicate.h | 2 ++ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +++- 10 files changed, 32 insertions(+), 24 deletions(-) diff --git a/be/src/exprs/bitmapfilter_predicate.h b/be/src/exprs/bitmapfilter_predicate.h index 561bc7a001..743a55c4b6 100644 --- a/be/src/exprs/bitmapfilter_predicate.h +++ b/be/src/exprs/bitmapfilter_predicate.h @@ -19,6 +19,7 @@ #include <algorithm> +#include "exprs/runtime_filter.h" #include "gutil/integral_types.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" @@ -27,7 +28,7 @@ namespace doris { // only used in Runtime Filter -class BitmapFilterFuncBase { +class BitmapFilterFuncBase : public FilterFuncBase { public: virtual void insert(const void* data) = 0; virtual void insert_many(const std::vector<const BitmapValue*> bitmaps) = 0; @@ -43,13 +44,9 @@ public: void set_not_in(bool not_in) { _not_in = not_in; } virtual ~BitmapFilterFuncBase() = default; - void set_filter_id(int filter_id) { _filter_id = filter_id; } - int get_filter_id() const { return _filter_id; } - protected: // true -> not in bitmap, false -> in bitmap bool _not_in {false}; - int _filter_id = -1; }; template <PrimitiveType type> diff --git 
a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index 2d2b7664ac..1c77ce8e90 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -94,7 +94,7 @@ private: }; // Only Used In RuntimeFilter -class BloomFilterFuncBase { +class BloomFilterFuncBase : public FilterFuncBase { public: BloomFilterFuncBase() : _inited(false) {} @@ -164,7 +164,6 @@ public: << other_func->_bloom_filter_alloced; return Status::InvalidArgument("bloom filter size invalid"); } - set_filter_id(other_func->_filter_id); return _bloom_filter->merge(other_func->_bloom_filter.get()); } { @@ -176,7 +175,6 @@ public: _bloom_filter = bloomfilter_func->_bloom_filter; _bloom_filter_alloced = other_func->_bloom_filter_alloced; _inited = true; - set_filter_id(other_func->_filter_id); return Status::OK(); } else { DCHECK(bloomfilter_func != nullptr); @@ -187,7 +185,6 @@ public: << other_func->_bloom_filter_alloced; return Status::InvalidArgument("bloom filter size invalid"); } - set_filter_id(other_func->_filter_id); return _bloom_filter->merge(other_func->_bloom_filter.get()); } } @@ -215,13 +212,8 @@ public: _bloom_filter_alloced = other_func->_bloom_filter_alloced; _bloom_filter = other_func->_bloom_filter; _inited = other_func->_inited; - set_filter_id(other_func->_filter_id); } - void set_filter_id(int filter_id) { _filter_id = filter_id; } - - int get_filter_id() const { return _filter_id; } - virtual void insert(const void* data) = 0; // This function is only to be used if the be_exec_version may be less than 2. If updated, please delete it. 
@@ -255,7 +247,6 @@ protected: std::mutex _lock; int64_t _bloom_filter_length; bool _build_bf_exactly = false; - int _filter_id = -1; }; template <class T> diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index c158b9ab52..5bb5b9d69c 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -18,6 +18,7 @@ #pragma once #include "common/object_pool.h" +#include "exprs/runtime_filter.h" #include "runtime/decimalv2_value.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" @@ -174,7 +175,7 @@ private: }; // TODO Maybe change void* parameter to template parameter better. -class HybridSetBase { +class HybridSetBase : public FilterFuncBase { public: HybridSetBase() = default; virtual ~HybridSetBase() = default; @@ -225,9 +226,6 @@ public: LOG(FATAL) << "HybridSetBase not support find_batch_nullable_negative"; } - void set_filter_id(int filter_id) { _filter_id = filter_id; } - int get_filter_id() const { return _filter_id; } - int _filter_id = -1; class IteratorBase { public: IteratorBase() = default; diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index d06d28a1aa..74ada5a449 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -1492,7 +1492,6 @@ void IRuntimeFilter::update_runtime_filter_type_to_profile() { if (_profile != nullptr) { _profile->add_info_string("RealRuntimeFilterType", ::doris::to_string(_wrapper->get_real_type())); - _wrapper->set_filter_id(_filter_id); } } @@ -1847,7 +1846,7 @@ Status RuntimePredicateWrapper::get_push_exprs(std::list<vectorized::VExprContex vectorized::VExprContextSPtr probe_ctx; RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(probe_expr, probe_ctx)); probe_ctxs.push_back(probe_ctx); - + set_filter_id(_filter_id); DCHECK(probe_ctx->root()->type().type == _column_return_type || (is_string_type(probe_ctx->root()->type().type) && is_string_type(_column_return_type)) || diff --git a/be/src/exprs/runtime_filter.h 
b/be/src/exprs/runtime_filter.h index 2c23af6251..631125a15d 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -119,7 +119,19 @@ struct RuntimeFilterParams { bool bitmap_filter_not_in; bool build_bf_exactly; }; +struct FilterFuncBase { +public: + void set_filter_id(int filter_id) { + if (_filter_id == -1) { + _filter_id = filter_id; + } + } + [[nodiscard]] int get_filter_id() const { return _filter_id; } + +private: + int _filter_id = -1; +}; struct UpdateRuntimeFilterParams { UpdateRuntimeFilterParams(const PPublishFilterRequest* req, butil::IOBufAsZeroCopyInputStream* data_stream, ObjectPool* obj_pool) diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index 38133f3f3f..f90e0e625b 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -101,6 +101,7 @@ private: SpecificFilter* _specific_filter; // owned by _filter int get_filter_id() const override { return _filter->get_filter_id(); } + bool is_filter() const override { return true; } }; template <PrimitiveType T> diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 885927d3f5..d2816be996 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -155,7 +155,12 @@ private: return info; } - int get_filter_id() const override { return _filter->get_filter_id(); } + int get_filter_id() const override { + int filter_id = _filter->get_filter_id(); + DCHECK(filter_id != -1); + return filter_id; + } + bool is_filter() const override { return true; } std::shared_ptr<BloomFilterFuncBase> _filter; SpecificFilter* _specific_filter; // owned by _filter diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index cad253ac1a..b98156f5fb 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -219,7 +219,8 @@ public: virtual void clone(ColumnPredicate** to) const { LOG(FATAL) << "clone not 
supported"; } virtual int get_filter_id() const { return -1; } - + // now InListPredicateBase BloomFilterColumnPredicate BitmapFilterColumnPredicate = true + virtual bool is_filter() const { return false; } PredicateFilterInfo get_filtered_info() const { return PredicateFilterInfo {static_cast<int>(type()), _evaluated_rows - 1, _evaluated_rows - 1 - _passed_rows}; diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index f4e432cf28..329c9b8dc0 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -287,6 +287,8 @@ public: return new_size; } int get_filter_id() const override { return _values->get_filter_id(); } + bool is_filter() const override { return true; } + template <bool is_and> void _evaluate_bit(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, bool* flags) const { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 7452c51fb4..ff367be869 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1338,7 +1338,9 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } else { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); - _filter_info_id.push_back(predicate); + if (predicate->is_filter()) { + _filter_info_id.push_back(predicate); + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
