This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 4434ba4e7b5 [refine](predicate) reduce HybridSet/InListPredicate
template bloat and fix create_string_value_set bug (#61999)
4434ba4e7b5 is described below
commit 4434ba4e7b51576c71b7b08ccd163188d4874b72
Author: Mryange <[email protected]>
AuthorDate: Thu Apr 2 20:01:02 2026 +0800
[refine](predicate) reduce HybridSet/InListPredicate template bloat and fix
create_string_value_set bug (#61999)
The HybridSet / FixedContainer / InListPredicateBase template system has
~264 redundant template instantiations and a latent bug. This PR fixes 4
issues:
**1. Bug: `in.h` `create_string_value_set` matches wrong overload**
`create_string_value_set(get_size_with_out_null(context))` silently
matches the single-parameter template overload
`create_string_value_set(bool)` via implicit `size_t→bool` conversion,
instead of the intended two-parameter `create_string_value_set(size_t,
bool)`. This causes `FixedContainer<StringRef, N>` to never activate for
string IN queries. Fixed by calling `create_string_value_set(size,
true)`.
**2. Eliminate 24 redundant instances in `create_set` path**
String types always use `StringSet<DynamicContainer>` regardless of N,
but previous code still instantiated all N variants. Added `if constexpr
(is_string_type(type))` short-circuit in
`HybridSetTraits::get_function`.
**3. Eliminate ~320 redundant instances in `InListPredicateBase`**
- Non-string types in the constructor simply do `_values = hybrid_set`
(pointer sharing), making N irrelevant. Moved N-dispatch behind `if
constexpr (is_string_type(TYPE))` in both
`predicate_creator_in_list_*.cpp`.
- Date/DECIMALV2 types have identical `ElementType` and `ContainerType`
between caller's HybridSet and InListPredicateBase — the data copy was
unnecessary. Narrowed the copy branch to string-only.
**4. Simplify `FixedContainer::find()` with fold expression**
Replaced ~60-line manual `if constexpr` expansion (N=0..8) with a C++17
fold expression + `std::index_sequence`. Identical assembly at `-O2`.
### Changed files
| File | Change |
|------|--------|
| `be/src/exprs/function/in.h` | Fix `create_string_value_set` call to
use 2-arg overload |
| `be/src/exprs/create_predicate_function.h` | Compile-time
short-circuit for string types |
| `be/src/exprs/hybrid_set.h` | Fold expression rewrite of
`FixedContainer::find()` |
| `be/src/storage/predicate/in_list_predicate.h` | Narrow if-branch to
string-only; remove unnecessary data copy for date/DECIMALV2 |
| `be/src/storage/predicate/predicate_creator_in_list_in.cpp` |
N-dispatch only for string types |
| `be/src/storage/predicate/predicate_creator_in_list_not_in.cpp` | Same
as above |
---
be/src/exprs/create_predicate_function.h | 17 +++---
be/src/exprs/function/in.h | 2 +-
be/src/exprs/hybrid_set.h | 65 ++++------------------
be/src/storage/predicate/in_list_predicate.h | 60 ++++++++++----------
.../predicate/predicate_creator_in_list_in.cpp | 60 +++++++++++---------
.../predicate/predicate_creator_in_list_not_in.cpp | 60 +++++++++++---------
6 files changed, 115 insertions(+), 149 deletions(-)
diff --git a/be/src/exprs/create_predicate_function.h
b/be/src/exprs/create_predicate_function.h
index a9146732a1f..94f8802569c 100644
--- a/be/src/exprs/create_predicate_function.h
+++ b/be/src/exprs/create_predicate_function.h
@@ -48,17 +48,14 @@ public:
using BasePtr = HybridSetBase*;
template <PrimitiveType type, size_t N>
static BasePtr get_function(bool null_aware) {
- if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
- using Set = std::conditional_t<
- is_string_type(type), StringSet<>,
- HybridSet<type,
- FixedContainer<typename
PrimitiveTypeTraits<type>::CppType, N>>>;
- return new Set(null_aware);
+ if constexpr (is_string_type(type)) {
+ return new StringSet<>(null_aware);
+ } else if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
+ using CppType = typename PrimitiveTypeTraits<type>::CppType;
+ return new HybridSet<type, FixedContainer<CppType, N>>(null_aware);
} else {
- using Set = std::conditional_t<
- is_string_type(type), StringSet<>,
- HybridSet<type, DynamicContainer<typename
PrimitiveTypeTraits<type>::CppType>>>;
- return new Set(null_aware);
+ using CppType = typename PrimitiveTypeTraits<type>::CppType;
+ return new HybridSet<type, DynamicContainer<CppType>>(null_aware);
}
}
};
diff --git a/be/src/exprs/function/in.h b/be/src/exprs/function/in.h
index 75b1a6753f6..84058977d91 100644
--- a/be/src/exprs/function/in.h
+++ b/be/src/exprs/function/in.h
@@ -113,7 +113,7 @@ public:
context->get_arg_type(0)->get_primitive_type() ==
PrimitiveType::TYPE_VARCHAR ||
context->get_arg_type(0)->get_primitive_type() ==
PrimitiveType::TYPE_STRING) {
// the StringValue's memory is held by FunctionContext, so we can
use StringValueSet here directly
-
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
+
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context),
true));
} else {
state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
get_size_with_out_null(context), true));
diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index 8b4cda61aef..6c938792123 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -72,65 +72,20 @@ public:
// Use '|' instead of '||' has better performance by test.
ALWAYS_INLINE bool find(const T& value) const {
DCHECK_EQ(N, _size);
- if constexpr (N == 0) {
+ return _find_impl(value, std::make_index_sequence<N> {});
+ }
+
+private:
+ template <size_t... I>
+ ALWAYS_INLINE bool _find_impl(const T& value, std::index_sequence<I...>)
const {
+ if constexpr (sizeof...(I) == 0) {
return false;
+ } else {
+ return (... | (uint8_t)(Compare::equal(value, _data[I])));
}
- if constexpr (N == 1) {
- return (Compare::equal(value, _data[0]));
- }
- if constexpr (N == 2) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1]));
- }
- if constexpr (N == 3) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1])) |
- (uint8_t)(Compare::equal(value, _data[2]));
- }
- if constexpr (N == 4) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1])) |
- (uint8_t)(Compare::equal(value, _data[2])) |
- (uint8_t)(Compare::equal(value, _data[3]));
- }
- if constexpr (N == 5) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1])) |
- (uint8_t)(Compare::equal(value, _data[2])) |
- (uint8_t)(Compare::equal(value, _data[3])) |
- (uint8_t)(Compare::equal(value, _data[4]));
- }
- if constexpr (N == 6) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1])) |
- (uint8_t)(Compare::equal(value, _data[2])) |
- (uint8_t)(Compare::equal(value, _data[3])) |
- (uint8_t)(Compare::equal(value, _data[4])) |
- (uint8_t)(Compare::equal(value, _data[5]));
- }
- if constexpr (N == 7) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1])) |
- (uint8_t)(Compare::equal(value, _data[2])) |
- (uint8_t)(Compare::equal(value, _data[3])) |
- (uint8_t)(Compare::equal(value, _data[4])) |
- (uint8_t)(Compare::equal(value, _data[5])) |
- (uint8_t)(Compare::equal(value, _data[6]));
- }
- if constexpr (N == FIXED_CONTAINER_MAX_SIZE) {
- return (uint8_t)(Compare::equal(value, _data[0])) |
- (uint8_t)(Compare::equal(value, _data[1])) |
- (uint8_t)(Compare::equal(value, _data[2])) |
- (uint8_t)(Compare::equal(value, _data[3])) |
- (uint8_t)(Compare::equal(value, _data[4])) |
- (uint8_t)(Compare::equal(value, _data[5])) |
- (uint8_t)(Compare::equal(value, _data[6])) |
- (uint8_t)(Compare::equal(value, _data[7]));
- }
- CHECK(false) << "unreachable path";
- return false;
}
+public:
size_t size() const { return _size; }
class Iterator {
diff --git a/be/src/storage/predicate/in_list_predicate.h
b/be/src/storage/predicate/in_list_predicate.h
index df41d24d6b4..8aec120bd94 100644
--- a/be/src/storage/predicate/in_list_predicate.h
+++ b/be/src/storage/predicate/in_list_predicate.h
@@ -67,14 +67,6 @@ class InListPredicateBase final : public ColumnPredicate {
public:
ENABLE_FACTORY_CREATOR(InListPredicateBase);
using T = typename PrimitiveTypeTraits<Type>::CppType;
- using HybridSetType = std::conditional_t<
- N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE,
- std::conditional_t<is_string_type(Type),
StringSet<FixedContainer<std::string, N>>,
- HybridSet<Type, FixedContainer<T, N>,
-
PredicateColumnType<PredicateEvaluateType<Type>>>>,
- std::conditional_t<is_string_type(Type),
StringSet<DynamicContainer<std::string>>,
- HybridSet<Type, DynamicContainer<T>,
-
PredicateColumnType<PredicateEvaluateType<Type>>>>>;
InListPredicateBase(uint32_t column_id, std::string col_name,
const std::shared_ptr<HybridSetBase>& hybrid_set, bool
is_opposite,
size_t char_length = 0)
@@ -83,33 +75,39 @@ public:
_max_value(type_limit<T>::min()) {
CHECK(hybrid_set != nullptr);
- if constexpr (is_string_type(Type) || Type == TYPE_DECIMALV2 ||
is_date_type(Type)) {
+ // String types need a copy because:
+ // 1. The caller's set is StringSet<DynamicContainer<std::string>>,
but here we want
+ // StringSet<FixedContainer<std::string, N>> for small-set
optimization — different
+ // C++ types, cannot share the pointer.
+ // 2. CHAR type additionally needs padding to char_length.
+ //
+ // Date/DECIMALV2 types do NOT need a copy: their ElementType
(CppType) is identical
+ // between the caller's HybridSet and InListPredicateBase's, and
InListPredicateBase
+ // only calls _values->find() / begin() / size() which are virtual and
don't depend on
+ // the ColumnType template parameter difference.
+ if constexpr (is_string_type(Type)) {
+ using HybridSetType = std::conditional_t<N >= 1 && N <=
FIXED_CONTAINER_MAX_SIZE,
+
StringSet<FixedContainer<std::string, N>>,
+
StringSet<DynamicContainer<std::string>>>;
_values = std::make_shared<HybridSetType>(false);
- if constexpr (is_string_type(Type)) {
- HybridSetBase::IteratorBase* iter = hybrid_set->begin();
- while (iter->has_next()) {
- const auto* value = (const StringRef*)(iter->get_value());
- if constexpr (Type == TYPE_CHAR) {
- _temp_datas.emplace_back("");
- _temp_datas.back().resize(std::max(char_length,
value->size));
- memcpy(_temp_datas.back().data(), value->data,
value->size);
- const std::string& str = _temp_datas.back();
- _values->insert((void*)str.data(), str.length());
- } else {
- _values->insert((void*)value->data, value->size);
- }
- iter->next();
- }
- } else {
- HybridSetBase::IteratorBase* iter = hybrid_set->begin();
- while (iter->has_next()) {
- const void* value = iter->get_value();
- _values->insert(value);
- iter->next();
+ HybridSetBase::IteratorBase* iter = hybrid_set->begin();
+ while (iter->has_next()) {
+ const auto* value = (const StringRef*)(iter->get_value());
+ if constexpr (Type == TYPE_CHAR) {
+ _temp_datas.emplace_back("");
+ _temp_datas.back().resize(std::max(char_length,
value->size));
+ memcpy(_temp_datas.back().data(), value->data,
value->size);
+ const std::string& str = _temp_datas.back();
+ _values->insert((void*)str.data(), str.length());
+ } else {
+ _values->insert((void*)value->data, value->size);
}
+ iter->next();
}
} else {
- // shared from the caller, so it needs to be shared ptr
+ // Non-string types: directly share the caller's hybrid_set.
+ // The caller's set already contains the correct
FixedContainer/DynamicContainer
+ // specialization; _values->find() dispatches to it via virtual
call.
_values = hybrid_set;
}
HybridSetBase::IteratorBase* iter = _values->begin();
diff --git a/be/src/storage/predicate/predicate_creator_in_list_in.cpp
b/be/src/storage/predicate/predicate_creator_in_list_in.cpp
index 7b40a9adef6..e720da632a8 100644
--- a/be/src/storage/predicate/predicate_creator_in_list_in.cpp
+++ b/be/src/storage/predicate/predicate_creator_in_list_in.cpp
@@ -27,34 +27,42 @@ template <PrimitiveType TYPE, PredicateType PT>
static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
const uint32_t cid, const std::string col_name, const
std::shared_ptr<HybridSetBase>& set,
bool is_opposite, size_t char_length = 0) {
- auto set_size = set->size();
- if (set_size == 1) {
- return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 2) {
- return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 3) {
- return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 4) {
- return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 5) {
- return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 6) {
- return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 7) {
- return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
- return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else {
+ // Only string types construct their own HybridSetType in the constructor
(to convert
+ // from DynamicContainer to FixedContainer<std::string, N>), so N dispatch
is only needed
+ // for them. All other types directly share the caller's hybrid_set.
+ if constexpr (!is_string_type(TYPE)) {
return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
cid, col_name, set, is_opposite, char_length);
+ } else {
+ auto set_size = set->size();
+ if (set_size == 1) {
+ return InListPredicateBase<TYPE, PT, 1>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 2) {
+ return InListPredicateBase<TYPE, PT, 2>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 3) {
+ return InListPredicateBase<TYPE, PT, 3>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 4) {
+ return InListPredicateBase<TYPE, PT, 4>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 5) {
+ return InListPredicateBase<TYPE, PT, 5>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 6) {
+ return InListPredicateBase<TYPE, PT, 6>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 7) {
+ return InListPredicateBase<TYPE, PT, 7>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+ return InListPredicateBase<TYPE, PT, 8>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else {
+ return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
+ cid, col_name, set, is_opposite, char_length);
+ }
}
}
diff --git a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
index be28f2fabc1..63e8eb37b18 100644
--- a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
+++ b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
@@ -27,34 +27,42 @@ template <PrimitiveType TYPE, PredicateType PT>
static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
const uint32_t cid, const std::string col_name, const
std::shared_ptr<HybridSetBase>& set,
bool is_opposite, size_t char_length = 0) {
- auto set_size = set->size();
- if (set_size == 1) {
- return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 2) {
- return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 3) {
- return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 4) {
- return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 5) {
- return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 6) {
- return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 7) {
- return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
- return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else {
+ // Only string types construct their own HybridSetType in the constructor
(to convert
+ // from DynamicContainer to FixedContainer<std::string, N>), so N dispatch
is only needed
+ // for them. All other types directly share the caller's hybrid_set.
+ if constexpr (!is_string_type(TYPE)) {
return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
cid, col_name, set, is_opposite, char_length);
+ } else {
+ auto set_size = set->size();
+ if (set_size == 1) {
+ return InListPredicateBase<TYPE, PT, 1>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 2) {
+ return InListPredicateBase<TYPE, PT, 2>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 3) {
+ return InListPredicateBase<TYPE, PT, 3>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 4) {
+ return InListPredicateBase<TYPE, PT, 4>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 5) {
+ return InListPredicateBase<TYPE, PT, 5>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 6) {
+ return InListPredicateBase<TYPE, PT, 6>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 7) {
+ return InListPredicateBase<TYPE, PT, 7>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+ return InListPredicateBase<TYPE, PT, 8>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else {
+ return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
+ cid, col_name, set, is_opposite, char_length);
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]