This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1d7d7ee9b686424d22c06b7507feca8b21c8d806 Author: Pxl <[email protected]> AuthorDate: Thu Jan 25 10:08:25 2024 +0800 [Chore](join) split out join hash map from hash map (#30280) split out join hash map from hash map --- be/src/pipeline/exec/hashjoin_build_sink.cpp | 2 +- be/src/pipeline/pipeline_x/dependency.h | 27 +- be/src/vec/common/hash_table/hash_map.h | 376 +-------------------- be/src/vec/common/hash_table/hash_map_context.h | 56 ++- .../hash_table/{hash_map.h => join_hash_table.h} | 276 +++------------ .../vec/common/hash_table/partitioned_hash_map.h | 3 +- be/src/vec/common/hash_table/string_hash_map.h | 2 + be/src/vec/exec/join/vhash_join_node.cpp | 2 +- be/src/vec/exec/vset_operation_node.cpp | 16 +- be/src/vec/exec/vset_operation_node.h | 21 +- 10 files changed, 122 insertions(+), 659 deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index f02e203c783..2711b0d8852 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -364,7 +364,7 @@ void HashJoinBuildSinkLocalState::_hash_table_init(RuntimeState* state) { } return; } - if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32, RowRefListType>( + if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32, RowRefListType>( *_shared_state->hash_table_variants, _build_expr_ctxs)) { _shared_state->hash_table_variants ->emplace<vectorized::SerializedHashTableContext<RowRefListType>>(); diff --git a/be/src/pipeline/pipeline_x/dependency.h b/be/src/pipeline/pipeline_x/dependency.h index 59f9fee3775..8a58973be35 100644 --- a/be/src/pipeline/pipeline_x/dependency.h +++ b/be/src/pipeline/pipeline_x/dependency.h @@ -35,6 +35,7 @@ #include "vec/common/hash_table/hash_map_context_creator.h" #include "vec/common/sort/partition_sorter.h" #include "vec/common/sort/sorter.h" +#include "vec/core/types.h" #include "vec/exec/join/process_hash_table_probe.h" #include "vec/exec/join/vhash_join_node.h" #include "vec/exec/vaggregation_node.h" @@ -524,24 +525,22 @@ public: /// called in setup_local_state void hash_table_init() { + using namespace vectorized; if (child_exprs_lists[0].size() == 1 && (!build_not_ignore_null[0])) { // Single column optimization switch (child_exprs_lists[0][0]->root()->result_type()) { case TYPE_BOOLEAN: case TYPE_TINYINT: - hash_table_variants->emplace< - vectorized::I8HashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt8>>(); break; case TYPE_SMALLINT: - hash_table_variants->emplace< - vectorized::I16HashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt16>>(); break; case TYPE_INT: case TYPE_FLOAT: case TYPE_DATEV2: case TYPE_DECIMAL32: - hash_table_variants->emplace< - vectorized::I32HashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt32>>(); break; case TYPE_BIGINT: case TYPE_DOUBLE: @@ -549,27 +548,21 @@ public: case TYPE_DATE: case TYPE_DECIMAL64: case TYPE_DATETIMEV2: - hash_table_variants->emplace< - vectorized::I64HashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt64>>(); break; case TYPE_LARGEINT: case TYPE_DECIMALV2: case TYPE_DECIMAL128I: - hash_table_variants->emplace< - vectorized::I128HashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt128>>(); break; default: - hash_table_variants->emplace< - vectorized::SerializedHashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetSerializedHashTableContext>(); } return; } - - if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32, - vectorized::RowRefListWithFlags>( + if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32, RowRefListWithFlags>( *hash_table_variants, child_exprs_lists[0])) { - hash_table_variants->emplace< - vectorized::SerializedHashTableContext<vectorized::RowRefListWithFlags>>(); + hash_table_variants->emplace<SetSerializedHashTableContext>(); } } }; diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h index 382f46acb74..d10b24ade21 100644 --- a/be/src/vec/common/hash_table/hash_map.h +++ b/be/src/vec/common/hash_table/hash_map.h @@ -27,7 +27,9 @@ #include "vec/common/hash_table/hash.h" #include "vec/common/hash_table/hash_table.h" #include "vec/common/hash_table/hash_table_allocator.h" +#include "vec/common/hash_table/join_hash_table.h" +namespace doris { /** NOTE HashMap could only be used for memmoveable (position independent) types. * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. * Also, key in hash table must be of type, that zero bytes is compared equals to zero key. @@ -192,379 +194,15 @@ public: bool has_null_key_data() const { return false; } }; -template <typename Key, typename Cell, typename Hash = DefaultHash<Key>, - typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> -class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator> { -public: - using Self = JoinHashMapTable; - using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>; - - using key_type = Key; - using value_type = typename Cell::value_type; - using mapped_type = typename Cell::Mapped; - - using LookupResult = typename Base::LookupResult; - - static uint32_t calc_bucket_size(size_t num_elem) { - size_t expect_bucket_size = num_elem + (num_elem - 1) / 7; - return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1; - } - - size_t get_byte_size() const { - auto cal_vector_mem = [](const auto& vec) { return vec.capacity() * sizeof(vec[0]); }; - return cal_vector_mem(visited) + cal_vector_mem(first) + cal_vector_mem(next); - } - - template <int JoinOpType> - void prepare_build(size_t num_elem, int batch_size, bool has_null_key) { - _has_null_key = has_null_key; - - // the first row in build side is not really from build side table - _empty_build_side = num_elem <= 1; - max_batch_size = batch_size; - bucket_size = calc_bucket_size(num_elem + 1); - first.resize(bucket_size + 1); - next.resize(num_elem); - - if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { - visited.resize(num_elem); - } - } - - uint32_t get_bucket_size() const { return bucket_size; } - - size_t size() const { return Base::size() == 0 ? next.size() : Base::size(); } - - std::vector<uint8_t>& get_visited() { return visited; } - - void build(const Key* __restrict keys, const uint32_t* __restrict bucket_nums, - size_t num_elem) { - build_keys = keys; - for (size_t i = 1; i < num_elem; i++) { - uint32_t bucket_num = bucket_nums[i]; - next[i] = first[bucket_num]; - first[bucket_num] = i; - } - first[bucket_size] = 0; // index = bucket_num means null - } - - template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join, bool need_judge_null> - auto find_batch(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, - int probe_idx, uint32_t build_idx, int probe_rows, - uint32_t* __restrict probe_idxs, bool& probe_visited, - uint32_t* __restrict build_idxs, - doris::vectorized::ColumnFilterHelper* mark_column) { - if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { - if (_empty_build_side) { - return _process_null_aware_left_anti_join_for_empty_build_side< - JoinOpType, with_other_conjuncts, is_mark_join>( - probe_idx, probe_rows, probe_idxs, build_idxs, mark_column); - } - } - - if constexpr (with_other_conjuncts) { - return _find_batch_conjunct<JoinOpType>(keys, build_idx_map, probe_idx, build_idx, - probe_rows, probe_idxs, build_idxs); - } - - if constexpr (is_mark_join) { - return _find_batch_mark<JoinOpType>(keys, build_idx_map, probe_idx, probe_rows, - probe_idxs, build_idxs, mark_column); - } - - if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || - JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) { - return _find_batch_inner_outer_join<JoinOpType>(keys, build_idx_map, probe_idx, - build_idx, probe_rows, probe_idxs, - probe_visited, build_idxs); - } - if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN || - JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { - return _find_batch_left_semi_anti<JoinOpType, need_judge_null>( - keys, build_idx_map, probe_idx, probe_rows, probe_idxs); - } - if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { - return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx, probe_rows); - } - return std::tuple {0, 0U, 0}; - } - - template <int JoinOpType> - bool iterate_map(std::vector<uint32_t>& build_idxs) const { - const auto batch_size = max_batch_size; - const auto elem_num = visited.size(); - int count = 0; - build_idxs.resize(batch_size); - - while (count < batch_size && iter_idx < elem_num) { - const auto matched = visited[iter_idx]; - build_idxs[count] = iter_idx; - if constexpr (JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN) { - count += !matched; - } else { - count += matched; - } - iter_idx++; - } - - build_idxs.resize(count); - return iter_idx >= elem_num; - } - - bool has_null_key() { return _has_null_key; } - - void pre_build_idxs(std::vector<uint32>& bucksets, const uint8_t* null_map) { - if (null_map) { - first[bucket_size] = bucket_size; // distinguish between not matched and null - } - - for (uint32_t i = 0; i < bucksets.size(); i++) { - bucksets[i] = first[bucksets[i]]; - } - } - -private: - // only LEFT_ANTI_JOIN/LEFT_SEMI_JOIN/NULL_AWARE_LEFT_ANTI_JOIN/CROSS_JOIN support mark join - template <int JoinOpType> - auto _find_batch_mark(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, - int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs, - uint32_t* __restrict build_idxs, - doris::vectorized::ColumnFilterHelper* mark_column) { - auto matched_cnt = 0; - const auto batch_size = max_batch_size; - - while (probe_idx < probe_rows && matched_cnt < batch_size) { - auto build_idx = build_idx_map[probe_idx] == bucket_size ? 0 : build_idx_map[probe_idx]; - - while (build_idx && keys[probe_idx] != build_keys[build_idx]) { - build_idx = next[build_idx]; - } - - if (build_idx_map[probe_idx] == bucket_size) { - // mark result as null when probe row is null - mark_column->insert_null(); - } else { - bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 - : build_idx == 0; - if (!matched && _has_null_key) { - mark_column->insert_null(); - } else { - mark_column->insert_value(matched); - } - } - - probe_idxs[matched_cnt] = probe_idx++; - build_idxs[matched_cnt] = build_idx; - matched_cnt++; - } - return std::tuple {probe_idx, 0U, matched_cnt}; - } - - template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join> - auto _process_null_aware_left_anti_join_for_empty_build_side( - int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs, - uint32_t* __restrict build_idxs, doris::vectorized::ColumnFilterHelper* mark_column) { - static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN); - auto matched_cnt = 0; - const auto batch_size = max_batch_size; - - while (probe_idx < probe_rows && matched_cnt < batch_size) { - probe_idxs[matched_cnt] = probe_idx++; - if constexpr (is_mark_join) { - build_idxs[matched_cnt] = 0; - } - ++matched_cnt; - } - - if constexpr (is_mark_join && !with_other_conjuncts) { - mark_column->resize_fill(matched_cnt, 1); - } - - return std::tuple {probe_idx, 0U, matched_cnt}; - } - - auto _find_batch_right_semi_anti(const Key* __restrict keys, - const uint32_t* __restrict build_idx_map, int probe_idx, - int probe_rows) { - while (probe_idx < probe_rows) { - auto build_idx = build_idx_map[probe_idx]; - - while (build_idx) { - if (!visited[build_idx] && keys[probe_idx] == build_keys[build_idx]) { - visited[build_idx] = 1; - } - build_idx = next[build_idx]; - } - probe_idx++; - } - return std::tuple {probe_idx, 0U, 0}; - } - - template <int JoinOpType, bool need_judge_null> - auto _find_batch_left_semi_anti(const Key* __restrict keys, - const uint32_t* __restrict build_idx_map, int probe_idx, - int probe_rows, uint32_t* __restrict probe_idxs) { - auto matched_cnt = 0; - const auto batch_size = max_batch_size; - - while (probe_idx < probe_rows && matched_cnt < batch_size) { - if constexpr (need_judge_null) { - if (build_idx_map[probe_idx] == bucket_size) { - probe_idx++; - continue; - } - } - - auto build_idx = build_idx_map[probe_idx]; - - while (build_idx && keys[probe_idx] != build_keys[build_idx]) { - build_idx = next[build_idx]; - } - bool matched = - JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 : build_idx == 0; - probe_idxs[matched_cnt] = probe_idx++; - matched_cnt += matched; - } - return std::tuple {probe_idx, 0U, matched_cnt}; - } - - template <int JoinOpType> - auto _find_batch_conjunct(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, - int probe_idx, uint32_t build_idx, int probe_rows, - uint32_t* __restrict probe_idxs, uint32_t* __restrict build_idxs) { - auto matched_cnt = 0; - const auto batch_size = max_batch_size; - - auto do_the_probe = [&]() { - while (build_idx && matched_cnt < batch_size) { - if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { - if (!visited[build_idx] && keys[probe_idx] == build_keys[build_idx]) { - probe_idxs[matched_cnt] = probe_idx; - build_idxs[matched_cnt] = build_idx; - matched_cnt++; - } - } else if (keys[probe_idx] == build_keys[build_idx]) { - build_idxs[matched_cnt] = build_idx; - probe_idxs[matched_cnt] = probe_idx; - matched_cnt++; - } - build_idx = next[build_idx]; - } - - if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || - JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN || - JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { - // may over batch_size when emplace 0 into build_idxs - if (!build_idx) { - probe_idxs[matched_cnt] = probe_idx; - build_idxs[matched_cnt] = 0; - matched_cnt++; - } - } - - probe_idx++; - }; - - if (build_idx) { - do_the_probe(); - } - - while (probe_idx < probe_rows && matched_cnt < batch_size) { - build_idx = build_idx_map[probe_idx]; - do_the_probe(); - } - - probe_idx -= (build_idx != 0); - return std::tuple {probe_idx, build_idx, matched_cnt}; - } - - template <int JoinOpType> - auto _find_batch_inner_outer_join(const Key* __restrict keys, - const uint32_t* __restrict build_idx_map, int probe_idx, - uint32_t build_idx, int probe_rows, - uint32_t* __restrict probe_idxs, bool& probe_visited, - uint32_t* __restrict build_idxs) { - auto matched_cnt = 0; - const auto batch_size = max_batch_size; - - auto do_the_probe = [&]() { - while (build_idx && matched_cnt < batch_size) { - if (keys[probe_idx] == build_keys[build_idx]) { - probe_idxs[matched_cnt] = probe_idx; - build_idxs[matched_cnt] = build_idx; - matched_cnt++; - if constexpr (JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) { - if (!visited[build_idx]) { - visited[build_idx] = 1; - } - } - } - build_idx = next[build_idx]; - } - - if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) { - // `(!matched_cnt || probe_idxs[matched_cnt - 1] != probe_idx)` means not match one build side - probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1] == probe_idx); - if (!build_idx) { - if (!probe_visited) { - probe_idxs[matched_cnt] = probe_idx; - build_idxs[matched_cnt] = 0; - matched_cnt++; - } - probe_visited = false; - } - } - probe_idx++; - }; - - if (build_idx) { - do_the_probe(); - } - - while (probe_idx < probe_rows && matched_cnt < batch_size) { - build_idx = build_idx_map[probe_idx]; - do_the_probe(); - } - - probe_idx -= (build_idx != 0); - return std::tuple {probe_idx, build_idx, matched_cnt}; - } - - const Key* __restrict build_keys; - std::vector<uint8_t> visited; - - uint32_t bucket_size = 1; - int max_batch_size = 4064; - - std::vector<uint32_t> first = {0}; - std::vector<uint32_t> next = {0}; - - // use in iter hash map - mutable uint32_t iter_idx = 1; - Cell cell; - doris::vectorized::Arena* pool; - bool _has_null_key = false; - bool _empty_build_side = true; -}; - template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>; template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>> -using JoinFixedHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>; +using NormalHashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>; + +template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>> +using JoinHashMap = JoinHashTable<Key, Hash>; template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> @@ -577,3 +215,5 @@ using HashMapWithStackMemory = HashMapTable< HashTableGrower<initial_size_degree>, HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) * sizeof(HashMapCellWithSavedHash<Key, Mapped, Hash>)>>; + +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h index d96aa2d7c65..f8861ccfcd7 100644 --- a/be/src/vec/common/hash_table/hash_map_context.h +++ b/be/src/vec/common/hash_table/hash_map_context.h @@ -31,6 +31,7 @@ #include "vec/common/hash_table/string_hash_map.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" +#include "vec/exec/join/join_op.h" #include "vec/utils/util.hpp" namespace doris::vectorized { @@ -41,15 +42,13 @@ template <typename Base> struct DataWithNullKey; template <typename HashMap> -struct MethodBase { +struct MethodBaseInner { using Key = typename HashMap::key_type; using Mapped = typename HashMap::mapped_type; using Value = typename HashMap::value_type; - using Iterator = typename HashMap::iterator; using HashMapType = HashMap; std::shared_ptr<HashMap> hash_table; - Iterator iterator; bool inited_iterator = false; Key* keys = nullptr; Arena arena; @@ -58,21 +57,14 @@ struct MethodBase { // use in join case std::vector<uint32_t> bucket_nums; - MethodBase() { hash_table.reset(new HashMap()); } - virtual ~MethodBase() = default; + MethodBaseInner() { hash_table.reset(new HashMap()); } + virtual ~MethodBaseInner() = default; virtual void reset() { arena.clear(); inited_iterator = false; } - void init_iterator() { - if (!inited_iterator) { - inited_iterator = true; - iterator = hash_table->begin(); - } - } - virtual void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t num_rows, const uint8_t* null_map = nullptr, bool is_join = false, bool is_build = false, uint32_t bucket_size = 0) = 0; @@ -170,6 +162,29 @@ struct MethodBase { size_t num_rows) = 0; }; +template <typename T> +concept IteratoredMap = requires(T* map) { typename T::iterator; }; + +template <typename HashMap> +struct MethodBase : public MethodBaseInner<HashMap> { + using Iterator = void*; + Iterator iterator; + void init_iterator() { MethodBaseInner<HashMap>::inited_iterator = true; } +}; + +template <IteratoredMap HashMap> +struct MethodBase<HashMap> : public MethodBaseInner<HashMap> { + using Iterator = typename HashMap::iterator; + using Base = MethodBaseInner<HashMap>; + Iterator iterator; + void init_iterator() { + if (!Base::inited_iterator) { + Base::inited_iterator = true; + iterator = Base::hash_table->begin(); + } + } +}; + template <typename TData> struct MethodSerialized : public MethodBase<TData> { using Base = MethodBase<TData>; @@ -555,14 +570,23 @@ struct MethodSingleNullableColumn : public SingleColumnMethod { }; template <typename RowRefListType> -using SerializedHashTableContext = MethodSerialized<JoinFixedHashMap<StringRef, RowRefListType>>; +using SerializedHashTableContext = MethodSerialized<JoinHashMap<StringRef, RowRefListType>>; template <class T, typename RowRefListType> using PrimaryTypeHashTableContext = - MethodOneNumber<T, JoinFixedHashMap<T, RowRefListType, HashCRC32<T>>>; + MethodOneNumber<T, JoinHashMap<T, RowRefListType, HashCRC32<T>>>; template <class Key, bool has_null, typename Value> -using FixedKeyHashTableContext = - MethodKeysFixed<JoinFixedHashMap<Key, Value, HashCRC32<Key>>, has_null>; +using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, Value, HashCRC32<Key>>, has_null>; + +template <class Key, bool has_null> +using SetFixedKeyHashTableContext = + MethodKeysFixed<HashMap<Key, RowRefListWithFlags, HashCRC32<Key>>, has_null>; + +template <class T> +using SetPrimaryTypeHashTableContext = + MethodOneNumber<T, HashMap<T, RowRefListWithFlags, HashCRC32<T>>>; + +using SetSerializedHashTableContext = MethodSerialized<HashMap<StringRef, RowRefListWithFlags>>; } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/join_hash_table.h similarity index 53% copy from be/src/vec/common/hash_table/hash_map.h copy to be/src/vec/common/hash_table/join_hash_table.h index 382f46acb74..b190d3d89ce 100644 --- a/be/src/vec/common/hash_table/hash_map.h +++ b/be/src/vec/common/hash_table/join_hash_table.h @@ -14,196 +14,24 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashMap.h -// and modified by Doris #pragma once #include <gen_cpp/PlanNodes_types.h> -#include "common/compiler_util.h" #include "vec/columns/column_filter_helper.h" #include "vec/common/hash_table/hash.h" #include "vec/common/hash_table/hash_table.h" #include "vec/common/hash_table/hash_table_allocator.h" -/** NOTE HashMap could only be used for memmoveable (position independent) types. - * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. - * Also, key in hash table must be of type, that zero bytes is compared equals to zero key. - */ - -struct NoInitTag {}; - -/// A pair that does not initialize the elements, if not needed. -template <typename First, typename Second> -struct PairNoInit { - First first; - Second second; - - PairNoInit() {} - - template <typename First_> - PairNoInit(First_&& first_, NoInitTag) : first(std::forward<First_>(first_)) {} - - template <typename First_, typename Second_> - PairNoInit(First_&& first_, Second_&& second_) - : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_)) {} -}; - -template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> -struct HashMapCell { - using Mapped = TMapped; - using State = TState; - - using value_type = PairNoInit<Key, Mapped>; - using mapped_type = Mapped; - using key_type = Key; - - value_type value; - - HashMapCell() = default; - HashMapCell(const Key& key_, const State&) : value(key_, NoInitTag()) {} - HashMapCell(const Key& key_, const Mapped& mapped_) : value(key_, mapped_) {} - HashMapCell(const value_type& value_, const State&) : value(value_) {} - - const Key& get_first() const { return value.first; } - Mapped& get_second() { return value.second; } - const Mapped& get_second() const { return value.second; } - - const value_type& get_value() const { return value; } - - static const Key& get_key(const value_type& value) { return value.first; } - Mapped& get_mapped() { return value.second; } - const Mapped& get_mapped() const { return value.second; } - - bool key_equals(const Key& key_) const { return value.first == key_; } - bool key_equals(const Key& key_, size_t /*hash_*/) const { return value.first == key_; } - bool key_equals(const Key& key_, size_t /*hash_*/, const State& /*state*/) const { - return value.first == key_; - } - - void set_hash(size_t /*hash_value*/) {} - size_t get_hash(const Hash& hash) const { return hash(value.first); } - - bool is_zero(const State& state) const { return is_zero(value.first, state); } - static bool is_zero(const Key& key, const State& /*state*/) { return ZeroTraits::check(key); } - - /// Set the key value to zero. - void set_zero() { ZeroTraits::set(value.first); } - - /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). - static constexpr bool need_zero_value_storage = true; - - void set_mapped(const value_type& value_) { value.second = value_.second; } -}; - -template <typename Key, typename Mapped, typename Hash, typename State> -ALWAYS_INLINE inline auto lookup_result_get_key(HashMapCell<Key, Mapped, Hash, State>* cell) { - return &cell->get_first(); -} - -template <typename Key, typename Mapped, typename Hash, typename State> -ALWAYS_INLINE inline auto lookup_result_get_mapped(HashMapCell<Key, Mapped, Hash, State>* cell) { - return &cell->get_second(); -} - -template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> -struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState> { - using Base = HashMapCell<Key, TMapped, Hash, TState>; - - size_t saved_hash; - - using Base::Base; - - bool key_equals(const Key& key_) const { return this->value.first == key_; } - bool key_equals(const Key& key_, size_t hash_) const { - return saved_hash == hash_ && this->value.first == key_; - } - bool key_equals(const Key& key_, size_t hash_, const typename Base::State&) const { - return key_equals(key_, hash_); - } - - void set_hash(size_t hash_value) { saved_hash = hash_value; } - size_t get_hash(const Hash& /*hash_function*/) const { return saved_hash; } -}; - -template <typename Key, typename Mapped, typename Hash, typename State> -ALWAYS_INLINE inline auto lookup_result_get_key( - HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) { - return &cell->get_first(); -} - -template <typename Key, typename Mapped, typename Hash, typename State> -ALWAYS_INLINE inline auto lookup_result_get_mapped( - HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) { - return &cell->get_second(); -} - -template <typename Key, typename Cell, typename Hash = DefaultHash<Key>, - typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> -class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> { -public: - using Self = HashMapTable; - using Base = HashTable<Key, Cell, Hash, Grower, Allocator>; - - using key_type = Key; - using value_type = typename Cell::value_type; - using mapped_type = typename Cell::Mapped; - - using LookupResult = typename Base::LookupResult; - - using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable; - - /// Call func(Mapped &) for each hash map element. - template <typename Func> - void for_each_mapped(Func&& func) { - for (auto& v : *this) func(v.get_second()); - } - - mapped_type& ALWAYS_INLINE operator[](Key x) { - typename HashMapTable::LookupResult it; - bool inserted; - this->emplace(x, it, inserted); - - /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor), - * since the hash table memory is initially initialized with zeros. - * But, in fact, an empty cell may not be initialized with zeros in the following cases: - * - ZeroValueStorage (it only zeros the key); - * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero. - * - * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately - * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization. - * - * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`. - * When we do the initialization, for new cells, it's enough to make `store 1` right away. - * And if we did not initialize, then even though there was zero in the cell, - * the compiler can not guess about this, and generates the `load`, `increment`, `store` code. - */ - if (inserted) new (lookup_result_get_mapped(it)) mapped_type(); - - return *lookup_result_get_mapped(it); - } - - template <typename MappedType> - char* get_null_key_data() { - return nullptr; - } - bool has_null_key_data() const { return false; } -}; - -template <typename Key, typename Cell, typename Hash = DefaultHash<Key>, - typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> -class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator> { +namespace doris { +template <typename Key, typename Hash = DefaultHash<Key>> +class JoinHashTable { public: - using Self = JoinHashMapTable; - using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>; - using key_type = Key; - using value_type = typename Cell::value_type; - using mapped_type = typename Cell::Mapped; - - using LookupResult = typename Base::LookupResult; + using mapped_type = void*; + using value_type = void*; + size_t hash(const Key& x) const { return Hash()(x); } static uint32_t calc_bucket_size(size_t num_elem) { size_t expect_bucket_size = num_elem + (num_elem - 1) / 7; @@ -226,17 +54,17 @@ public: first.resize(bucket_size + 1); next.resize(num_elem); - if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (JoinOpType == TJoinOp::FULL_OUTER_JOIN || + JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || + JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { visited.resize(num_elem); } } uint32_t get_bucket_size() const { return bucket_size; } - size_t size() const { return Base::size() == 0 ? next.size() : Base::size(); } + size_t size() const { return next.size(); } std::vector<uint8_t>& get_visited() { return visited; } @@ -255,9 +83,8 @@ public: auto find_batch(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, int probe_idx, uint32_t build_idx, int probe_rows, uint32_t* __restrict probe_idxs, bool& probe_visited, - uint32_t* __restrict build_idxs, - doris::vectorized::ColumnFilterHelper* mark_column) { - if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { + uint32_t* __restrict build_idxs, vectorized::ColumnFilterHelper* mark_column) { + if constexpr (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { if (_empty_build_side) { return _process_null_aware_left_anti_join_for_empty_build_side< JoinOpType, with_other_conjuncts, is_mark_join>( @@ -275,22 +102,21 @@ public: probe_idxs, build_idxs, mark_column); } - if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || - JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) { + if constexpr (JoinOpType == TJoinOp::INNER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN || + JoinOpType == TJoinOp::LEFT_OUTER_JOIN || + JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) { return _find_batch_inner_outer_join<JoinOpType>(keys, build_idx_map, probe_idx, build_idx, probe_rows, probe_idxs, probe_visited, build_idxs); } - if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN || - JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { + if constexpr (JoinOpType == TJoinOp::LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::LEFT_SEMI_JOIN || + JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { return _find_batch_left_semi_anti<JoinOpType, need_judge_null>( keys, build_idx_map, probe_idx, probe_rows, probe_idxs); } - if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx, probe_rows); } return std::tuple {0, 0U, 0}; @@ -306,7 +132,7 @@ public: while (count < batch_size && iter_idx < elem_num) { const auto matched = visited[iter_idx]; build_idxs[count] = iter_idx; - if constexpr (JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) { count += !matched; } else { count += matched; @@ -336,7 +162,7 @@ private: auto _find_batch_mark(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs, uint32_t* __restrict build_idxs, - doris::vectorized::ColumnFilterHelper* mark_column) { + vectorized::ColumnFilterHelper* mark_column) { auto matched_cnt = 0; const auto batch_size = max_batch_size; @@ -351,8 +177,8 @@ private: // mark result as null when probe row is null mark_column->insert_null(); } else { - bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 - : build_idx == 0; + bool matched = + JoinOpType == TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 : build_idx == 0; if (!matched && _has_null_key) { mark_column->insert_null(); } else { @@ -370,8 +196,8 @@ private: template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join> auto _process_null_aware_left_anti_join_for_empty_build_side( int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs, - uint32_t* __restrict build_idxs, doris::vectorized::ColumnFilterHelper* mark_column) { - static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN); + uint32_t* __restrict build_idxs, vectorized::ColumnFilterHelper* mark_column) { + static_assert(JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN); auto matched_cnt = 0; const auto batch_size = max_batch_size; @@ -427,8 +253,7 @@ private: while (build_idx && keys[probe_idx] != build_keys[build_idx]) { build_idx = next[build_idx]; } - bool matched = - JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 : build_idx == 0; + bool matched = JoinOpType == TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 : build_idx == 0; probe_idxs[matched_cnt] = probe_idx++; matched_cnt += matched; } @@ -444,8 +269,8 @@ private: auto do_the_probe = [&]() { while (build_idx && matched_cnt < batch_size) { - if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { if (!visited[build_idx] && keys[probe_idx] == build_keys[build_idx]) { probe_idxs[matched_cnt] = probe_idx; build_idxs[matched_cnt] = build_idx; @@ -459,11 +284,11 @@ private: build_idx = next[build_idx]; } - if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || - JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN || - JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || - JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { + if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN || + JoinOpType == TJoinOp::FULL_OUTER_JOIN || + JoinOpType == TJoinOp::LEFT_SEMI_JOIN || + JoinOpType == TJoinOp::LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { // may over batch_size when emplace 0 into build_idxs if (!build_idx) { probe_idxs[matched_cnt] = probe_idx; @@ -503,8 +328,8 @@ private: probe_idxs[matched_cnt] = probe_idx; build_idxs[matched_cnt] = build_idx; matched_cnt++; - if constexpr (JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) { + if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || + JoinOpType == TJoinOp::FULL_OUTER_JOIN) { if (!visited[build_idx]) { visited[build_idx] = 1; } @@ -513,8 +338,8 @@ private: build_idx = next[build_idx]; } - if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || - JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) { + if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN || + JoinOpType == TJoinOp::FULL_OUTER_JOIN) { // `(!matched_cnt || probe_idxs[matched_cnt - 1] != probe_idx)` means not match one build side probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1] == probe_idx); if (!build_idx) { @@ -553,27 +378,8 @@ private: // use in iter hash map mutable uint32_t iter_idx = 1; - Cell cell; - doris::vectorized::Arena* pool; + vectorized::Arena* pool; bool _has_null_key = false; bool _empty_build_side = true; }; - -template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, - typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> -using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>; - -template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>> -using JoinFixedHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>; - -template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, - typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> -using HashMapWithSavedHash = - HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>; - -template <typename Key, typename Mapped, typename Hash, size_t initial_size_degree> -using HashMapWithStackMemory = HashMapTable< - Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, - HashTableGrower<initial_size_degree>, - HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) * - sizeof(HashMapCellWithSavedHash<Key, Mapped, Hash>)>>; +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h b/be/src/vec/common/hash_table/partitioned_hash_map.h index f23b0a347de..a2db6fece35 100644 --- a/be/src/vec/common/hash_table/partitioned_hash_map.h +++ b/be/src/vec/common/hash_table/partitioned_hash_map.h @@ -22,7 +22,7 @@ #include "vec/common/hash_table/hash_map.h" #include "vec/common/hash_table/partitioned_hash_table.h" #include "vec/common/hash_table/ph_hash_map.h" - +namespace doris { template <typename ImplTable> class PartitionedHashMapTable : public PartitionedHashTable<ImplTable> { public: @@ -57,3 +57,4 @@ using PartitionedHashMap = template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>> using PHNormalHashMap = PHHashMap<Key, Mapped, Hash, false>; +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/common/hash_table/string_hash_map.h b/be/src/vec/common/hash_table/string_hash_map.h index f1efd0fab12..61d304cf7d8 100644 --- a/be/src/vec/common/hash_table/string_hash_map.h +++ b/be/src/vec/common/hash_table/string_hash_map.h @@ -23,6 +23,7 @@ #include "vec/common/hash_table/hash_map.h" #include "vec/common/hash_table/string_hash_table.h" +namespace doris { template <typename Key, typename TMapped> struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState> { using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>; @@ -152,3 +153,4 @@ public: } bool has_null_key_data() const { return false; } }; +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 39e05936397..e6c00d94a2f 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -1049,7 +1049,7 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) { return; } - if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32, RowRefListType>( + if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32, RowRefListType>( *_hash_table_variants, _build_expr_ctxs)) { _hash_table_variants->emplace<SerializedHashTableContext<RowRefListType>>(); } diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index 3c47638ef42..75317b4c933 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -183,16 +183,16 @@ void VSetOperationNode<is_intersect>::hash_table_init() { switch (_child_expr_lists[0][0]->root()->result_type()) { case TYPE_BOOLEAN: case TYPE_TINYINT: - _hash_table_variants->emplace<I8HashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt8>>(); break; case TYPE_SMALLINT: - _hash_table_variants->emplace<I16HashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt16>>(); break; case TYPE_INT: case TYPE_FLOAT: case TYPE_DATEV2: case TYPE_DECIMAL32: - _hash_table_variants->emplace<I32HashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt32>>(); break; case TYPE_BIGINT: case TYPE_DOUBLE: @@ -200,21 +200,21 @@ void VSetOperationNode<is_intersect>::hash_table_init() { case TYPE_DATE: case TYPE_DECIMAL64: case TYPE_DATETIMEV2: - _hash_table_variants->emplace<I64HashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt64>>(); break; case TYPE_LARGEINT: case TYPE_DECIMALV2: case TYPE_DECIMAL128I: - _hash_table_variants->emplace<I128HashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt128>>(); break; default: - _hash_table_variants->emplace<SerializedHashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetSerializedHashTableContext>(); } return; } - if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32, RowRefListWithFlags>( + if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32, RowRefListWithFlags>( *_hash_table_variants, _child_expr_lists[0])) { - _hash_table_variants->emplace<SerializedHashTableContext<RowRefListWithFlags>>(); + _hash_table_variants->emplace<SetSerializedHashTableContext>(); } } diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index b1ab9c47650..ce5a8eb1dbc 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -31,6 +31,7 @@ #include "vec/columns/column.h" #include "vec/common/arena.h" #include "vec/core/block.h" +#include "vec/core/types.h" #include "vec/exec/join/process_hash_table_probe.h" #include "vec/exec/join/vhash_join_node.h" @@ -45,18 +46,14 @@ class VExprContext; struct RowRefListWithFlags; using SetHashTableVariants = std::variant< - std::monostate, SerializedHashTableContext<RowRefListWithFlags>, - I8HashTableContext<RowRefListWithFlags>, I16HashTableContext<RowRefListWithFlags>, - I32HashTableContext<RowRefListWithFlags>, I64HashTableContext<RowRefListWithFlags>, - I128HashTableContext<RowRefListWithFlags>, I256HashTableContext<RowRefListWithFlags>, - I64FixedKeyHashTableContext<true, RowRefListWithFlags>, - I64FixedKeyHashTableContext<false, RowRefListWithFlags>, - I128FixedKeyHashTableContext<true, RowRefListWithFlags>, - I128FixedKeyHashTableContext<false, RowRefListWithFlags>, - I256FixedKeyHashTableContext<true, RowRefListWithFlags>, - I256FixedKeyHashTableContext<false, RowRefListWithFlags>, - I136FixedKeyHashTableContext<true, RowRefListWithFlags>, - I136FixedKeyHashTableContext<false, RowRefListWithFlags>>; + std::monostate, MethodSerialized<HashMap<StringRef, RowRefListWithFlags>>, + SetPrimaryTypeHashTableContext<UInt8>, SetPrimaryTypeHashTableContext<UInt16>, + SetPrimaryTypeHashTableContext<UInt32>, SetPrimaryTypeHashTableContext<UInt64>, + SetPrimaryTypeHashTableContext<UInt128>, SetPrimaryTypeHashTableContext<UInt256>, + SetFixedKeyHashTableContext<UInt64, true>, SetFixedKeyHashTableContext<UInt64, false>, + SetFixedKeyHashTableContext<UInt128, true>, SetFixedKeyHashTableContext<UInt128, false>, + SetFixedKeyHashTableContext<UInt256, true>, SetFixedKeyHashTableContext<UInt256, false>, + SetFixedKeyHashTableContext<UInt136, true>, SetFixedKeyHashTableContext<UInt136, false>>; template <bool is_intersect> class VSetOperationNode final : public ExecNode { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
