This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1d7d7ee9b686424d22c06b7507feca8b21c8d806
Author: Pxl <[email protected]>
AuthorDate: Thu Jan 25 10:08:25 2024 +0800

    [Chore](join) split out join hash map from hash map (#30280)
    
    split out join hash map from hash map
---
 be/src/pipeline/exec/hashjoin_build_sink.cpp       |   2 +-
 be/src/pipeline/pipeline_x/dependency.h            |  27 +-
 be/src/vec/common/hash_table/hash_map.h            | 376 +--------------------
 be/src/vec/common/hash_table/hash_map_context.h    |  56 ++-
 .../hash_table/{hash_map.h => join_hash_table.h}   | 276 +++------------
 .../vec/common/hash_table/partitioned_hash_map.h   |   3 +-
 be/src/vec/common/hash_table/string_hash_map.h     |   2 +
 be/src/vec/exec/join/vhash_join_node.cpp           |   2 +-
 be/src/vec/exec/vset_operation_node.cpp            |  16 +-
 be/src/vec/exec/vset_operation_node.h              |  21 +-
 10 files changed, 122 insertions(+), 659 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp
index f02e203c783..2711b0d8852 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.cpp
+++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp
@@ -364,7 +364,7 @@ void HashJoinBuildSinkLocalState::_hash_table_init(RuntimeState* state) {
                     }
                     return;
                 }
-                if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32, RowRefListType>(
+                if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32, RowRefListType>(
                             *_shared_state->hash_table_variants, _build_expr_ctxs)) {
                     _shared_state->hash_table_variants
                             ->emplace<vectorized::SerializedHashTableContext<RowRefListType>>();
diff --git a/be/src/pipeline/pipeline_x/dependency.h b/be/src/pipeline/pipeline_x/dependency.h
index 59f9fee3775..8a58973be35 100644
--- a/be/src/pipeline/pipeline_x/dependency.h
+++ b/be/src/pipeline/pipeline_x/dependency.h
@@ -35,6 +35,7 @@
 #include "vec/common/hash_table/hash_map_context_creator.h"
 #include "vec/common/sort/partition_sorter.h"
 #include "vec/common/sort/sorter.h"
+#include "vec/core/types.h"
 #include "vec/exec/join/process_hash_table_probe.h"
 #include "vec/exec/join/vhash_join_node.h"
 #include "vec/exec/vaggregation_node.h"
@@ -524,24 +525,22 @@ public:
 
     /// called in setup_local_state
     void hash_table_init() {
+        using namespace vectorized;
         if (child_exprs_lists[0].size() == 1 && (!build_not_ignore_null[0])) {
             // Single column optimization
             switch (child_exprs_lists[0][0]->root()->result_type()) {
             case TYPE_BOOLEAN:
             case TYPE_TINYINT:
-                hash_table_variants->emplace<
-                        vectorized::I8HashTableContext<vectorized::RowRefListWithFlags>>();
+                hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt8>>();
                 break;
             case TYPE_SMALLINT:
-                hash_table_variants->emplace<
-                        vectorized::I16HashTableContext<vectorized::RowRefListWithFlags>>();
+                hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt16>>();
                 break;
             case TYPE_INT:
             case TYPE_FLOAT:
             case TYPE_DATEV2:
             case TYPE_DECIMAL32:
-                hash_table_variants->emplace<
-                        vectorized::I32HashTableContext<vectorized::RowRefListWithFlags>>();
+                hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt32>>();
                 break;
             case TYPE_BIGINT:
             case TYPE_DOUBLE:
@@ -549,27 +548,21 @@ public:
             case TYPE_DATE:
             case TYPE_DECIMAL64:
             case TYPE_DATETIMEV2:
-                hash_table_variants->emplace<
-                        vectorized::I64HashTableContext<vectorized::RowRefListWithFlags>>();
+                hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt64>>();
                 break;
             case TYPE_LARGEINT:
             case TYPE_DECIMALV2:
             case TYPE_DECIMAL128I:
-                hash_table_variants->emplace<
-                        vectorized::I128HashTableContext<vectorized::RowRefListWithFlags>>();
+                hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt128>>();
                 break;
             default:
-                hash_table_variants->emplace<
-                        vectorized::SerializedHashTableContext<vectorized::RowRefListWithFlags>>();
+                hash_table_variants->emplace<SetSerializedHashTableContext>();
             }
             return;
         }
-
-        if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32,
-                                            vectorized::RowRefListWithFlags>(
+        if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32, RowRefListWithFlags>(
                     *hash_table_variants, child_exprs_lists[0])) {
-            hash_table_variants->emplace<
-                    vectorized::SerializedHashTableContext<vectorized::RowRefListWithFlags>>();
+            hash_table_variants->emplace<SetSerializedHashTableContext>();
         }
     }
 };
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 382f46acb74..d10b24ade21 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -27,7 +27,9 @@
 #include "vec/common/hash_table/hash.h"
 #include "vec/common/hash_table/hash_table.h"
 #include "vec/common/hash_table/hash_table_allocator.h"
+#include "vec/common/hash_table/join_hash_table.h"
 
+namespace doris {
 /** NOTE HashMap could only be used for memmoveable (position independent) types.
   * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
   * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
@@ -192,379 +194,15 @@ public:
     bool has_null_key_data() const { return false; }
 };
 
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
-          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
-class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, 
Allocator> {
-public:
-    using Self = JoinHashMapTable;
-    using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
-
-    using key_type = Key;
-    using value_type = typename Cell::value_type;
-    using mapped_type = typename Cell::Mapped;
-
-    using LookupResult = typename Base::LookupResult;
-
-    static uint32_t calc_bucket_size(size_t num_elem) {
-        size_t expect_bucket_size = num_elem + (num_elem - 1) / 7;
-        return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
-    }
-
-    size_t get_byte_size() const {
-        auto cal_vector_mem = [](const auto& vec) { return vec.capacity() * 
sizeof(vec[0]); };
-        return cal_vector_mem(visited) + cal_vector_mem(first) + 
cal_vector_mem(next);
-    }
-
-    template <int JoinOpType>
-    void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
-        _has_null_key = has_null_key;
-
-        // the first row in build side is not really from build side table
-        _empty_build_side = num_elem <= 1;
-        max_batch_size = batch_size;
-        bucket_size = calc_bucket_size(num_elem + 1);
-        first.resize(bucket_size + 1);
-        next.resize(num_elem);
-
-        if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
-            visited.resize(num_elem);
-        }
-    }
-
-    uint32_t get_bucket_size() const { return bucket_size; }
-
-    size_t size() const { return Base::size() == 0 ? next.size() : 
Base::size(); }
-
-    std::vector<uint8_t>& get_visited() { return visited; }
-
-    void build(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
-               size_t num_elem) {
-        build_keys = keys;
-        for (size_t i = 1; i < num_elem; i++) {
-            uint32_t bucket_num = bucket_nums[i];
-            next[i] = first[bucket_num];
-            first[bucket_num] = i;
-        }
-        first[bucket_size] = 0; // index = bucket_num means null
-    }
-
-    template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join, 
bool need_judge_null>
-    auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
build_idx_map,
-                    int probe_idx, uint32_t build_idx, int probe_rows,
-                    uint32_t* __restrict probe_idxs, bool& probe_visited,
-                    uint32_t* __restrict build_idxs,
-                    doris::vectorized::ColumnFilterHelper* mark_column) {
-        if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) 
{
-            if (_empty_build_side) {
-                return _process_null_aware_left_anti_join_for_empty_build_side<
-                        JoinOpType, with_other_conjuncts, is_mark_join>(
-                        probe_idx, probe_rows, probe_idxs, build_idxs, 
mark_column);
-            }
-        }
-
-        if constexpr (with_other_conjuncts) {
-            return _find_batch_conjunct<JoinOpType>(keys, build_idx_map, 
probe_idx, build_idx,
-                                                    probe_rows, probe_idxs, 
build_idxs);
-        }
-
-        if constexpr (is_mark_join) {
-            return _find_batch_mark<JoinOpType>(keys, build_idx_map, 
probe_idx, probe_rows,
-                                                probe_idxs, build_idxs, 
mark_column);
-        }
-
-        if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
-                      JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
-            return _find_batch_inner_outer_join<JoinOpType>(keys, 
build_idx_map, probe_idx,
-                                                            build_idx, 
probe_rows, probe_idxs,
-                                                            probe_visited, 
build_idxs);
-        }
-        if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
-                      JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
-                      JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) 
{
-            return _find_batch_left_semi_anti<JoinOpType, need_judge_null>(
-                    keys, build_idx_map, probe_idx, probe_rows, probe_idxs);
-        }
-        if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
-            return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx, 
probe_rows);
-        }
-        return std::tuple {0, 0U, 0};
-    }
-
-    template <int JoinOpType>
-    bool iterate_map(std::vector<uint32_t>& build_idxs) const {
-        const auto batch_size = max_batch_size;
-        const auto elem_num = visited.size();
-        int count = 0;
-        build_idxs.resize(batch_size);
-
-        while (count < batch_size && iter_idx < elem_num) {
-            const auto matched = visited[iter_idx];
-            build_idxs[count] = iter_idx;
-            if constexpr (JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN) {
-                count += !matched;
-            } else {
-                count += matched;
-            }
-            iter_idx++;
-        }
-
-        build_idxs.resize(count);
-        return iter_idx >= elem_num;
-    }
-
-    bool has_null_key() { return _has_null_key; }
-
-    void pre_build_idxs(std::vector<uint32>& bucksets, const uint8_t* 
null_map) {
-        if (null_map) {
-            first[bucket_size] = bucket_size; // distinguish between not 
matched and null
-        }
-
-        for (uint32_t i = 0; i < bucksets.size(); i++) {
-            bucksets[i] = first[bucksets[i]];
-        }
-    }
-
-private:
-    // only LEFT_ANTI_JOIN/LEFT_SEMI_JOIN/NULL_AWARE_LEFT_ANTI_JOIN/CROSS_JOIN 
support mark join
-    template <int JoinOpType>
-    auto _find_batch_mark(const Key* __restrict keys, const uint32_t* 
__restrict build_idx_map,
-                          int probe_idx, int probe_rows, uint32_t* __restrict 
probe_idxs,
-                          uint32_t* __restrict build_idxs,
-                          doris::vectorized::ColumnFilterHelper* mark_column) {
-        auto matched_cnt = 0;
-        const auto batch_size = max_batch_size;
-
-        while (probe_idx < probe_rows && matched_cnt < batch_size) {
-            auto build_idx = build_idx_map[probe_idx] == bucket_size ? 0 : 
build_idx_map[probe_idx];
-
-            while (build_idx && keys[probe_idx] != build_keys[build_idx]) {
-                build_idx = next[build_idx];
-            }
-
-            if (build_idx_map[probe_idx] == bucket_size) {
-                // mark result as null when probe row is null
-                mark_column->insert_null();
-            } else {
-                bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? 
build_idx != 0
-                                                                            : 
build_idx == 0;
-                if (!matched && _has_null_key) {
-                    mark_column->insert_null();
-                } else {
-                    mark_column->insert_value(matched);
-                }
-            }
-
-            probe_idxs[matched_cnt] = probe_idx++;
-            build_idxs[matched_cnt] = build_idx;
-            matched_cnt++;
-        }
-        return std::tuple {probe_idx, 0U, matched_cnt};
-    }
-
-    template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
-    auto _process_null_aware_left_anti_join_for_empty_build_side(
-            int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
-            uint32_t* __restrict build_idxs, 
doris::vectorized::ColumnFilterHelper* mark_column) {
-        static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
-        auto matched_cnt = 0;
-        const auto batch_size = max_batch_size;
-
-        while (probe_idx < probe_rows && matched_cnt < batch_size) {
-            probe_idxs[matched_cnt] = probe_idx++;
-            if constexpr (is_mark_join) {
-                build_idxs[matched_cnt] = 0;
-            }
-            ++matched_cnt;
-        }
-
-        if constexpr (is_mark_join && !with_other_conjuncts) {
-            mark_column->resize_fill(matched_cnt, 1);
-        }
-
-        return std::tuple {probe_idx, 0U, matched_cnt};
-    }
-
-    auto _find_batch_right_semi_anti(const Key* __restrict keys,
-                                     const uint32_t* __restrict build_idx_map, 
int probe_idx,
-                                     int probe_rows) {
-        while (probe_idx < probe_rows) {
-            auto build_idx = build_idx_map[probe_idx];
-
-            while (build_idx) {
-                if (!visited[build_idx] && keys[probe_idx] == 
build_keys[build_idx]) {
-                    visited[build_idx] = 1;
-                }
-                build_idx = next[build_idx];
-            }
-            probe_idx++;
-        }
-        return std::tuple {probe_idx, 0U, 0};
-    }
-
-    template <int JoinOpType, bool need_judge_null>
-    auto _find_batch_left_semi_anti(const Key* __restrict keys,
-                                    const uint32_t* __restrict build_idx_map, 
int probe_idx,
-                                    int probe_rows, uint32_t* __restrict 
probe_idxs) {
-        auto matched_cnt = 0;
-        const auto batch_size = max_batch_size;
-
-        while (probe_idx < probe_rows && matched_cnt < batch_size) {
-            if constexpr (need_judge_null) {
-                if (build_idx_map[probe_idx] == bucket_size) {
-                    probe_idx++;
-                    continue;
-                }
-            }
-
-            auto build_idx = build_idx_map[probe_idx];
-
-            while (build_idx && keys[probe_idx] != build_keys[build_idx]) {
-                build_idx = next[build_idx];
-            }
-            bool matched =
-                    JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx 
!= 0 : build_idx == 0;
-            probe_idxs[matched_cnt] = probe_idx++;
-            matched_cnt += matched;
-        }
-        return std::tuple {probe_idx, 0U, matched_cnt};
-    }
-
-    template <int JoinOpType>
-    auto _find_batch_conjunct(const Key* __restrict keys, const uint32_t* 
__restrict build_idx_map,
-                              int probe_idx, uint32_t build_idx, int 
probe_rows,
-                              uint32_t* __restrict probe_idxs, uint32_t* 
__restrict build_idxs) {
-        auto matched_cnt = 0;
-        const auto batch_size = max_batch_size;
-
-        auto do_the_probe = [&]() {
-            while (build_idx && matched_cnt < batch_size) {
-                if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
-                              JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
-                    if (!visited[build_idx] && keys[probe_idx] == 
build_keys[build_idx]) {
-                        probe_idxs[matched_cnt] = probe_idx;
-                        build_idxs[matched_cnt] = build_idx;
-                        matched_cnt++;
-                    }
-                } else if (keys[probe_idx] == build_keys[build_idx]) {
-                    build_idxs[matched_cnt] = build_idx;
-                    probe_idxs[matched_cnt] = probe_idx;
-                    matched_cnt++;
-                }
-                build_idx = next[build_idx];
-            }
-
-            if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
-                          JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
-                          JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
-                          JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
-                          JoinOpType == 
doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
-                // may over batch_size when emplace 0 into build_idxs
-                if (!build_idx) {
-                    probe_idxs[matched_cnt] = probe_idx;
-                    build_idxs[matched_cnt] = 0;
-                    matched_cnt++;
-                }
-            }
-
-            probe_idx++;
-        };
-
-        if (build_idx) {
-            do_the_probe();
-        }
-
-        while (probe_idx < probe_rows && matched_cnt < batch_size) {
-            build_idx = build_idx_map[probe_idx];
-            do_the_probe();
-        }
-
-        probe_idx -= (build_idx != 0);
-        return std::tuple {probe_idx, build_idx, matched_cnt};
-    }
-
-    template <int JoinOpType>
-    auto _find_batch_inner_outer_join(const Key* __restrict keys,
-                                      const uint32_t* __restrict 
build_idx_map, int probe_idx,
-                                      uint32_t build_idx, int probe_rows,
-                                      uint32_t* __restrict probe_idxs, bool& 
probe_visited,
-                                      uint32_t* __restrict build_idxs) {
-        auto matched_cnt = 0;
-        const auto batch_size = max_batch_size;
-
-        auto do_the_probe = [&]() {
-            while (build_idx && matched_cnt < batch_size) {
-                if (keys[probe_idx] == build_keys[build_idx]) {
-                    probe_idxs[matched_cnt] = probe_idx;
-                    build_idxs[matched_cnt] = build_idx;
-                    matched_cnt++;
-                    if constexpr (JoinOpType == 
doris::TJoinOp::RIGHT_OUTER_JOIN ||
-                                  JoinOpType == 
doris::TJoinOp::FULL_OUTER_JOIN) {
-                        if (!visited[build_idx]) {
-                            visited[build_idx] = 1;
-                        }
-                    }
-                }
-                build_idx = next[build_idx];
-            }
-
-            if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
-                          JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) {
-                // `(!matched_cnt || probe_idxs[matched_cnt - 1] != 
probe_idx)` means not match one build side
-                probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1] 
== probe_idx);
-                if (!build_idx) {
-                    if (!probe_visited) {
-                        probe_idxs[matched_cnt] = probe_idx;
-                        build_idxs[matched_cnt] = 0;
-                        matched_cnt++;
-                    }
-                    probe_visited = false;
-                }
-            }
-            probe_idx++;
-        };
-
-        if (build_idx) {
-            do_the_probe();
-        }
-
-        while (probe_idx < probe_rows && matched_cnt < batch_size) {
-            build_idx = build_idx_map[probe_idx];
-            do_the_probe();
-        }
-
-        probe_idx -= (build_idx != 0);
-        return std::tuple {probe_idx, build_idx, matched_cnt};
-    }
-
-    const Key* __restrict build_keys;
-    std::vector<uint8_t> visited;
-
-    uint32_t bucket_size = 1;
-    int max_batch_size = 4064;
-
-    std::vector<uint32_t> first = {0};
-    std::vector<uint32_t> next = {0};
-
-    // use in iter hash map
-    mutable uint32_t iter_idx = 1;
-    Cell cell;
-    doris::vectorized::Arena* pool;
-    bool _has_null_key = false;
-    bool _empty_build_side = true;
-};
-
 template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
           typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
 using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
 
 template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using JoinFixedHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>;
+using NormalHashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash>;
+
+template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
+using JoinHashMap = JoinHashTable<Key, Hash>;
 
 template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
           typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
@@ -577,3 +215,5 @@ using HashMapWithStackMemory = HashMapTable<
         HashTableGrower<initial_size_degree>,
         HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) *
                                           sizeof(HashMapCellWithSavedHash<Key, Mapped, Hash>)>>;
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h
index d96aa2d7c65..f8861ccfcd7 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -31,6 +31,7 @@
 #include "vec/common/hash_table/string_hash_map.h"
 #include "vec/common/string_ref.h"
 #include "vec/core/types.h"
+#include "vec/exec/join/join_op.h"
 #include "vec/utils/util.hpp"
 
 namespace doris::vectorized {
@@ -41,15 +42,13 @@ template <typename Base>
 struct DataWithNullKey;
 
 template <typename HashMap>
-struct MethodBase {
+struct MethodBaseInner {
     using Key = typename HashMap::key_type;
     using Mapped = typename HashMap::mapped_type;
     using Value = typename HashMap::value_type;
-    using Iterator = typename HashMap::iterator;
     using HashMapType = HashMap;
 
     std::shared_ptr<HashMap> hash_table;
-    Iterator iterator;
     bool inited_iterator = false;
     Key* keys = nullptr;
     Arena arena;
@@ -58,21 +57,14 @@ struct MethodBase {
     // use in join case
     std::vector<uint32_t> bucket_nums;
 
-    MethodBase() { hash_table.reset(new HashMap()); }
-    virtual ~MethodBase() = default;
+    MethodBaseInner() { hash_table.reset(new HashMap()); }
+    virtual ~MethodBaseInner() = default;
 
     virtual void reset() {
         arena.clear();
         inited_iterator = false;
     }
 
-    void init_iterator() {
-        if (!inited_iterator) {
-            inited_iterator = true;
-            iterator = hash_table->begin();
-        }
-    }
-
     virtual void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t num_rows,
                                       const uint8_t* null_map = nullptr, bool is_join = false,
                                       bool is_build = false, uint32_t bucket_size = 0) = 0;
@@ -170,6 +162,29 @@ struct MethodBase {
                                           size_t num_rows) = 0;
 };
 
+template <typename T>
+concept IteratoredMap = requires(T* map) { typename T::iterator; };
+
+template <typename HashMap>
+struct MethodBase : public MethodBaseInner<HashMap> {
+    using Iterator = void*;
+    Iterator iterator;
+    void init_iterator() { MethodBaseInner<HashMap>::inited_iterator = true; }
+};
+
+template <IteratoredMap HashMap>
+struct MethodBase<HashMap> : public MethodBaseInner<HashMap> {
+    using Iterator = typename HashMap::iterator;
+    using Base = MethodBaseInner<HashMap>;
+    Iterator iterator;
+    void init_iterator() {
+        if (!Base::inited_iterator) {
+            Base::inited_iterator = true;
+            iterator = Base::hash_table->begin();
+        }
+    }
+};
+
 template <typename TData>
 struct MethodSerialized : public MethodBase<TData> {
     using Base = MethodBase<TData>;
@@ -555,14 +570,23 @@ struct MethodSingleNullableColumn : public SingleColumnMethod {
 };
 
 template <typename RowRefListType>
-using SerializedHashTableContext = MethodSerialized<JoinFixedHashMap<StringRef, RowRefListType>>;
+using SerializedHashTableContext = MethodSerialized<JoinHashMap<StringRef, RowRefListType>>;
 
 template <class T, typename RowRefListType>
 using PrimaryTypeHashTableContext =
-        MethodOneNumber<T, JoinFixedHashMap<T, RowRefListType, HashCRC32<T>>>;
+        MethodOneNumber<T, JoinHashMap<T, RowRefListType, HashCRC32<T>>>;
 
 template <class Key, bool has_null, typename Value>
-using FixedKeyHashTableContext =
-        MethodKeysFixed<JoinFixedHashMap<Key, Value, HashCRC32<Key>>, has_null>;
+using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, Value, HashCRC32<Key>>, has_null>;
+
+template <class Key, bool has_null>
+using SetFixedKeyHashTableContext =
+        MethodKeysFixed<HashMap<Key, RowRefListWithFlags, HashCRC32<Key>>, has_null>;
+
+template <class T>
+using SetPrimaryTypeHashTableContext =
+        MethodOneNumber<T, HashMap<T, RowRefListWithFlags, HashCRC32<T>>>;
+
+using SetSerializedHashTableContext = MethodSerialized<HashMap<StringRef, RowRefListWithFlags>>;
 
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/join_hash_table.h
similarity index 53%
copy from be/src/vec/common/hash_table/hash_map.h
copy to be/src/vec/common/hash_table/join_hash_table.h
index 382f46acb74..b190d3d89ce 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/join_hash_table.h
@@ -14,196 +14,24 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashMap.h
-// and modified by Doris
 
 #pragma once
 
 #include <gen_cpp/PlanNodes_types.h>
 
-#include "common/compiler_util.h"
 #include "vec/columns/column_filter_helper.h"
 #include "vec/common/hash_table/hash.h"
 #include "vec/common/hash_table/hash_table.h"
 #include "vec/common/hash_table/hash_table_allocator.h"
 
-/** NOTE HashMap could only be used for memmoveable (position independent) 
types.
-  * Example: std::string is not position independent in libstdc++ with C++11 
ABI or in libc++.
-  * Also, key in hash table must be of type, that zero bytes is compared 
equals to zero key.
-  */
-
-struct NoInitTag {};
-
-/// A pair that does not initialize the elements, if not needed.
-template <typename First, typename Second>
-struct PairNoInit {
-    First first;
-    Second second;
-
-    PairNoInit() {}
-
-    template <typename First_>
-    PairNoInit(First_&& first_, NoInitTag) : 
first(std::forward<First_>(first_)) {}
-
-    template <typename First_, typename Second_>
-    PairNoInit(First_&& first_, Second_&& second_)
-            : first(std::forward<First_>(first_)), 
second(std::forward<Second_>(second_)) {}
-};
-
-template <typename Key, typename TMapped, typename Hash, typename TState = 
HashTableNoState>
-struct HashMapCell {
-    using Mapped = TMapped;
-    using State = TState;
-
-    using value_type = PairNoInit<Key, Mapped>;
-    using mapped_type = Mapped;
-    using key_type = Key;
-
-    value_type value;
-
-    HashMapCell() = default;
-    HashMapCell(const Key& key_, const State&) : value(key_, NoInitTag()) {}
-    HashMapCell(const Key& key_, const Mapped& mapped_) : value(key_, mapped_) 
{}
-    HashMapCell(const value_type& value_, const State&) : value(value_) {}
-
-    const Key& get_first() const { return value.first; }
-    Mapped& get_second() { return value.second; }
-    const Mapped& get_second() const { return value.second; }
-
-    const value_type& get_value() const { return value; }
-
-    static const Key& get_key(const value_type& value) { return value.first; }
-    Mapped& get_mapped() { return value.second; }
-    const Mapped& get_mapped() const { return value.second; }
-
-    bool key_equals(const Key& key_) const { return value.first == key_; }
-    bool key_equals(const Key& key_, size_t /*hash_*/) const { return 
value.first == key_; }
-    bool key_equals(const Key& key_, size_t /*hash_*/, const State& /*state*/) 
const {
-        return value.first == key_;
-    }
-
-    void set_hash(size_t /*hash_value*/) {}
-    size_t get_hash(const Hash& hash) const { return hash(value.first); }
-
-    bool is_zero(const State& state) const { return is_zero(value.first, 
state); }
-    static bool is_zero(const Key& key, const State& /*state*/) { return 
ZeroTraits::check(key); }
-
-    /// Set the key value to zero.
-    void set_zero() { ZeroTraits::set(value.first); }
-
-    /// Do I need to store the zero key separately (that is, can a zero key be 
inserted into the hash table).
-    static constexpr bool need_zero_value_storage = true;
-
-    void set_mapped(const value_type& value_) { value.second = value_.second; }
-};
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_key(HashMapCell<Key, Mapped, Hash, 
State>* cell) {
-    return &cell->get_first();
-}
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_mapped(HashMapCell<Key, Mapped, 
Hash, State>* cell) {
-    return &cell->get_second();
-}
-
-template <typename Key, typename TMapped, typename Hash, typename TState = 
HashTableNoState>
-struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, 
TState> {
-    using Base = HashMapCell<Key, TMapped, Hash, TState>;
-
-    size_t saved_hash;
-
-    using Base::Base;
-
-    bool key_equals(const Key& key_) const { return this->value.first == key_; 
}
-    bool key_equals(const Key& key_, size_t hash_) const {
-        return saved_hash == hash_ && this->value.first == key_;
-    }
-    bool key_equals(const Key& key_, size_t hash_, const typename 
Base::State&) const {
-        return key_equals(key_, hash_);
-    }
-
-    void set_hash(size_t hash_value) { saved_hash = hash_value; }
-    size_t get_hash(const Hash& /*hash_function*/) const { return saved_hash; }
-};
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_key(
-        HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) {
-    return &cell->get_first();
-}
-
-template <typename Key, typename Mapped, typename Hash, typename State>
-ALWAYS_INLINE inline auto lookup_result_get_mapped(
-        HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) {
-    return &cell->get_second();
-}
-
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
-          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
-class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> {
-public:
-    using Self = HashMapTable;
-    using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
-
-    using key_type = Key;
-    using value_type = typename Cell::value_type;
-    using mapped_type = typename Cell::Mapped;
-
-    using LookupResult = typename Base::LookupResult;
-
-    using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
-
-    /// Call func(Mapped &) for each hash map element.
-    template <typename Func>
-    void for_each_mapped(Func&& func) {
-        for (auto& v : *this) func(v.get_second());
-    }
-
-    mapped_type& ALWAYS_INLINE operator[](Key x) {
-        typename HashMapTable::LookupResult it;
-        bool inserted;
-        this->emplace(x, it, inserted);
-
-        /** It may seem that initialization is not necessary for POD-types (or 
__has_trivial_constructor),
-          *  since the hash table memory is initially initialized with zeros.
-          * But, in fact, an empty cell may not be initialized with zeros in 
the following cases:
-          * - ZeroValueStorage (it only zeros the key);
-          * - after resizing and moving a part of the cells to the new half of 
the hash table, the old cells also have only the key to zero.
-          *
-          * On performance, there is almost always no difference, due to the 
fact that it->second is usually assigned immediately
-          *  after calling `operator[]`, and since `operator[]` is inlined, 
the compiler removes unnecessary initialization.
-          *
-          * Sometimes due to initialization, the performance even grows. This 
occurs in code like `++map[key]`.
-          * When we do the initialization, for new cells, it's enough to make 
`store 1` right away.
-          * And if we did not initialize, then even though there was zero in 
the cell,
-          *  the compiler can not guess about this, and generates the `load`, 
`increment`, `store` code.
-          */
-        if (inserted) new (lookup_result_get_mapped(it)) mapped_type();
-
-        return *lookup_result_get_mapped(it);
-    }
-
-    template <typename MappedType>
-    char* get_null_key_data() {
-        return nullptr;
-    }
-    bool has_null_key_data() const { return false; }
-};
-
-template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
-          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
-class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, 
Allocator> {
+namespace doris {
+template <typename Key, typename Hash = DefaultHash<Key>>
+class JoinHashTable {
 public:
-    using Self = JoinHashMapTable;
-    using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
-
     using key_type = Key;
-    using value_type = typename Cell::value_type;
-    using mapped_type = typename Cell::Mapped;
-
-    using LookupResult = typename Base::LookupResult;
+    using mapped_type = void*;
+    using value_type = void*;
+    size_t hash(const Key& x) const { return Hash()(x); }
 
     static uint32_t calc_bucket_size(size_t num_elem) {
         size_t expect_bucket_size = num_elem + (num_elem - 1) / 7;
@@ -226,17 +54,17 @@ public:
         first.resize(bucket_size + 1);
         next.resize(num_elem);
 
-        if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+        if constexpr (JoinOpType == TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
             visited.resize(num_elem);
         }
     }
 
     uint32_t get_bucket_size() const { return bucket_size; }
 
-    size_t size() const { return Base::size() == 0 ? next.size() : 
Base::size(); }
+    size_t size() const { return next.size(); }
 
     std::vector<uint8_t>& get_visited() { return visited; }
 
@@ -255,9 +83,8 @@ public:
     auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
build_idx_map,
                     int probe_idx, uint32_t build_idx, int probe_rows,
                     uint32_t* __restrict probe_idxs, bool& probe_visited,
-                    uint32_t* __restrict build_idxs,
-                    doris::vectorized::ColumnFilterHelper* mark_column) {
-        if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) 
{
+                    uint32_t* __restrict build_idxs, 
vectorized::ColumnFilterHelper* mark_column) {
+        if constexpr (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
             if (_empty_build_side) {
                 return _process_null_aware_left_anti_join_for_empty_build_side<
                         JoinOpType, with_other_conjuncts, is_mark_join>(
@@ -275,22 +102,21 @@ public:
                                                 probe_idxs, build_idxs, 
mark_column);
         }
 
-        if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
-                      JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
+        if constexpr (JoinOpType == TJoinOp::INNER_JOIN || JoinOpType == 
TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) {
             return _find_batch_inner_outer_join<JoinOpType>(keys, 
build_idx_map, probe_idx,
                                                             build_idx, 
probe_rows, probe_idxs,
                                                             probe_visited, 
build_idxs);
         }
-        if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
-                      JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
-                      JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) 
{
+        if constexpr (JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+                      JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
+                      JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
             return _find_batch_left_semi_anti<JoinOpType, need_judge_null>(
                     keys, build_idx_map, probe_idx, probe_rows, probe_idxs);
         }
-        if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
-                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+        if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
             return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx, 
probe_rows);
         }
         return std::tuple {0, 0U, 0};
@@ -306,7 +132,7 @@ public:
         while (count < batch_size && iter_idx < elem_num) {
             const auto matched = visited[iter_idx];
             build_idxs[count] = iter_idx;
-            if constexpr (JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) {
                 count += !matched;
             } else {
                 count += matched;
@@ -336,7 +162,7 @@ private:
     auto _find_batch_mark(const Key* __restrict keys, const uint32_t* 
__restrict build_idx_map,
                           int probe_idx, int probe_rows, uint32_t* __restrict 
probe_idxs,
                           uint32_t* __restrict build_idxs,
-                          doris::vectorized::ColumnFilterHelper* mark_column) {
+                          vectorized::ColumnFilterHelper* mark_column) {
         auto matched_cnt = 0;
         const auto batch_size = max_batch_size;
 
@@ -351,8 +177,8 @@ private:
                 // mark result as null when probe row is null
                 mark_column->insert_null();
             } else {
-                bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? 
build_idx != 0
-                                                                            : 
build_idx == 0;
+                bool matched =
+                        JoinOpType == TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 
: build_idx == 0;
                 if (!matched && _has_null_key) {
                     mark_column->insert_null();
                 } else {
@@ -370,8 +196,8 @@ private:
     template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
     auto _process_null_aware_left_anti_join_for_empty_build_side(
             int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
-            uint32_t* __restrict build_idxs, 
doris::vectorized::ColumnFilterHelper* mark_column) {
-        static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
+            uint32_t* __restrict build_idxs, vectorized::ColumnFilterHelper* 
mark_column) {
+        static_assert(JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
         auto matched_cnt = 0;
         const auto batch_size = max_batch_size;
 
@@ -427,8 +253,7 @@ private:
             while (build_idx && keys[probe_idx] != build_keys[build_idx]) {
                 build_idx = next[build_idx];
             }
-            bool matched =
-                    JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx 
!= 0 : build_idx == 0;
+            bool matched = JoinOpType == TJoinOp::LEFT_SEMI_JOIN ? build_idx 
!= 0 : build_idx == 0;
             probe_idxs[matched_cnt] = probe_idx++;
             matched_cnt += matched;
         }
@@ -444,8 +269,8 @@ private:
 
         auto do_the_probe = [&]() {
             while (build_idx && matched_cnt < batch_size) {
-                if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
-                              JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+                if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+                              JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
                     if (!visited[build_idx] && keys[probe_idx] == 
build_keys[build_idx]) {
                         probe_idxs[matched_cnt] = probe_idx;
                         build_idxs[matched_cnt] = build_idx;
@@ -459,11 +284,11 @@ private:
                 build_idx = next[build_idx];
             }
 
-            if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
-                          JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
-                          JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
-                          JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
-                          JoinOpType == 
doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
+            if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
+                          JoinOpType == TJoinOp::FULL_OUTER_JOIN ||
+                          JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
+                          JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+                          JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
                 // may over batch_size when emplace 0 into build_idxs
                 if (!build_idx) {
                     probe_idxs[matched_cnt] = probe_idx;
@@ -503,8 +328,8 @@ private:
                     probe_idxs[matched_cnt] = probe_idx;
                     build_idxs[matched_cnt] = build_idx;
                     matched_cnt++;
-                    if constexpr (JoinOpType == 
doris::TJoinOp::RIGHT_OUTER_JOIN ||
-                                  JoinOpType == 
doris::TJoinOp::FULL_OUTER_JOIN) {
+                    if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
+                                  JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
                         if (!visited[build_idx]) {
                             visited[build_idx] = 1;
                         }
@@ -513,8 +338,8 @@ private:
                 build_idx = next[build_idx];
             }
 
-            if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
-                          JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) {
+            if constexpr (JoinOpType == TJoinOp::LEFT_OUTER_JOIN ||
+                          JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
                 // `(!matched_cnt || probe_idxs[matched_cnt - 1] != 
probe_idx)` means not match one build side
                 probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1] 
== probe_idx);
                 if (!build_idx) {
@@ -553,27 +378,8 @@ private:
 
     // use in iter hash map
     mutable uint32_t iter_idx = 1;
-    Cell cell;
-    doris::vectorized::Arena* pool;
+    vectorized::Arena* pool;
     bool _has_null_key = false;
     bool _empty_build_side = true;
 };
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
-          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
-using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, 
Grower, Allocator>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
-using JoinFixedHashMap = JoinHashMapTable<Key, HashMapCell<Key, Mapped, Hash>, 
Hash>;
-
-template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
-          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
-using HashMapWithSavedHash =
-        HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, 
Grower, Allocator>;
-
-template <typename Key, typename Mapped, typename Hash, size_t 
initial_size_degree>
-using HashMapWithStackMemory = HashMapTable<
-        Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash,
-        HashTableGrower<initial_size_degree>,
-        HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) *
-                                          sizeof(HashMapCellWithSavedHash<Key, 
Mapped, Hash>)>>;
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h 
b/be/src/vec/common/hash_table/partitioned_hash_map.h
index f23b0a347de..a2db6fece35 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_map.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_map.h
@@ -22,7 +22,7 @@
 #include "vec/common/hash_table/hash_map.h"
 #include "vec/common/hash_table/partitioned_hash_table.h"
 #include "vec/common/hash_table/ph_hash_map.h"
-
+namespace doris {
 template <typename ImplTable>
 class PartitionedHashMapTable : public PartitionedHashTable<ImplTable> {
 public:
@@ -57,3 +57,4 @@ using PartitionedHashMap =
 
 template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>>
 using PHNormalHashMap = PHHashMap<Key, Mapped, Hash, false>;
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/string_hash_map.h 
b/be/src/vec/common/hash_table/string_hash_map.h
index f1efd0fab12..61d304cf7d8 100644
--- a/be/src/vec/common/hash_table/string_hash_map.h
+++ b/be/src/vec/common/hash_table/string_hash_map.h
@@ -23,6 +23,7 @@
 #include "vec/common/hash_table/hash_map.h"
 #include "vec/common/hash_table/string_hash_table.h"
 
+namespace doris {
 template <typename Key, typename TMapped>
 struct StringHashMapCell : public HashMapCell<Key, TMapped, 
StringHashTableHash, HashTableNoState> {
     using Base = HashMapCell<Key, TMapped, StringHashTableHash, 
HashTableNoState>;
@@ -152,3 +153,4 @@ public:
     }
     bool has_null_key_data() const { return false; }
 };
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp 
b/be/src/vec/exec/join/vhash_join_node.cpp
index 39e05936397..e6c00d94a2f 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -1049,7 +1049,7 @@ void HashJoinNode::_hash_table_init(RuntimeState* state) {
                     return;
                 }
 
-                if (!try_get_hash_map_context_fixed<JoinFixedHashMap, 
HashCRC32, RowRefListType>(
+                if (!try_get_hash_map_context_fixed<JoinHashMap, HashCRC32, 
RowRefListType>(
                             *_hash_table_variants, _build_expr_ctxs)) {
                     
_hash_table_variants->emplace<SerializedHashTableContext<RowRefListType>>();
                 }
diff --git a/be/src/vec/exec/vset_operation_node.cpp 
b/be/src/vec/exec/vset_operation_node.cpp
index 3c47638ef42..75317b4c933 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -183,16 +183,16 @@ void VSetOperationNode<is_intersect>::hash_table_init() {
         switch (_child_expr_lists[0][0]->root()->result_type()) {
         case TYPE_BOOLEAN:
         case TYPE_TINYINT:
-            
_hash_table_variants->emplace<I8HashTableContext<RowRefListWithFlags>>();
+            
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt8>>();
             break;
         case TYPE_SMALLINT:
-            
_hash_table_variants->emplace<I16HashTableContext<RowRefListWithFlags>>();
+            
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt16>>();
             break;
         case TYPE_INT:
         case TYPE_FLOAT:
         case TYPE_DATEV2:
         case TYPE_DECIMAL32:
-            
_hash_table_variants->emplace<I32HashTableContext<RowRefListWithFlags>>();
+            
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt32>>();
             break;
         case TYPE_BIGINT:
         case TYPE_DOUBLE:
@@ -200,21 +200,21 @@ void VSetOperationNode<is_intersect>::hash_table_init() {
         case TYPE_DATE:
         case TYPE_DECIMAL64:
         case TYPE_DATETIMEV2:
-            
_hash_table_variants->emplace<I64HashTableContext<RowRefListWithFlags>>();
+            
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt64>>();
             break;
         case TYPE_LARGEINT:
         case TYPE_DECIMALV2:
         case TYPE_DECIMAL128I:
-            
_hash_table_variants->emplace<I128HashTableContext<RowRefListWithFlags>>();
+            
_hash_table_variants->emplace<SetPrimaryTypeHashTableContext<UInt128>>();
             break;
         default:
-            
_hash_table_variants->emplace<SerializedHashTableContext<RowRefListWithFlags>>();
+            _hash_table_variants->emplace<SetSerializedHashTableContext>();
         }
         return;
     }
-    if (!try_get_hash_map_context_fixed<JoinFixedHashMap, HashCRC32, 
RowRefListWithFlags>(
+    if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32, 
RowRefListWithFlags>(
                 *_hash_table_variants, _child_expr_lists[0])) {
-        
_hash_table_variants->emplace<SerializedHashTableContext<RowRefListWithFlags>>();
+        _hash_table_variants->emplace<SetSerializedHashTableContext>();
     }
 }
 
diff --git a/be/src/vec/exec/vset_operation_node.h 
b/be/src/vec/exec/vset_operation_node.h
index b1ab9c47650..ce5a8eb1dbc 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -31,6 +31,7 @@
 #include "vec/columns/column.h"
 #include "vec/common/arena.h"
 #include "vec/core/block.h"
+#include "vec/core/types.h"
 #include "vec/exec/join/process_hash_table_probe.h"
 #include "vec/exec/join/vhash_join_node.h"
 
@@ -45,18 +46,14 @@ class VExprContext;
 struct RowRefListWithFlags;
 
 using SetHashTableVariants = std::variant<
-        std::monostate, SerializedHashTableContext<RowRefListWithFlags>,
-        I8HashTableContext<RowRefListWithFlags>, 
I16HashTableContext<RowRefListWithFlags>,
-        I32HashTableContext<RowRefListWithFlags>, 
I64HashTableContext<RowRefListWithFlags>,
-        I128HashTableContext<RowRefListWithFlags>, 
I256HashTableContext<RowRefListWithFlags>,
-        I64FixedKeyHashTableContext<true, RowRefListWithFlags>,
-        I64FixedKeyHashTableContext<false, RowRefListWithFlags>,
-        I128FixedKeyHashTableContext<true, RowRefListWithFlags>,
-        I128FixedKeyHashTableContext<false, RowRefListWithFlags>,
-        I256FixedKeyHashTableContext<true, RowRefListWithFlags>,
-        I256FixedKeyHashTableContext<false, RowRefListWithFlags>,
-        I136FixedKeyHashTableContext<true, RowRefListWithFlags>,
-        I136FixedKeyHashTableContext<false, RowRefListWithFlags>>;
+        std::monostate, MethodSerialized<HashMap<StringRef, 
RowRefListWithFlags>>,
+        SetPrimaryTypeHashTableContext<UInt8>, 
SetPrimaryTypeHashTableContext<UInt16>,
+        SetPrimaryTypeHashTableContext<UInt32>, 
SetPrimaryTypeHashTableContext<UInt64>,
+        SetPrimaryTypeHashTableContext<UInt128>, 
SetPrimaryTypeHashTableContext<UInt256>,
+        SetFixedKeyHashTableContext<UInt64, true>, 
SetFixedKeyHashTableContext<UInt64, false>,
+        SetFixedKeyHashTableContext<UInt128, true>, 
SetFixedKeyHashTableContext<UInt128, false>,
+        SetFixedKeyHashTableContext<UInt256, true>, 
SetFixedKeyHashTableContext<UInt256, false>,
+        SetFixedKeyHashTableContext<UInt136, true>, 
SetFixedKeyHashTableContext<UInt136, false>>;
 
 template <bool is_intersect>
 class VSetOperationNode final : public ExecNode {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to