This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new dfe308f501 [Improvement](join) refine prefetch strategy (#13286)
dfe308f501 is described below
commit dfe308f501fe7acb927b00ee2b9c1096f49019d9
Author: Gabriel <[email protected]>
AuthorDate: Wed Oct 12 19:02:06 2022 +0800
[Improvement](join) refine prefetch strategy (#13286)
---
be/src/vec/common/columns_hashing_impl.h | 6 ++++++
be/src/vec/common/hash_table/hash_table.h | 11 +++++++++++
be/src/vec/exec/join/vhash_join_node.cpp | 18 +++++++++++-------
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/be/src/vec/common/columns_hashing_impl.h
b/be/src/vec/common/columns_hashing_impl.h
index 7bf4fd3132..e2f03f26c6 100644
--- a/be/src/vec/common/columns_hashing_impl.h
+++ b/be/src/vec/common/columns_hashing_impl.h
@@ -180,6 +180,12 @@ public:
data.prefetch(key_holder);
}
+ template <bool READ, typename Data>
+ ALWAYS_INLINE void prefetch(Data& data, size_t row, Arena& pool) {
+ auto key_holder = static_cast<Derived&>(*this).get_key_holder(row,
pool);
+ data.template prefetch<READ>(key_holder); // forward READ to data's prefetch<READ> for the key of `row`
+ }
+
protected:
Cache cache;
diff --git a/be/src/vec/common/hash_table/hash_table.h
b/be/src/vec/common/hash_table/hash_table.h
index a59cf972ff..e588ed1b8a 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -908,6 +908,17 @@ public:
__builtin_prefetch(&buf[place_value]);
}
+ template <bool READ, typename KeyHolder>
+ void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
+ // Prefetch the buf slot this key hashes to. __builtin_prefetch has two optional arguments:
+ // 'rw': 0 means the memory access is a read, 1 means it is a write (selected by READ)
+ // 'locality': 0-3. 0 means no temporal locality. 3 means high
temporal locality.
+ const auto& key = key_holder_get_key(key_holder);
+ auto hash_value = hash(key);
+ auto place_value = grower.place(hash_value);
+ __builtin_prefetch(&buf[place_value], READ ? 0 : 1, 1); // locality 1: low temporal locality
+ }
+
/// Reinsert node pointed to by iterator
void ALWAYS_INLINE reinsert(iterator& it, size_t hash_value) {
reinsert(*it.get_ptr(), hash_value);
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp
b/be/src/vec/exec/join/vhash_join_node.cpp
index 05040fc2b6..dd77bc2a18 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -30,6 +30,10 @@
namespace doris::vectorized {
+// Number of rows to look ahead when prefetching hash table slots in the join build/probe loops.
+// TODO: the best step is machine-dependent; also provide a SQL hint so users can tune it.
+static constexpr int PREFETCH_STEP = 64;
+
using ProfileCounter = RuntimeProfile::Counter;
template <class HashTableContext>
struct ProcessHashTableBuild {
@@ -80,8 +84,9 @@ struct ProcessHashTableBuild {
auto emplace_result =
key_getter.emplace_key(hash_table_ctx.hash_table, k,
_join_node->_arena);
- if (k + 1 < _rows) {
- key_getter.prefetch(hash_table_ctx.hash_table, k + 1,
_join_node->_arena);
+ if (k + PREFETCH_STEP < _rows) {
+ key_getter.template prefetch<false>(hash_table_ctx.hash_table,
k + PREFETCH_STEP,
+ _join_node->_arena);
}
if (emplace_result.is_inserted()) {
@@ -321,6 +326,10 @@ struct ProcessHashTableProbe {
_arena)) {nullptr, false}
:
key_getter.find_key(hash_table_ctx.hash_table,
_probe_index,
_arena);
+ // prefetch is more useful while matching to multiple rows
+ if (_probe_index + PREFETCH_STEP < _probe_rows)
+ key_getter.template
prefetch<true>(hash_table_ctx.hash_table,
+ _probe_index +
PREFETCH_STEP, _arena);
if constexpr (JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) {
if (!find_result.is_found()) {
@@ -344,11 +353,6 @@ struct ProcessHashTableProbe {
++current_offset;
}
} else {
- // prefetch is more useful while matching to
multiple rows
- if (_probe_index + 2 < _probe_rows)
- key_getter.prefetch(hash_table_ctx.hash_table,
_probe_index + 2,
- _arena);
-
for (auto it = mapped.begin(); it.ok(); ++it) {
if constexpr (!is_right_semi_anti_join) {
if (current_offset < _batch_size) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]