This is an automated email from the ASF dual-hosted git repository.

BiteTheDDDDt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ec722fd6a12 [fix](be) Avoid signaling unbuilt shared hash table 
(#63345)
ec722fd6a12 is described below

commit ec722fd6a12ec21cb8a6098e1d44ad26c2b12f56
Author: Pxl <[email protected]>
AuthorDate: Wed May 20 16:12:07 2026 +0800

    [fix](be) Avoid signaling unbuilt shared hash table (#63345)
    
    For shared hash table hash joins, the builder task may leave
    `process_build_block()` before the hash table is actually built, for
    example when the build side returns an error before the EOS build step
    completes. In that state the shared hash table variant can still be
    `monostate`.
    
    `HashJoinBuildSinkLocalState::close()` previously set `_signaled = true`
    whenever the builder was not terminated. That wakes non-builder tasks
    even when the shared hash table was never built, so they can reach the
    shared hash table copy path and report a misleading `Hash table type
    mismatch when share hash table` instead of preserving the original
    build-side error.
    
    This PR gates `_signaled` on the actual shared hash table state.
    Non-builder tasks are signaled only when the builder is not terminated
    and the shared hash table variant is no longer `monostate`. If the
    builder closes after an error before the hash table is built,
    `_signaled` stays false and non-builders take the existing EOF guard
    instead of visiting an unbuilt hash table.
---
 be/src/exec/operator/hashjoin_build_sink.cpp       | 22 +++++++++---------
 be/test/exec/operator/hashjoin_build_sink_test.cpp | 26 ++++++++++++++++++++++
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp 
b/be/src/exec/operator/hashjoin_build_sink.cpp
index b75bd253026..338c43cac7a 100644
--- a/be/src/exec/operator/hashjoin_build_sink.cpp
+++ b/be/src/exec/operator/hashjoin_build_sink.cpp
@@ -32,6 +32,15 @@
 #include "util/uid_util.h"
 
 namespace doris {
+namespace {
+bool hash_table_built(const HashJoinSharedState* shared_state) {
+    DORIS_CHECK(shared_state != nullptr);
+    DORIS_CHECK(!shared_state->hash_table_variant_vector.empty());
+    return !std::holds_alternative<std::monostate>(
+            shared_state->hash_table_variant_vector.front()->method_variant);
+}
+} // namespace
+
 
HashJoinBuildSinkLocalState::HashJoinBuildSinkLocalState(DataSinkOperatorXBase* 
parent,
                                                          RuntimeState* state)
         : JoinBuildSinkLocalState(parent, state) {
@@ -243,16 +252,9 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* 
state, Status exec_statu
 
         if (p._use_shared_hash_table) {
             LockGuard lock(p._mutex);
-            // Only signal non-builder tasks when the builder actually built 
the hash table.
-            // When the builder is terminated (woken up early because the 
probe side finished
-            // first), it never called process_build_block() so the hash table 
variant is still
-            // monostate. Setting _signaled=true in that case would cause 
non-builder tasks to
-            // enter std::visit on monostate and crash with "Hash table type 
mismatch".
-            //
-            // _terminated is reliably true here when the task was woken up 
early, because
-            // operator terminate() is called from the execute() Defer in 
PipelineTask
-            // before close() is invoked.
-            if (!_terminated) {
+            // Do not wake non-builders into a monostate hash table after 
early termination/errors.
+            const auto should_signal = !_terminated && 
hash_table_built(_shared_state);
+            if (should_signal) {
                 p._signaled = true;
             }
             for (auto& dep : _shared_state->sink_deps) {
diff --git a/be/test/exec/operator/hashjoin_build_sink_test.cpp 
b/be/test/exec/operator/hashjoin_build_sink_test.cpp
index a7cd7cda595..b5cd8ee3589 100644
--- a/be/test/exec/operator/hashjoin_build_sink_test.cpp
+++ b/be/test/exec/operator/hashjoin_build_sink_test.cpp
@@ -540,6 +540,32 @@ TEST_F(SharedHashTableSignalTest, 
BuilderTerminatedDoesNotSignal) {
     ASSERT_TRUE(st.ok()) << st.to_string();
 }
 
+TEST_F(SharedHashTableSignalTest, UnbuiltHashTableDoesNotSignal) {
+    auto setup = setup_broadcast_join(2);
+    auto* builder_local_state =
+            
assert_cast<HashJoinBuildSinkLocalState*>(setup.builder_state->get_sink_local_state());
+
+    ASSERT_FALSE(builder_local_state->_terminated);
+    ASSERT_TRUE(
+            
std::holds_alternative<std::monostate>(setup.shared_state->cast<HashJoinSharedState>()
+                                                           
->hash_table_variant_vector.front()
+                                                           ->method_variant));
+
+    auto st = setup.sink_op->close(setup.builder_state, Status::OK());
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    ASSERT_FALSE(setup.sink_op->_signaled)
+            << "_signaled should NOT be set when build failed before hash 
table was built";
+
+    auto* non_builder_state = setup.non_builder_states[0].get();
+    Block empty_block;
+    st = setup.sink_op->sink(non_builder_state, &empty_block, true);
+    ASSERT_TRUE(st.is<ErrorCode::END_OF_FILE>())
+            << "Non-builder should return EOF after builder failure, got: " << 
st.to_string();
+
+    st = setup.sink_op->close(non_builder_state, Status::OK());
+    ASSERT_TRUE(st.ok()) << st.to_string();
+}
+
 // Test the normal path: when the builder completes normally (not terminated),
 // close() should set _signaled=true, and non-builder tasks should proceed
 // to share the hash table successfully.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to