This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c138902a0de [Fix](bug) fix the divide zero in local shuffle (#37906)
c138902a0de is described below

commit c138902a0dea538343acabff7b54cbd6010e934c
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Tue Jul 16 20:37:32 2024 +0800

    [Fix](bug) fix the divide zero in local shuffle (#37906)
    
    If `num_buckets == 0`, the fragment is colocated by an exchange node
    rather than by a scan node. In that case, use `_num_instances` in place
    of `num_buckets` to prevent a division by zero. The colocate plan is
    still kept after the local shuffle.
    
    
    `coredump`:
    ```
    SIGFPE integer divide by zero (@0x56431791a54a) received by PID 33673 (TID 
37768 OR 0x7f8028018640) from PID 395421002; stack trace: ***
    0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/doris_branch-3.0/doris/be/src/common/signal_handler.h:421
    1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
    2# JVM_handle_linux_signal in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
    3# 0x00007F8C47895520 in /lib/x86_64-linux-gnu/libc.so.6
    4# doris::vectorized::Partitioner::do_partitioning(doris::RuntimeState*, 
doris::vectorized::Block*, doris::MemTracker*) const at 
/home/zcp/repo_center/doris_branch-3.0/doris/be/src/vec/runtime/partitioner.cpp:50
    5# doris::pipeline::ShuffleExchanger::sink(doris::RuntimeState*, 
doris::vectorized::Block*, bool, doris::pipeline::LocalExchangeSinkLocalState&) 
at 
/home/zcp/repo_center/doris_branch-3.0/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:33
    6# doris::pipeline::LocalExchangeSinkOperatorX::sink(doris::RuntimeState*, 
doris::vectorized::Block*, bool) in 
/mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be
    7# doris::pipeline::PipelineTask::execute(bool*) at 
/home/zcp/repo_center/doris_branch-3.0/doris/be/src/pipeline/pipeline_task.cpp:359
    8# doris::pipeline::TaskScheduler::_do_work(unsigned long) at 
/home/zcp/repo_center/doris_branch-3.0/doris/be/src/pipeline/task_scheduler.cpp:138
    9# doris::ThreadPool::dispatch_thread() in 
/mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be
    10# doris::Thread::supervise_thread(void*) at 
/home/zcp/repo_center/doris_branch-3.0/doris/be/src/util/thread.cpp:499
    11# start_thread at ./nptl/pthread_create.c:442
    12# 0x00007F8C47979850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
    ```
---
 be/src/pipeline/pipeline_fragment_context.cpp      |  6 ++-
 .../data/query_p0/limit/sql/withGroupByUnion.out   | 52 ++++++++++++++++++++++
 .../suites/query_p0/limit/sql/withGroupByUnion.sql |  1 +
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/be/src/pipeline/pipeline_fragment_context.cpp 
b/be/src/pipeline/pipeline_fragment_context.cpp
index 8138c7594b8..39555d3614e 100644
--- a/be/src/pipeline/pipeline_fragment_context.cpp
+++ b/be/src/pipeline/pipeline_fragment_context.cpp
@@ -884,8 +884,12 @@ Status PipelineFragmentContext::_plan_local_exchange(
             }
         }
 
+        // if 'num_buckets == 0' means the fragment is colocated by exchange 
node not the
+        // scan node. so here use `_num_instance` to replace the `num_buckets` 
to prevent dividing 0
+        // still keep colocate plan after local shuffle
         RETURN_IF_ERROR(_plan_local_exchange(
-                
_pipelines[pip_idx]->operator_xs().front()->ignore_data_hash_distribution()
+                
_pipelines[pip_idx]->operator_xs().front()->ignore_data_hash_distribution() ||
+                                num_buckets == 0
                         ? _num_instances
                         : num_buckets,
                 pip_idx, _pipelines[pip_idx], bucket_seq_to_instance_idx,
diff --git a/regression-test/data/query_p0/limit/sql/withGroupByUnion.out 
b/regression-test/data/query_p0/limit/sql/withGroupByUnion.out
new file mode 100644
index 00000000000..2d0e2af41c5
--- /dev/null
+++ b/regression-test/data/query_p0/limit/sql/withGroupByUnion.out
@@ -0,0 +1,52 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !withGroupByUnion --
+0      ALGERIA
+1      ALGERIA
+1      ARGENTINA
+1      BRAZIL
+1      CANADA
+1      CHINA
+1      EGYPT
+1      ETHIOPIA
+1      FRANCE
+1      GERMANY
+1      INDIA
+1      INDONESIA
+1      IRAN
+1      IRAQ
+1      JAPAN
+1      JORDAN
+1      KENYA
+1      MOROCCO
+1      MOZAMBIQUE
+1      PERU
+1      ROMANIA
+1      RUSSIA
+1      SAUDI ARABIA
+1      UNITED KINGDOM
+1      UNITED STATES
+1      VIETNAM
+2      BRAZIL
+3      CANADA
+4      EGYPT
+5      ETHIOPIA
+6      FRANCE
+7      GERMANY
+8      INDIA
+9      INDONESIA
+10     IRAN
+11     IRAQ
+12     JAPAN
+13     JORDAN
+14     KENYA
+15     MOROCCO
+16     MOZAMBIQUE
+17     PERU
+18     CHINA
+19     ROMANIA
+20     SAUDI ARABIA
+21     VIETNAM
+22     RUSSIA
+23     UNITED KINGDOM
+24     UNITED STATES
+
diff --git a/regression-test/suites/query_p0/limit/sql/withGroupByUnion.sql 
b/regression-test/suites/query_p0/limit/sql/withGroupByUnion.sql
new file mode 100644
index 00000000000..b47b0d17ff2
--- /dev/null
+++ b/regression-test/suites/query_p0/limit/sql/withGroupByUnion.sql
@@ -0,0 +1 @@
+select * from (select count(a.nationkey), a.name from tpch_tiny_nation a  
join[shuffle] tpch_tiny_nation b on a.name = b.name group by a.name union 
select sum(c.nationkey), c.name from tpch_tiny_nation c  join[shuffle] 
tpch_tiny_nation d on c.name = d.name group by c.name) t order by 1,2 limit 50;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to