This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch tpc_preview6
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/tpc_preview6 by this push:
     new 43a1f7000b6 tpch 12/14 不下推AGG.当任一 group key的ndv 接近 行数(0.9 倍)时,不下推agg
43a1f7000b6 is described below

commit 43a1f7000b6dfdcc2a0789b4440cc39354eae884
Author: englefly <[email protected]>
AuthorDate: Thu Feb 5 15:06:44 2026 +0800

    tpch 12/14 不下推AGG.当任一 group key的ndv 接近 行数(0.9 倍)时,不下推agg
---
 .../nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java | 5 ++++-
 regression-test/data/shape_check/tpch_sf1000/hint/q12.out        | 5 ++---
 regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out    | 5 ++---
 regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out    | 9 +++------
 regression-test/data/shape_check/tpch_sf1000/shape/q12.out       | 5 ++---
 regression-test/data/shape_check/tpch_sf1000/shape/q14.out       | 9 +++------
 6 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
index d61a0ef0e48..83a97a8e28a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
@@ -471,7 +471,7 @@ public class EagerAggRewriter extends DefaultPlanRewriter<PushDownAggContext> {
         if (stats == null) {
             stats = plan.accept(derive, new StatsDerive.DeriveContext());
         }
-        if (stats.getRowCount() == 0) {
+        if (stats.getRowCount() <= 0) {
             return false;
         }
 
@@ -488,6 +488,9 @@ public class EagerAggRewriter extends DefaultPlanRewriter<PushDownAggContext> {
             if (colStats.isUnKnown) {
                 return false;
             }
+            if (stats.getRowCount() * 0.9 <= colStats.ndv) {
+                return false;
+            }
             groupKeysStats.add(colStats);
             cards[groupByCardinality(colStats, stats.getRowCount())].add(colStats);
         }
diff --git a/regression-test/data/shape_check/tpch_sf1000/hint/q12.out b/regression-test/data/shape_check/tpch_sf1000/hint/q12.out
index e347b5fa0b7..bd619416286 100644
--- a/regression-test/data/shape_check/tpch_sf1000/hint/q12.out
+++ b/regression-test/data/shape_check/tpch_sf1000/hint/q12.out
@@ -9,9 +9,8 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN colocated] 
hashCondition=((orders.o_orderkey = lineitem.l_orderkey)) otherCondition=()
-------------------hashAgg[GLOBAL]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[orders]
+------------------PhysicalProject
+--------------------PhysicalOlapScan[orders]
 ------------------PhysicalProject
 --------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) 
and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= 
'1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate 
< lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP'))
 ----------------------PhysicalOlapScan[lineitem]
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out
index dc6bf364a29..8df830dd428 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out
@@ -9,9 +9,8 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN colocated] 
hashCondition=((orders.o_orderkey = lineitem.l_orderkey)) otherCondition=() 
build RFs:RF0 l_orderkey->[o_orderkey]
-------------------hashAgg[GLOBAL]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------PhysicalProject
+--------------------PhysicalOlapScan[orders] apply RFs: RF0
 ------------------PhysicalProject
 --------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) 
and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= 
'1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate 
< lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP'))
 ----------------------PhysicalOlapScan[lineitem]
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out
index c3cf6656e5c..6df1a05fa3b 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out
@@ -9,10 +9,7 @@ PhysicalResultSink
 ------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((lineitem.l_partkey = part.p_partkey)) otherCondition=() build 
RFs:RF0 l_partkey->[p_partkey]
 --------------PhysicalProject
 ----------------PhysicalOlapScan[part] apply RFs: RF0
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute[DistributionSpecHash]
-------------------hashAgg[LOCAL]
---------------------PhysicalProject
-----------------------filter((lineitem.l_shipdate < '1995-10-01') and 
(lineitem.l_shipdate >= '1995-09-01'))
-------------------------PhysicalOlapScan[lineitem]
+--------------PhysicalProject
+----------------filter((lineitem.l_shipdate < '1995-10-01') and 
(lineitem.l_shipdate >= '1995-09-01'))
+------------------PhysicalOlapScan[lineitem]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q12.out b/regression-test/data/shape_check/tpch_sf1000/shape/q12.out
index dc6bf364a29..8df830dd428 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q12.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q12.out
@@ -9,9 +9,8 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN colocated] 
hashCondition=((orders.o_orderkey = lineitem.l_orderkey)) otherCondition=() 
build RFs:RF0 l_orderkey->[o_orderkey]
-------------------hashAgg[GLOBAL]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------PhysicalProject
+--------------------PhysicalOlapScan[orders] apply RFs: RF0
 ------------------PhysicalProject
 --------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) 
and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= 
'1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate 
< lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP'))
 ----------------------PhysicalOlapScan[lineitem]
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q14.out b/regression-test/data/shape_check/tpch_sf1000/shape/q14.out
index c3cf6656e5c..6df1a05fa3b 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q14.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q14.out
@@ -9,10 +9,7 @@ PhysicalResultSink
 ------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((lineitem.l_partkey = part.p_partkey)) otherCondition=() build 
RFs:RF0 l_partkey->[p_partkey]
 --------------PhysicalProject
 ----------------PhysicalOlapScan[part] apply RFs: RF0
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute[DistributionSpecHash]
-------------------hashAgg[LOCAL]
---------------------PhysicalProject
-----------------------filter((lineitem.l_shipdate < '1995-10-01') and 
(lineitem.l_shipdate >= '1995-09-01'))
-------------------------PhysicalOlapScan[lineitem]
+--------------PhysicalProject
+----------------filter((lineitem.l_shipdate < '1995-10-01') and 
(lineitem.l_shipdate >= '1995-09-01'))
+------------------PhysicalOlapScan[lineitem]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to