This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new c3fd70c8d80 branch-3.1: [enhance](agg) Make aggfuncs that not support
multi_distinct can be split in splitMultiDistinct (#58973)
c3fd70c8d80 is described below
commit c3fd70c8d80e0d8a8e29de7b426f24db306299c4
Author: feiniaofeiafei <[email protected]>
AuthorDate: Fri Dec 19 14:23:28 2025 +0800
branch-3.1: [enhance](agg) Make aggfuncs that not support multi_distinct
can be split in splitMultiDistinct (#58973)
1. Before this PR, a query like the following was split with a CTE to compute
the aggregates, even though that is not necessary:
select count(distinct a),sum(distinct a) from t1;
After this PR, such a query is no longer split.
2. Before this PR, a query like the following reported an error:
select array_agg(distinct a), array_agg(distinct b) from t1;
errCode = 2, detailMessage = array_agg(DISTINCT a#1) can't support multi
distinct.
After this PR, the query is split with a CTE and computed, and no error is
reported.
---
.../nereids/rules/rewrite/SplitMultiDistinct.java | 16 +++++++-----
.../nereids/trees/plans/algebra/Aggregate.java | 14 ++++++++++
.../distinct_split/disitinct_split.out | 30 ++++++++++++++++++++++
.../data/nereids_syntax_p0/analyze_agg.out | 3 +++
.../distinct_split/disitinct_split.groovy | 10 ++++++++
.../suites/nereids_syntax_p0/analyze_agg.groovy | 5 +---
6 files changed, 67 insertions(+), 11 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
index 6adb3270b5b..437d3a186f6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
@@ -47,10 +47,8 @@ import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -216,23 +214,27 @@ public class SplitMultiDistinct extends
DefaultPlanRewriter<DistinctSplitContext
if (agg.getSourceRepeat().isPresent()) {
return false;
}
- Set<Expression> distinctFunc = new HashSet<>();
+ if (agg.distinctFuncNum() < 2 || agg.getDistinctArguments().size() <
2) {
+ return false;
+ }
boolean distinctMultiColumns = false;
+ boolean hasNotSupportMultiDistinctFunc = false;
for (NamedExpression namedExpression : agg.getOutputExpressions()) {
if (!(namedExpression instanceof Alias) ||
!(namedExpression.child(0) instanceof AggregateFunction)) {
continue;
}
AggregateFunction aggFunc = (AggregateFunction)
namedExpression.child(0);
- if (aggFunc instanceof SupportMultiDistinct &&
aggFunc.isDistinct()) {
+ if (aggFunc.isDistinct()) {
+ hasNotSupportMultiDistinctFunc = hasNotSupportMultiDistinctFunc
+ || !(aggFunc instanceof SupportMultiDistinct);
aliases.add((Alias) namedExpression);
- distinctFunc.add(aggFunc);
distinctMultiColumns = distinctMultiColumns ||
isDistinctMultiColumns(aggFunc);
} else {
otherAggFuncs.add((Alias) namedExpression);
}
}
- if (distinctFunc.size() <= 1) {
- return false;
+ if (hasNotSupportMultiDistinctFunc) {
+ return true;
}
// when this aggregate is not distinctMultiColumns, and group by
expressions is not empty
// e.g. sql1: select count(distinct a), count(distinct b) from t1
group by c;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
index 7a283c740e5..371dd43f91d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
@@ -106,4 +106,18 @@ public interface Aggregate<CHILD_TYPE extends Plan>
extends UnaryPlan<CHILD_TYPE
return getOutputExpressions().stream().allMatch(e -> e instanceof Slot)
&& getGroupByExpressions().stream().allMatch(e -> e instanceof
Slot);
}
+
+ /**
+ * distinctFuncNum
+ * @return number of distinct aggregate functions
+ */
+ default int distinctFuncNum() {
+ int num = 0;
+ for (AggregateFunction aggFunc : getAggregateFunctions()) {
+ if (aggFunc.isDistinct()) {
+ ++num;
+ }
+ }
+ return num;
+ }
}
diff --git
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
index ede0fb5259c..ceed693e8cc 100644
--- a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
+++ b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
@@ -501,3 +501,33 @@ PhysicalResultSink
-- !null_hash --
1 \N 0 0.0
+-- !array_agg_nogby --
+[2] [1]
+
+-- !array_agg_gby --
+[2] [1]
+[2] [1]
+
+-- !array_agg_and_other --
+[2] 2
+
+-- !not_split_cte_when_same_col --
+3 2 1.5
+
+-- !not_split_cte_when_same_col_shape --
+PhysicalResultSink
+--hashAgg[DISTINCT_LOCAL]
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalOlapScan[test_distinct_multi]
+
+-- !not_split_cte_when_same_col_gby --
+3 2 1.5
+
+-- !not_split_cte_when_same_col_gby__shape --
+PhysicalResultSink
+--hashAgg[DISTINCT_LOCAL]
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalOlapScan[test_distinct_multi]
+
diff --git a/regression-test/data/nereids_syntax_p0/analyze_agg.out
b/regression-test/data/nereids_syntax_p0/analyze_agg.out
index 8316c4aefe2..b0428c9e56b 100644
--- a/regression-test/data/nereids_syntax_p0/analyze_agg.out
+++ b/regression-test/data/nereids_syntax_p0/analyze_agg.out
@@ -1,6 +1,9 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !sql --
+-- !multi_agg_distinct_func --
+0 \N
+
-- !test_sum0 --
0 0
0 3
diff --git
a/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
b/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
index 9efe6ce487a..569c086112c 100644
---
a/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
+++
b/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
@@ -213,4 +213,14 @@ suite("distinct_split") {
sql "create table test_distinct_multi_null_hash(a int, b int, c int, d
varchar(10), e date) distributed by hash(a) properties('replication_num'='1');"
sql "insert into test_distinct_multi_null_hash
values(1,null,null,null,'2024-12-08');"
qt_null_hash "SELECT a, b, count(distinct c,e), count(distinct
concat(d,e))/count(distinct e) FROM test_distinct_multi_null_hash where e =
'2024-12-08' GROUP BY a, b;"
+
+ // test agg function not support multi_distinct
+ sql "select array_agg(distinct b), array_agg(distinct a) from
test_distinct_multi"
+ qt_array_agg_nogby "select array_agg(distinct b), array_agg(distinct a)
from test_distinct_multi where a=1 and b=2"
+ qt_array_agg_gby "select array_agg(distinct b), array_agg(distinct a) from
test_distinct_multi where a=1 and b=2 group by c"
+ qt_array_agg_and_other "select array_agg(distinct b), count(distinct a)
from test_distinct_multi where b=2"
+ qt_not_split_cte_when_same_col "select sum(distinct a), count(distinct
a),avg(distinct a) from test_distinct_multi"
+ qt_not_split_cte_when_same_col_shape "explain shape plan select
sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi"
+ order_qt_not_split_cte_when_same_col_gby "select sum(distinct a),
count(distinct a),avg(distinct a) from test_distinct_multi group by b"
+ qt_not_split_cte_when_same_col_gby__shape "explain shape plan select
sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi
group by b"
}
diff --git a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
index cf93cad471c..5903b9e474e 100644
--- a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
+++ b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
@@ -70,10 +70,7 @@ suite("analyze_agg") {
tt2.c;
"""
- test {
- sql "select count(distinct t2.b), variance(distinct t2.c) from t2"
- exception "variance(DISTINCT c#2) can't support multi distinct."
- }
+ qt_multi_agg_distinct_func "select count(distinct t2.b), variance(distinct
t2.c) from t2"
// should not bind g /g in group by again, otherwise will throw exception
sql "select g / g as nu, sum(c) from t2 group by nu"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]