This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a8864637701 [fix](Nereids) set correct sort key for aggregate (#45369)
a8864637701 is described below
commit a8864637701093e5aefe8996bc68ad88a944e143
Author: minghong <[email protected]>
AuthorDate: Wed Dec 18 14:33:10 2024 +0800
[fix](Nereids) set correct sort key for aggregate (#45369)
### What problem does this PR solve?
in previous #44042, we supported more patterns for PushTopnToAgg rule.
the new pattern:
topn
+-->agg(global)
+-->shuffle
+-->agg(local)
In order to support this new pattern, the group by keys and orderkeys
are identical, but group keys can be in different order.
that is
topn(orderkey=[B,A])->agg(groupkey=[A,B,C])
=>
topn(orderkey=[B, A, C]) ->agg(groupKey=[A, B, C])
---
.../doris/nereids/jobs/executor/Analyzer.java | 2 +-
.../doris/nereids/jobs/executor/Rewriter.java | 2 +
.../nereids/processor/post/PushTopnToAgg.java | 161 ++++-----------
.../org/apache/doris/nereids/rules/RuleType.java | 1 +
.../rules/analysis/CompressedMaterialize.java | 12 +-
.../nereids/rules/rewrite/LimitAggToTopNAgg.java | 221 ++++++++++++++-------
.../rules/rewrite/SimplifyEncodeDecode.java | 68 +++++++
.../doris/nereids/trees/plans/algebra/Project.java | 13 ++
.../trees/plans/logical/LogicalAggregate.java | 5 +
.../java/org/apache/doris/qe/SessionVariable.java | 2 -
.../data/query_p0/limit/test_group_by_limit.out | 26 +--
...aggr_distinct_through_join_one_side_cust.groovy | 48 +++--
.../nereids_tpch_p0/tpch/push_topn_to_agg.groovy | 111 +++--------
.../query_p0/limit/test_group_by_limit.groovy | 101 ++++++++--
14 files changed, 428 insertions(+), 345 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
index 8985dadc0bd..03dbb6c7110 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
@@ -167,7 +167,7 @@ public class Analyzer extends AbstractBatchJobExecutor {
topDown(new EliminateGroupByConstant()),
topDown(new SimplifyAggGroupBy()),
- topDown(new CompressedMaterialize()),
+ bottomUp(new CompressedMaterialize()),
topDown(new NormalizeAggregate()),
topDown(new HavingToFilter()),
topDown(new QualifyToFilter()),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index dbaaf2a6b32..5b276258263 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -132,6 +132,7 @@ import
org.apache.doris.nereids.rules.rewrite.PushProjectThroughUnion;
import org.apache.doris.nereids.rules.rewrite.ReduceAggregateChildOutputRows;
import org.apache.doris.nereids.rules.rewrite.ReorderJoin;
import org.apache.doris.nereids.rules.rewrite.RewriteCteChildren;
+import org.apache.doris.nereids.rules.rewrite.SimplifyEncodeDecode;
import org.apache.doris.nereids.rules.rewrite.SimplifyWindowExpression;
import org.apache.doris.nereids.rules.rewrite.SplitLimit;
import org.apache.doris.nereids.rules.rewrite.SumLiteralRewrite;
@@ -371,6 +372,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
// generate one PhysicalLimit if current
distribution is gather or two
// PhysicalLimits with gather exchange
topDown(new LimitSortToTopN()),
+ topDown(new SimplifyEncodeDecode()),
topDown(new LimitAggToTopNAgg()),
topDown(new MergeTopNs()),
topDown(new SplitLimit()),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java
index aca3f21a7d1..d3161a0dba3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java
@@ -21,157 +21,84 @@
package org.apache.doris.nereids.processor.post;
import org.apache.doris.nereids.CascadesContext;
-import org.apache.doris.nereids.properties.DistributionSpecGather;
-import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.plans.AggMode;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribute;
import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate;
import
org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate.TopnPushInfo;
-import org.apache.doris.nereids.trees.plans.physical.PhysicalLimit;
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
import org.apache.doris.qe.ConnectContext;
-import org.apache.hadoop.util.Lists;
-
-import java.util.List;
-import java.util.stream.Collectors;
-
/**
- * Add SortInfo to Agg. This SortInfo is used as boundary, not used to sort
elements.
+ * Add TopNInfo to Agg. This TopNInfo is used as boundary, not used to sort
elements.
* example
* sql: select count(*) from orders group by o_clerk order by o_clerk limit 1;
* plan: topn(1) -> aggGlobal -> shuffle -> aggLocal -> scan
* optimization: aggLocal and aggGlobal only need to generate the smallest row
with respect to o_clerk.
*
- * TODO: the following case is not covered:
- * sql: select sum(o_shippriority) from orders group by o_clerk limit 1;
- * plan: limit -> aggGlobal -> shuffle -> aggLocal -> scan
- * aggGlobal may receive partial aggregate results, and hence is not supported
now
- * instance1: input (key=2, v=1) => localAgg => (2, 1) => aggGlobal inst1 =>
(2, 1)
- * instance2: input (key=1, v=1), (key=2, v=2) => localAgg inst2 => (1, 1)
- * (2,1),(1,1) => limit => may output (2, 1), which is not complete, missing
(2, 2) in instance2
- *
- *TOPN:
- * Precondition: topn orderkeys are the prefix of group keys
- * TODO: topnKeys could be subset of groupKeys. This will be implemented in
future
- * Pattern 2-phase agg:
- * topn -> aggGlobal -> distribute -> aggLocal
- * =>
- * topn(n) -> aggGlobal(topn=n) -> distribute -> aggLocal(topn=n)
- * Pattern 1-phase agg:
- * topn->agg->Any(not agg) -> topn -> agg(topn=n) -> any
- *
- * LIMIT:
- * Pattern 1: limit->agg(1phase)->any
- * Pattern 2: limit->agg(global)->gather->agg(local)
+ * This rule only applies to the patterns
+ * 1. topn->project->agg, or
+ * 2. topn->agg
+ * that
+ * 1. orderKeys and groupkeys are one-one mapping
+ * 2. aggregate is not scalar agg
+ * Refer to LimitAggToTopNAgg rule.
*/
public class PushTopnToAgg extends PlanPostProcessor {
@Override
public Plan visitPhysicalTopN(PhysicalTopN<? extends Plan> topN,
CascadesContext ctx) {
topN.child().accept(this, ctx);
- if (ConnectContext.get().getSessionVariable().topnOptLimitThreshold <=
topN.getLimit() + topN.getOffset()) {
+ if (ConnectContext.get().getSessionVariable().topnOptLimitThreshold <=
topN.getLimit() + topN.getOffset()
+ && !ConnectContext.get().getSessionVariable().pushTopnToAgg) {
return topN;
}
- Plan topnChild = topN.child();
- if (topnChild instanceof PhysicalProject) {
- topnChild = topnChild.child(0);
+ Plan topNChild = topN.child();
+ if (topNChild instanceof PhysicalProject) {
+ topNChild = topNChild.child(0);
}
- if (topnChild instanceof PhysicalHashAggregate) {
- PhysicalHashAggregate<? extends Plan> upperAgg =
(PhysicalHashAggregate<? extends Plan>) topnChild;
- List<OrderKey> orderKeys =
tryGenerateOrderKeyByGroupKeyAndTopnKey(topN, upperAgg);
- if (!orderKeys.isEmpty()) {
-
- if (upperAgg.getAggPhase().isGlobal() && upperAgg.getAggMode()
== AggMode.BUFFER_TO_RESULT) {
- upperAgg.setTopnPushInfo(new TopnPushInfo(
- orderKeys,
- topN.getLimit() + topN.getOffset()));
- if (upperAgg.child() instanceof PhysicalDistribute
- && upperAgg.child().child(0) instanceof
PhysicalHashAggregate) {
- PhysicalHashAggregate<? extends Plan> bottomAgg =
- (PhysicalHashAggregate<? extends Plan>)
upperAgg.child().child(0);
+ if (topNChild instanceof PhysicalHashAggregate) {
+ PhysicalHashAggregate<? extends Plan> upperAgg =
(PhysicalHashAggregate<? extends Plan>) topNChild;
+ if (isGroupKeyIdenticalToOrderKey(topN, upperAgg)) {
+ upperAgg.setTopnPushInfo(new TopnPushInfo(
+ topN.getOrderKeys(),
+ topN.getLimit() + topN.getOffset()));
+ if (upperAgg.child() instanceof PhysicalDistribute
+ && upperAgg.child().child(0) instanceof
PhysicalHashAggregate) {
+ PhysicalHashAggregate<? extends Plan> bottomAgg =
+ (PhysicalHashAggregate<? extends Plan>)
upperAgg.child().child(0);
+ if (isGroupKeyIdenticalToOrderKey(topN, bottomAgg)) {
+ bottomAgg.setTopnPushInfo(new TopnPushInfo(
+ topN.getOrderKeys(),
+ topN.getLimit() + topN.getOffset()));
+ }
+ } else if (upperAgg.child() instanceof PhysicalHashAggregate) {
+ // multi-distinct plan
+ PhysicalHashAggregate<? extends Plan> bottomAgg =
+ (PhysicalHashAggregate<? extends Plan>)
upperAgg.child();
+ if (isGroupKeyIdenticalToOrderKey(topN, bottomAgg)) {
bottomAgg.setTopnPushInfo(new TopnPushInfo(
- orderKeys,
+ topN.getOrderKeys(),
topN.getLimit() + topN.getOffset()));
}
- } else if (upperAgg.getAggPhase().isLocal() &&
upperAgg.getAggMode() == AggMode.INPUT_TO_RESULT) {
- // one phase agg
- upperAgg.setTopnPushInfo(new TopnPushInfo(
- orderKeys,
- topN.getLimit() + topN.getOffset()));
}
}
}
return topN;
}
- /**
- return true, if topn order-key is prefix of agg group-key, ignore
asc/desc and null_first
- TODO order-key can be subset of group-key. BE does not support now.
- */
- private List<OrderKey>
tryGenerateOrderKeyByGroupKeyAndTopnKey(PhysicalTopN<? extends Plan> topN,
- PhysicalHashAggregate<? extends Plan> agg) {
- List<OrderKey> orderKeys =
Lists.newArrayListWithCapacity(agg.getGroupByExpressions().size());
- if (topN.getOrderKeys().size() > agg.getGroupByExpressions().size()) {
- return orderKeys;
- }
- List<Expression> topnKeys = topN.getOrderKeys().stream()
- .map(OrderKey::getExpr).collect(Collectors.toList());
- for (int i = 0; i < topN.getOrderKeys().size(); i++) {
- // prefix check
- if (!topnKeys.get(i).equals(agg.getGroupByExpressions().get(i))) {
- return Lists.newArrayList();
- }
- orderKeys.add(topN.getOrderKeys().get(i));
- }
- for (int i = topN.getOrderKeys().size(); i <
agg.getGroupByExpressions().size(); i++) {
- orderKeys.add(new OrderKey(agg.getGroupByExpressions().get(i),
true, false));
- }
- return orderKeys;
- }
-
- @Override
- public Plan visitPhysicalLimit(PhysicalLimit<? extends Plan> limit,
CascadesContext ctx) {
- limit.child().accept(this, ctx);
- if (ConnectContext.get().getSessionVariable().topnOptLimitThreshold <=
limit.getLimit() + limit.getOffset()) {
- return limit;
+ private boolean isGroupKeyIdenticalToOrderKey(PhysicalTopN<? extends Plan>
topN,
+
PhysicalHashAggregate<? extends Plan> agg) {
+ if (topN.getOrderKeys().size() != agg.getGroupByExpressions().size()) {
+ return false;
}
- Plan limitChild = limit.child();
- if (limitChild instanceof PhysicalProject) {
- limitChild = limitChild.child(0);
- }
- if (limitChild instanceof PhysicalHashAggregate) {
- PhysicalHashAggregate<? extends Plan> upperAgg =
(PhysicalHashAggregate<? extends Plan>) limitChild;
- if (upperAgg.getAggPhase().isGlobal() && upperAgg.getAggMode() ==
AggMode.BUFFER_TO_RESULT) {
- Plan child = upperAgg.child();
- Plan grandChild = child.child(0);
- if (child instanceof PhysicalDistribute
- && ((PhysicalDistribute<?>)
child).getDistributionSpec() instanceof DistributionSpecGather
- && grandChild instanceof PhysicalHashAggregate) {
- upperAgg.setTopnPushInfo(new TopnPushInfo(
- generateOrderKeyByGroupKey(upperAgg),
- limit.getLimit() + limit.getOffset()));
- PhysicalHashAggregate<? extends Plan> bottomAgg =
- (PhysicalHashAggregate<? extends Plan>) grandChild;
- bottomAgg.setTopnPushInfo(new TopnPushInfo(
- generateOrderKeyByGroupKey(bottomAgg),
- limit.getLimit() + limit.getOffset()));
- }
- } else if (upperAgg.getAggMode() == AggMode.INPUT_TO_RESULT) {
- // 1-phase agg
- upperAgg.setTopnPushInfo(new TopnPushInfo(
- generateOrderKeyByGroupKey(upperAgg),
- limit.getLimit() + limit.getOffset()));
+ for (int i = 0; i < agg.getGroupByExpressions().size(); i++) {
+ Expression groupByKey = agg.getGroupByExpressions().get(i);
+ Expression orderKey = topN.getOrderKeys().get(i).getExpr();
+ if (!groupByKey.equals(orderKey)) {
+ return false;
}
}
- return limit;
- }
-
- private List<OrderKey> generateOrderKeyByGroupKey(PhysicalHashAggregate<?
extends Plan> agg) {
- return agg.getGroupByExpressions().stream()
- .map(key -> new OrderKey(key, true, false))
- .collect(Collectors.toList());
+ return true;
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index e25050b012d..d348889818a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -108,6 +108,7 @@ public enum RuleType {
// rewrite rules
COMPRESSED_MATERIALIZE_AGG(RuleTypeClass.REWRITE),
COMPRESSED_MATERIALIZE_SORT(RuleTypeClass.REWRITE),
+ SIMPLIFY_ENCODE_DECODE(RuleTypeClass.REWRITE),
NORMALIZE_AGGREGATE(RuleTypeClass.REWRITE),
NORMALIZE_SORT(RuleTypeClass.REWRITE),
NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
index 7d8a7664f82..814269a6398 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
@@ -78,7 +78,7 @@ public class CompressedMaterialize implements
AnalysisRuleFactory {
private LogicalSort<Plan> compressMaterializeSort(LogicalSort<Plan> sort) {
List<OrderKey> newOrderKeys = Lists.newArrayList();
- boolean changed = false;
+ List<Expression> orderKeysToEncode = Lists.newArrayList();
for (OrderKey orderKey : sort.getOrderKeys()) {
Expression expr = orderKey.getExpr();
Optional<Expression> encode = getEncodeExpression(expr);
@@ -86,12 +86,18 @@ public class CompressedMaterialize implements
AnalysisRuleFactory {
newOrderKeys.add(new OrderKey(encode.get(),
orderKey.isAsc(),
orderKey.isNullFirst()));
- changed = true;
+ orderKeysToEncode.add(expr);
} else {
newOrderKeys.add(orderKey);
}
}
- return changed ? sort.withOrderKeys(newOrderKeys) : sort;
+ if (orderKeysToEncode.isEmpty()) {
+ return sort;
+ } else {
+ sort = sort.withOrderKeys(newOrderKeys);
+ return sort;
+ }
+
}
private Optional<Expression> getEncodeExpression(Expression expression) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LimitAggToTopNAgg.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LimitAggToTopNAgg.java
index dfa1230a8f8..f0f9c434725 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LimitAggToTopNAgg.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LimitAggToTopNAgg.java
@@ -17,24 +17,29 @@
package org.apache.doris.nereids.rules.rewrite;
+import org.apache.doris.common.Pair;
import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
-import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
import org.apache.doris.qe.ConnectContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import java.util.HashMap;
import java.util.List;
-import java.util.Optional;
+import java.util.Map;
+import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -54,109 +59,171 @@ public class LimitAggToTopNAgg implements
RewriteRuleFactory {
&&
ConnectContext.get().getSessionVariable().pushTopnToAgg
&&
ConnectContext.get().getSessionVariable().topnOptLimitThreshold
>= limit.getLimit() + limit.getOffset())
+ .when(limit -> {
+ LogicalAggregate<? extends Plan> agg =
limit.child();
+ return !agg.getGroupByExpressions().isEmpty() &&
!agg.getSourceRepeat().isPresent();
+ })
.then(limit -> {
LogicalAggregate<? extends Plan> agg =
limit.child();
- Optional<OrderKey> orderKeysOpt =
tryGenerateOrderKeyByTheFirstGroupKey(agg);
- if (!orderKeysOpt.isPresent()) {
- return null;
- }
- List<OrderKey> orderKeys =
Lists.newArrayList(orderKeysOpt.get());
+ List<OrderKey> orderKeys =
generateOrderKeyByGroupKey(agg);
return new LogicalTopN<>(orderKeys,
limit.getLimit(), limit.getOffset(), agg);
}).toRule(RuleType.LIMIT_AGG_TO_TOPN_AGG),
- //limit->project->agg to topn->project->agg
+ //limit->project->agg to project->topn->agg
logicalLimit(logicalProject(logicalAggregate()))
.when(limit -> ConnectContext.get() != null
&&
ConnectContext.get().getSessionVariable().pushTopnToAgg
&&
ConnectContext.get().getSessionVariable().topnOptLimitThreshold
>= limit.getLimit() + limit.getOffset())
+ .when(limit -> {
+ LogicalAggregate<? extends Plan> agg =
limit.child().child();
+ return !agg.getGroupByExpressions().isEmpty() &&
!agg.getSourceRepeat().isPresent();
+ })
.then(limit -> {
LogicalProject<? extends Plan> project =
limit.child();
- LogicalAggregate<? extends Plan> agg
- = (LogicalAggregate<? extends Plan>)
project.child();
- Optional<OrderKey> orderKeysOpt =
tryGenerateOrderKeyByTheFirstGroupKey(agg);
- if (!orderKeysOpt.isPresent()) {
- return null;
- }
- List<OrderKey> orderKeys =
Lists.newArrayList(orderKeysOpt.get());
- Plan result;
-
- if (outputAllGroupKeys(limit, agg)) {
- result = new LogicalTopN<>(orderKeys,
limit.getLimit(),
- limit.getOffset(), project);
- } else {
- // add the first group by key to topn, and
prune this key by upper project
- // topn order keys are prefix of group by keys
- // refer to
PushTopnToAgg.tryGenerateOrderKeyByGroupKeyAndTopnKey()
- Expression firstGroupByKey =
agg.getGroupByExpressions().get(0);
- if (!(firstGroupByKey instanceof
SlotReference)) {
- return null;
- }
- boolean shouldPruneFirstGroupByKey = true;
- if
(project.getOutputs().contains(firstGroupByKey)) {
- shouldPruneFirstGroupByKey = false;
- } else {
- List<NamedExpression> bottomProjections =
Lists.newArrayList(project.getProjects());
- bottomProjections.add((SlotReference)
firstGroupByKey);
- project =
project.withProjects(bottomProjections);
- }
- LogicalTopN topn = new
LogicalTopN<>(orderKeys, limit.getLimit(),
- limit.getOffset(), project);
- if (shouldPruneFirstGroupByKey) {
- List<NamedExpression> limitOutput =
limit.getOutput().stream()
- .map(e -> (NamedExpression)
e).collect(Collectors.toList());
- result = new LogicalProject<>(limitOutput,
topn);
- } else {
- result = topn;
- }
- }
- return result;
+ LogicalAggregate<? extends Plan> agg =
(LogicalAggregate<? extends Plan>) project.child();
+ List<OrderKey> orderKeys =
generateOrderKeyByGroupKey(agg);
+ LogicalTopN topn = new LogicalTopN<>(orderKeys,
limit.getLimit(),
+ limit.getOffset(), agg);
+ project = (LogicalProject<? extends Plan>)
project.withChildren(topn);
+ return project;
}).toRule(RuleType.LIMIT_AGG_TO_TOPN_AGG),
- // topn -> agg: add all group key to sort key, if sort key is
prefix of group key
+ // topn -> agg: append group key(if it is not sort key) to
sort key
logicalTopN(logicalAggregate())
.when(topn -> ConnectContext.get() != null
&&
ConnectContext.get().getSessionVariable().pushTopnToAgg
&&
ConnectContext.get().getSessionVariable().topnOptLimitThreshold
>= topn.getLimit() + topn.getOffset())
+ .when(topn -> {
+ LogicalAggregate<? extends Plan> agg =
topn.child();
+ return !agg.getGroupByExpressions().isEmpty() &&
!agg.getSourceRepeat().isPresent();
+ })
.then(topn -> {
- LogicalAggregate<? extends Plan> agg =
(LogicalAggregate<? extends Plan>) topn.child();
- List<OrderKey> newOrders =
tryGenerateOrderKeyByGroupKeyAndTopnKey(topn, agg);
- if (newOrders.isEmpty()) {
- return topn;
+ LogicalAggregate<? extends Plan> agg =
topn.child();
+ Pair<List<OrderKey>, List<Expression>> pair =
+
supplementOrderKeyByGroupKeyIfCompatible(topn, agg);
+ if (pair != null) {
+ agg = agg.withGroupBy(pair.second);
+ topn = (LogicalTopN) topn.withChildren(agg);
+ topn = (LogicalTopN)
topn.withOrderKeys(pair.first);
+ }
+ return topn;
+ }).toRule(RuleType.LIMIT_AGG_TO_TOPN_AGG),
+ //topn -> project ->agg: add all group key to sort key, and
prune column
+ logicalTopN(logicalProject(logicalAggregate()))
+ .when(topn -> ConnectContext.get() != null
+ &&
ConnectContext.get().getSessionVariable().pushTopnToAgg
+ &&
ConnectContext.get().getSessionVariable().topnOptLimitThreshold
+ >= topn.getLimit() + topn.getOffset())
+ .when(topn -> {
+ LogicalAggregate<? extends Plan> agg =
topn.child().child();
+ return !agg.getGroupByExpressions().isEmpty() &&
!agg.getSourceRepeat().isPresent();
+ })
+ .then(topn -> {
+ LogicalTopN originTopn = topn;
+ LogicalProject<? extends Plan> project =
topn.child();
+ LogicalAggregate<? extends Plan> agg =
(LogicalAggregate) project.child();
+ if (!project.isAllSlots()) {
+ /*
+ topn(orderKey=[a])
+ +-->project(b as a)
+ +--> agg(groupKey[b]
+ =>
+ topn(orderKey=[b])
+ +-->project(b as a)
+ +-->agg(groupKey[b])
+ and then exchange topn and project
+ */
+ Map<SlotReference, SlotReference> keyAsKey =
new HashMap<>();
+ for (NamedExpression e :
project.getProjects()) {
+ if (e instanceof Alias && e.child(0)
instanceof SlotReference) {
+ keyAsKey.put((SlotReference)
e.toSlot(), (SlotReference) e.child(0));
+ }
+ }
+ List<OrderKey> projectOrderKeys =
Lists.newArrayList();
+ boolean hasNew = false;
+ for (OrderKey orderKey : topn.getOrderKeys()) {
+ if
(keyAsKey.containsKey(orderKey.getExpr())) {
+
projectOrderKeys.add(orderKey.withExpression(keyAsKey.get(orderKey.getExpr())));
+ hasNew = true;
+ } else {
+ projectOrderKeys.add(orderKey);
+ }
+ }
+ if (hasNew) {
+ topn = (LogicalTopN)
topn.withOrderKeys(projectOrderKeys);
+ }
+ }
+ Pair<List<OrderKey>, List<Expression>> pair =
+
supplementOrderKeyByGroupKeyIfCompatible(topn, agg);
+ Plan result;
+ if (pair == null) {
+ result = originTopn;
} else {
- return topn.withOrderKeys(newOrders);
+ agg = agg.withGroupBy(pair.second);
+ topn = (LogicalTopN)
topn.withOrderKeys(pair.first);
+ if (isOrderKeysInProject(topn, project)) {
+ project = (LogicalProject<? extends Plan>)
project.withChildren(agg);
+ topn =
(LogicalTopN<LogicalProject<LogicalAggregate<Plan>>>)
+ topn.withChildren(project);
+ result = topn;
+ } else {
+ topn = (LogicalTopN)
topn.withChildren(agg);
+ project = (LogicalProject<? extends Plan>)
project.withChildren(topn);
+ result = project;
+ }
}
- }).toRule(RuleType.LIMIT_AGG_TO_TOPN_AGG));
+ return result;
+ }).toRule(RuleType.LIMIT_AGG_TO_TOPN_AGG)
+ );
}
- private List<OrderKey>
tryGenerateOrderKeyByGroupKeyAndTopnKey(LogicalTopN<? extends Plan> topN,
- LogicalAggregate<? extends Plan> agg) {
- List<OrderKey> orderKeys =
Lists.newArrayListWithCapacity(agg.getGroupByExpressions().size());
- if (topN.getOrderKeys().size() > agg.getGroupByExpressions().size()) {
- return orderKeys;
- }
- List<Expression> topnKeys = topN.getOrderKeys().stream()
- .map(OrderKey::getExpr).collect(Collectors.toList());
- for (int i = 0; i < topN.getOrderKeys().size(); i++) {
- // prefix check
- if (!topnKeys.get(i).equals(agg.getGroupByExpressions().get(i))) {
- return Lists.newArrayList();
+ private boolean isOrderKeysInProject(LogicalTopN<? extends Plan> topn,
LogicalProject project) {
+ Set<Slot> projectSlots = project.getOutputSet();
+ for (OrderKey orderKey : topn.getOrderKeys()) {
+ if (!projectSlots.contains(orderKey.getExpr())) {
+ return false;
}
- orderKeys.add(topN.getOrderKeys().get(i));
- }
- for (int i = topN.getOrderKeys().size(); i <
agg.getGroupByExpressions().size(); i++) {
- orderKeys.add(new OrderKey(agg.getGroupByExpressions().get(i),
true, false));
}
- return orderKeys;
+ return true;
}
- private boolean outputAllGroupKeys(LogicalLimit limit, LogicalAggregate
agg) {
- return limit.getOutputSet().containsAll(agg.getGroupByExpressions());
+ private List<OrderKey> generateOrderKeyByGroupKey(LogicalAggregate<?
extends Plan> agg) {
+ return agg.getGroupByExpressions().stream()
+ .map(key -> new OrderKey(key, true, false))
+ .collect(Collectors.toList());
}
- private Optional<OrderKey>
tryGenerateOrderKeyByTheFirstGroupKey(LogicalAggregate<? extends Plan> agg) {
- if (agg.getGroupByExpressions().isEmpty()) {
- return Optional.empty();
+ /**
+ * compatible: if order key is subset of group by keys
+ * example:
+ * 1. orderKey[a, b], groupKeys[b, a, c]
+ * compatible, return Pair(orderKey[a, b, c], groupKey[a, b, c])
+ * 2. orderKey[a, b+1], groupKeys[a, b]
+ * not compatible, return null
+ */
+ private Pair<List<OrderKey>, List<Expression>>
supplementOrderKeyByGroupKeyIfCompatible(
+ LogicalTopN<? extends Plan> topn, LogicalAggregate<? extends Plan>
agg) {
+ Set<Expression> groupKeySet =
Sets.newHashSet(agg.getGroupByExpressions());
+ List<Expression> orderKeyList = topn.getOrderKeys().stream()
+ .map(OrderKey::getExpr).collect(Collectors.toList());
+ Set<Expression> orderKeySet = Sets.newHashSet(orderKeyList);
+ boolean compatible = groupKeySet.containsAll(orderKeyList);
+ if (compatible) {
+ List<OrderKey> newOrderKeys =
Lists.newArrayList(topn.getOrderKeys());
+ List<Expression> newGroupExpressions =
Lists.newArrayListWithCapacity(agg.getGroupByExpressions().size());
+ for (OrderKey orderKey : newOrderKeys) {
+ newGroupExpressions.add(orderKey.getExpr());
+ }
+
+ for (Expression groupKey : agg.getGroupByExpressions()) {
+ if (!orderKeySet.contains(groupKey)) {
+ newOrderKeys.add(new OrderKey(groupKey, true, false));
+ newGroupExpressions.add(groupKey);
+ }
+ }
+ return Pair.of(newOrderKeys, newGroupExpressions);
+ } else {
+ return null;
}
- return Optional.of(new OrderKey(agg.getGroupByExpressions().get(0),
true, false));
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyEncodeDecode.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyEncodeDecode.java
new file mode 100644
index 00000000000..b6513fc7580
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyEncodeDecode.java
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeStrToInteger;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import java.util.List;
+
+/**
+ * project (..., encode(decode(A)) as B, ...)
+ * =>
+ * project (..., A as B,...)
+ */
+public class SimplifyEncodeDecode implements RewriteRuleFactory {
+
+ @Override
+ public List<Rule> buildRules() {
+ return ImmutableList.of(
+ RuleType.SIMPLIFY_ENCODE_DECODE.build(
+ logicalProject()
+ .then(project -> {
+ List<NamedExpression> newProjections =
+
Lists.newArrayListWithCapacity(project.getProjects().size());
+ boolean changed = false;
+ for (NamedExpression namedExpression :
project.getProjects()) {
+ if (namedExpression instanceof Alias
+ && namedExpression.child(0)
instanceof EncodeStrToInteger
+ &&
namedExpression.child(0).child(0)
+ instanceof DecodeAsVarchar) {
+ Alias alias = (Alias)
namedExpression;
+ Expression body =
namedExpression.child(0)
+ .child(0).child(0);
+ newProjections.add((Alias)
alias.withChildren(body));
+ changed = true;
+ } else {
+
newProjections.add(namedExpression);
+ }
+ }
+ return changed ?
project.withProjects(newProjections) : project;
+ })
+ )
+ );
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java
index a5d15f1d515..60625314835 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Project.java
@@ -18,6 +18,7 @@
package org.apache.doris.nereids.trees.plans.algebra;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
@@ -108,6 +109,18 @@ public interface Project {
return true;
}
+ /**
+ * project(A as B) is eventually slot project, where A is a slot
+ */
+ default boolean isEventuallyAllSlots() {
+ for (NamedExpression project : getProjects()) {
+ if (!project.isSlot() && !(project instanceof Alias &&
project.child(0) instanceof Slot)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
/** containsNoneMovableFunction */
default boolean containsNoneMovableFunction() {
for (NamedExpression expression : getProjects()) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
index d80b4f3166d..d7d7767e602 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
@@ -273,6 +273,11 @@ public class LogicalAggregate<CHILD_TYPE extends Plan>
hasPushed, sourceRepeat, Optional.empty(), Optional.empty(),
child());
}
+ public LogicalAggregate<Plan> withGroupBy(List<Expression>
groupByExprList) {
+ return new LogicalAggregate<>(groupByExprList, outputExpressions,
normalized, ordinalIsResolved, generated,
+ hasPushed, sourceRepeat, Optional.empty(), Optional.empty(),
child());
+ }
+
public LogicalAggregate<Plan> withChildGroupByAndOutput(List<Expression>
groupByExprList,
List<NamedExpression> outputExpressionList, Plan newChild) {
return new LogicalAggregate<>(groupByExprList, outputExpressionList,
normalized, ordinalIsResolved, generated,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 380c758e575..126ed1135e9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2391,13 +2391,11 @@ public class SessionVariable implements Serializable,
Writable {
this.rewriteOrToInPredicateThreshold = 100000;
this.enableFunctionPushdown = false;
this.enableDeleteSubPredicateV2 = false;
- this.topnOptLimitThreshold = 0;
this.enableSyncRuntimeFilterSize = true;
} else {
this.rewriteOrToInPredicateThreshold = 2;
this.enableFunctionPushdown = true;
this.enableDeleteSubPredicateV2 = true;
- this.topnOptLimitThreshold = 1024;
this.enableSyncRuntimeFilterSize = false;
}
diff --git a/regression-test/data/query_p0/limit/test_group_by_limit.out
b/regression-test/data/query_p0/limit/test_group_by_limit.out
index d9ac2a2481a..4a396cdaca8 100644
--- a/regression-test/data/query_p0/limit/test_group_by_limit.out
+++ b/regression-test/data/query_p0/limit/test_group_by_limit.out
@@ -1,65 +1,65 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
--- !select --
+-- !select1 --
253967024 8491 AIR
259556658 8641 FOB
260402265 8669 MAIL
--- !select --
+-- !select2 --
449872500 15000 1
386605746 12900 2
320758616 10717 3
--- !select --
+-- !select3 --
198674527 6588 0.0
198679731 6563 0.01
198501055 6622 0.02
--- !select --
+-- !select4 --
27137 1 1992-02-02
45697 1 1992-02-04
114452 5 1992-02-05
--- !select --
+-- !select5 --
27137 1 1992-02-02T00:00
45697 1 1992-02-04T00:00
114452 5 1992-02-05T00:00
--- !select --
+-- !select6 --
139015016 4632 1
130287219 4313 2
162309750 5334 3
--- !select --
+-- !select7 --
64774969 2166 AIR 1
54166166 1804 AIR 2
45538267 1532 AIR 3
--- !select --
+-- !select8 --
6882631 228 AIR 1 0.0
6756423 228 AIR 1 0.01
7920028 254 AIR 1 0.02
--- !select --
+-- !select9 --
7618 1 AIR 1 0.0 1992-02-06
2210 1 AIR 1 0.0 1992-03-24
16807 1 AIR 1 0.0 1992-03-29
--- !select --
+-- !select10 --
6882631 228 AIR 1 0.0
6756423 228 AIR 1 0.01
7920028 254 AIR 1 0.02
--- !select --
+-- !select11 --
6882631 228 AIR 1 0.0
6756423 228 AIR 1 0.01
7920028 254 AIR 1 0.02
--- !select --
+-- !select12 --
7707018 238 TRUCK 1 0.0
7467045 233 TRUCK 1 0.01
6927206 245 TRUCK 1 0.02
--- !select --
+-- !select13 --
7661562 249 TRUCK 1 0.08
6673139 228 TRUCK 1 0.07
8333862 265 TRUCK 1 0.06
diff --git
a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy
b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy
index 9134d66b76d..cc75e289a60 100644
---
a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy
+++
b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_aggr_distinct_through_join_one_side_cust.groovy
@@ -87,18 +87,22 @@ suite("push_down_aggr_distinct_through_join_one_side_cust")
{
);
"""
+ sql """
+ set topn_opt_limit_threshold=1024;
+ """
+
explain {
- sql("physical PLAN SELECT
/*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/" +
- "COUNT(DISTINCT dwd_tracking_sensor_init_tmp_ymds.gz_user_id)
AS a2c1a830_1," +
- "dwd_com_abtest_result_inc_ymds.group_name AS ab1011d6," +
- "dwd_tracking_sensor_init_tmp_ymds.dt AS ad466123 " +
- "FROM dwd_tracking_sensor_init_tmp_ymds " +
- "LEFT JOIN dwd_com_abtest_result_inc_ymds " +
- "ON dwd_tracking_sensor_init_tmp_ymds.gz_user_id =
dwd_com_abtest_result_inc_ymds.user_key " +
- "AND dwd_tracking_sensor_init_tmp_ymds.dt =
dwd_com_abtest_result_inc_ymds.dt " +
- "WHERE dwd_tracking_sensor_init_tmp_ymds.dt BETWEEN
'2024-08-15' AND '2024-08-15' " +
- "AND dwd_com_abtest_result_inc_ymds.dt BETWEEN '2024-08-15'
AND '2024-08-15' " +
- "GROUP BY 2, 3 ORDER BY 3 asc limit 10000;");
+ sql("""physical PLAN SELECT
/*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/
+ COUNT(DISTINCT dwd_tracking_sensor_init_tmp_ymds.gz_user_id)
AS a2c1a830_1,
+ dwd_com_abtest_result_inc_ymds.group_name AS ab1011d6,
+ dwd_tracking_sensor_init_tmp_ymds.dt AS ad466123
+ FROM dwd_tracking_sensor_init_tmp_ymds
+ LEFT JOIN dwd_com_abtest_result_inc_ymds
+ ON dwd_tracking_sensor_init_tmp_ymds.gz_user_id =
dwd_com_abtest_result_inc_ymds.user_key
+ AND dwd_tracking_sensor_init_tmp_ymds.dt =
dwd_com_abtest_result_inc_ymds.dt
+ WHERE dwd_tracking_sensor_init_tmp_ymds.dt BETWEEN
'2024-08-15' AND '2024-08-15'
+ AND dwd_com_abtest_result_inc_ymds.dt BETWEEN '2024-08-15' AND
'2024-08-15'
+ GROUP BY 2, 3 ORDER BY 3 asc limit 10000;""");
contains"groupByExpr=[gz_user_id#1, dt#2]"
contains"groupByExpr=[gz_user_id#1, dt#2, group_name#5],
outputExpr=[gz_user_id#1, dt#2, group_name#5]"
contains"[group_name#5, dt#2]"
@@ -106,17 +110,17 @@
suite("push_down_aggr_distinct_through_join_one_side_cust") {
}
explain {
- sql("physical PLAN SELECT
/*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/" +
- "COUNT(DISTINCT dwd_tracking_sensor_init_tmp_ymds.ip) AS
a2c1a830_1," +
- "dwd_com_abtest_result_inc_ymds.group_name AS ab1011d6," +
- "dwd_tracking_sensor_init_tmp_ymds.dt AS ad466123 " +
- "FROM dwd_tracking_sensor_init_tmp_ymds " +
- "LEFT JOIN dwd_com_abtest_result_inc_ymds " +
- "ON dwd_tracking_sensor_init_tmp_ymds.gz_user_id =
dwd_com_abtest_result_inc_ymds.user_key " +
- "AND dwd_tracking_sensor_init_tmp_ymds.dt =
dwd_com_abtest_result_inc_ymds.dt " +
- "WHERE dwd_tracking_sensor_init_tmp_ymds.dt BETWEEN
'2024-08-15' AND '2024-08-15' " +
- "AND dwd_com_abtest_result_inc_ymds.dt BETWEEN '2024-08-15'
AND '2024-08-15' " +
- "GROUP BY 2, 3 ORDER BY 3 asc limit 10000;");
+ sql("""physical PLAN SELECT
/*+use_cbo_rule(PUSH_DOWN_AGG_WITH_DISTINCT_THROUGH_JOIN_ONE_SIDE)*/
+ COUNT(DISTINCT dwd_tracking_sensor_init_tmp_ymds.ip) AS
a2c1a830_1,
+ dwd_com_abtest_result_inc_ymds.group_name AS ab1011d6,
+ dwd_tracking_sensor_init_tmp_ymds.dt AS ad466123
+ FROM dwd_tracking_sensor_init_tmp_ymds
+ LEFT JOIN dwd_com_abtest_result_inc_ymds
+ ON dwd_tracking_sensor_init_tmp_ymds.gz_user_id =
dwd_com_abtest_result_inc_ymds.user_key
+ AND dwd_tracking_sensor_init_tmp_ymds.dt =
dwd_com_abtest_result_inc_ymds.dt
+ WHERE dwd_tracking_sensor_init_tmp_ymds.dt BETWEEN
'2024-08-15' AND '2024-08-15'
+ AND dwd_com_abtest_result_inc_ymds.dt BETWEEN '2024-08-15' AND
'2024-08-15'
+ GROUP BY 2, 3 ORDER BY 3 asc limit 10000;""");
contains"groupByExpr=[ip#0, gz_user_id#1, dt#2], outputExpr=[ip#0,
gz_user_id#1, dt#2]"
contains"groupByExpr=[ip#0, dt#2, group_name#5], outputExpr=[ip#0,
dt#2, group_name#5]"
contains"groupByExpr=[group_name#5, dt#2], outputExpr=[group_name#5,
dt#2, partial_count(ip#0) AS `partial_count(ip)`#12]"
diff --git
a/regression-test/suites/nereids_tpch_p0/tpch/push_topn_to_agg.groovy
b/regression-test/suites/nereids_tpch_p0/tpch/push_topn_to_agg.groovy
index 631656a6b19..5ae587910b6 100644
--- a/regression-test/suites/nereids_tpch_p0/tpch/push_topn_to_agg.groovy
+++ b/regression-test/suites/nereids_tpch_p0/tpch/push_topn_to_agg.groovy
@@ -50,22 +50,31 @@ suite("push_topn_to_agg") {
notContains("STREAMING")
}
- // order key should be prefix of group key
+ // order keys are part of group keys,
+ // 1. adjust group keys (o_custkey, o_clerk) -> o_clerk, o_custkey
+ // 2. append o_custkey to order key
explain{
- sql "select o_custkey, sum(o_shippriority), o_clerk from orders group
by o_custkey, o_clerk order by o_clerk, o_custkey limit 11;"
- multiContains("sortByGroupKey:false", 2)
+ sql "select sum(o_shippriority) from orders group by o_custkey,
o_clerk order by o_clerk limit 11;"
+ contains("sortByGroupKey:true")
+ contains("group by: o_clerk[#10], o_custkey[#9]")
+ contains("order by: o_clerk[#18] ASC, o_custkey[#19] ASC")
}
- // order key should be prefix of group key
- explain{
- sql "select o_custkey, o_clerk, sum(o_shippriority) as x from orders
group by o_custkey, o_clerk order by o_custkey, x limit 12;"
- multiContains("sortByGroupKey:false", 2)
+
+ // one distinct
+ explain {
+ sql "select sum(distinct o_shippriority) from orders group by
o_orderkey limit 13; "
+ contains("VTOP-N")
+ contains("order by: o_orderkey")
+ multiContains("sortByGroupKey:true", 1)
}
- // one phase agg is optimized
+ // multi distinct
explain {
- sql "select sum(o_shippriority) from orders group by o_orderkey limit
13; "
- contains("sortByGroupKey:true")
+ sql "select count(distinct o_clerk), sum(distinct o_shippriority) from
orders group by o_orderkey limit 14; "
+ contains("VTOP-N")
+ contains("order by: o_orderkey")
+ multiContains("sortByGroupKey:true", 2)
}
// use group key as sort key to enable topn-push opt
@@ -74,22 +83,17 @@ suite("push_topn_to_agg") {
contains("sortByGroupKey:true")
}
- // group key is part of output of limit, apply opt
+ // group key is expression
explain {
- sql "select sum(o_shippriority), o_clerk from orders group by o_clerk
limit 15; "
+ sql "select sum(o_shippriority), o_clerk+1 from orders group by
o_clerk+1 limit 15; "
contains("sortByGroupKey:true")
}
- // order key is not prefix of group key
+ // order key is not part of group key
explain {
sql "select o_custkey, sum(o_shippriority) from orders group by
o_custkey order by o_custkey+1 limit 16; "
contains("sortByGroupKey:false")
- }
-
- // order key is not prefix of group key
- explain {
- sql "select o_custkey, sum(o_shippriority) from orders group by
o_custkey order by o_custkey+1 limit 17; "
- contains("sortByGroupKey:false")
+ notContains("sortByGroupKey:true")
}
// topn + one phase agg
@@ -97,73 +101,4 @@ suite("push_topn_to_agg") {
sql "select sum(ps_availqty), ps_partkey, ps_suppkey from partsupp
group by ps_partkey, ps_suppkey order by ps_partkey, ps_suppkey limit 18;"
contains("sortByGroupKey:true")
}
-
- // sort key is prefix of group key, make all group key to sort
key(ps_suppkey) and then apply push-topn-agg rule
- explain {
- sql "select sum(ps_availqty), ps_partkey, ps_suppkey from partsupp
group by ps_partkey, ps_suppkey order by ps_partkey limit 19;"
- contains("sortByGroupKey:true")
- }
-
- explain {
- sql "select sum(ps_availqty), ps_suppkey, ps_availqty from partsupp
group by ps_suppkey, ps_availqty order by ps_suppkey limit 19;"
- contains("sortByGroupKey:true")
- }
-
- // sort key is not prefix of group key, deny
- explain {
- sql "select sum(ps_availqty), ps_partkey, ps_suppkey from partsupp
group by ps_partkey, ps_suppkey order by ps_suppkey limit 20;"
- contains("sortByGroupKey:false")
- }
-
- multi_sql """
- drop table if exists t1;
- CREATE TABLE IF NOT EXISTS t1
- (
- k1 TINYINT
- )
- ENGINE=olap
- AGGREGATE KEY(k1)
- DISTRIBUTED BY HASH(k1) BUCKETS 1
- PROPERTIES (
- "replication_num" = "1"
- );
-
- insert into t1 values (0),(1);
-
- drop table if exists t2;
- CREATE TABLE IF NOT EXISTS t2
- (
- k1 TINYINT
- )
- ENGINE=olap
- AGGREGATE KEY(k1)
- DISTRIBUTED BY HASH(k1) BUCKETS 1
- PROPERTIES (
- "replication_num" = "1"
- );
- insert into t2 values(5),(6);
- """
-
- // the result of following sql may be unstable, run 3 times
- qt_stable_1 """
- select * from (
- select k1 from t1
- UNION
- select k1 from t2
- ) as b order by k1 limit 2;
- """
- qt_stable_2 """
- select * from (
- select k1 from t1
- UNION
- select k1 from t2
- ) as b order by k1 limit 2;
- """
- qt_stable_3 """
- select * from (
- select k1 from t1
- UNION
- select k1 from t2
- ) as b order by k1 limit 2;
- """
}
\ No newline at end of file
diff --git a/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
index 271619c4a93..d6f49f9d7a3 100644
--- a/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
+++ b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
@@ -23,42 +23,99 @@ sql 'set enable_force_spill=false'
sql 'set topn_opt_limit_threshold=10'
+sql "set experimental_ENABLE_COMPRESS_MATERIALIZE=true;"
// different types
-qt_select """ select sum(orderkey), count(partkey), shipmode from
tpch_tiny_lineitem group by shipmode limit 3; """
-
-qt_select """ select sum(orderkey), count(partkey), linenumber from
tpch_tiny_lineitem group by linenumber limit 3; """
+qt_select1 """ select sum(orderkey), count(partkey), shipmode from
tpch_tiny_lineitem group by shipmode limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode from
tpch_tiny_lineitem group by shipmode limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
-qt_select """ select sum(orderkey), count(partkey), tax from
tpch_tiny_lineitem group by tax limit 3; """
+qt_select2 """ select sum(orderkey), count(partkey), linenumber from
tpch_tiny_lineitem group by linenumber limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), linenumber from
tpch_tiny_lineitem group by linenumber limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
-qt_select """ select sum(orderkey), count(partkey), commitdate from
tpch_tiny_lineitem group by commitdate limit 3; """
+qt_select3 """ select sum(orderkey), count(partkey), tax from
tpch_tiny_lineitem group by tax limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), tax from tpch_tiny_lineitem
group by tax limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
+qt_select4 """ select sum(orderkey), count(partkey), commitdate from
tpch_tiny_lineitem group by commitdate limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), commitdate from
tpch_tiny_lineitem group by commitdate limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
// group by functions
-qt_select """ select sum(orderkey), count(partkey), cast(commitdate as
datetime) from tpch_tiny_lineitem group by cast(commitdate as datetime) limit
3; """
-
-qt_select """ select sum(orderkey), count(partkey), month(commitdate) from
tpch_tiny_lineitem group by month(commitdate) limit 3; """
+qt_select5 """ select sum(orderkey), count(partkey), cast(commitdate as
datetime) from tpch_tiny_lineitem group by cast(commitdate as datetime) limit
3; """
+explain {
+ sql " select sum(orderkey), count(partkey), cast(commitdate as datetime)
from tpch_tiny_lineitem group by cast(commitdate as datetime) limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
+qt_select6 """ select sum(orderkey), count(partkey), month(commitdate) from
tpch_tiny_lineitem group by month(commitdate) limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), month(commitdate) from
tpch_tiny_lineitem group by month(commitdate) limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
// mutli column
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber from
tpch_tiny_lineitem group by shipmode, linenumber limit 3; """
-
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax limit 3; """
-
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax , commitdate from tpch_tiny_lineitem group by shipmode, linenumber, tax,
commitdate limit 3; """
-
+qt_select7 """ select sum(orderkey), count(partkey), shipmode, linenumber
from tpch_tiny_lineitem group by shipmode, linenumber limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber from
tpch_tiny_lineitem group by shipmode, linenumber limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
+qt_select8 """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber , tax
from tpch_tiny_lineitem group by shipmode, linenumber, tax limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
+qt_select9 """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax , commitdate from tpch_tiny_lineitem group by shipmode, linenumber, tax,
commitdate limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber , tax ,
commitdate from tpch_tiny_lineitem group by shipmode, linenumber, tax,
commitdate limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
// group by + order by
// group by columns eq order by columns
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode, linenumber, tax limit 3; """
-
+qt_select10 """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode, linenumber, tax limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber , tax
from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode,
linenumber, tax limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
// group by columns contains order by columns
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode limit 3; """
-
+qt_select11 """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber , tax
from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode
limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
// desc order by column
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode desc, linenumber, tax limit 3; """
-
-qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode desc, linenumber, tax desc limit 3; """
-
+qt_select12 """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode desc, linenumber, tax limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber , tax
from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode
desc, linenumber, tax limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
+qt_select13 """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode desc, linenumber, tax desc limit 3; """
+explain{
+ sql " select sum(orderkey), count(partkey), shipmode, linenumber , tax
from tpch_tiny_lineitem group by shipmode, linenumber, tax order by shipmode
desc, linenumber, tax desc limit 3; "
+ contains("VTOP-N")
+ contains("sortByGroupKey:true")
+}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]