This is an automated email from the ASF dual-hosted git repository. starocean999 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c1999479954 [feature](mtmv) Support to use mv group dimension when query aggregate function is distinct (#36318) c1999479954 is described below commit c19994799543149e2967d297602a8e8f19578d6b Author: seawinde <149132972+seawi...@users.noreply.github.com> AuthorDate: Mon Jun 17 15:26:00 2024 +0800 [feature](mtmv) Support to use mv group dimension when query aggregate function is distinct (#36318) ## Proposed changes This extends the ability to rewrite queries by materialized view. For example, if the mv definition is > CREATE MATERIALIZED VIEW mv1 > BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL > DISTRIBUTED BY RANDOM BUCKETS 2 > PROPERTIES ('replication_num' = '1') > AS > select > count(o_totalprice), > o_shippriority, > o_orderstatus, > bin(o_orderkey) > from orders > group by > o_orderstatus, > o_shippriority, > bin(o_orderkey); the following query can be rewritten by the materialized view successfully even though `sum(distinct o_shippriority)` in the query does not appear in the mv output: because the query aggregate function is distinct and its argument is a group by dimension of the mv, the `sum(distinct o_shippriority)` can use the mv group dimension `o_shippriority` directly and the result is still correct. 
The following distinct aggregate functions are supported currently; others will be supported in the future on demand - max(distinct arg) - min(distinct arg) - sum(distinct arg) - avg(distinct arg) - count(distinct arg) > select > count(o_totalprice), > max(distinct o_shippriority), > min(distinct o_shippriority), > avg(distinct o_shippriority), > sum(distinct o_shippriority) / count(distinct o_shippriority) > o_orderstatus, > bin(o_orderkey) > from orders > group by > o_orderstatus, > bin(o_orderkey); --- .../mv/AbstractMaterializedViewAggregateRule.java | 21 +- .../mv/rollup/AggFunctionRollUpHandler.java | 7 +- .../mv/rollup/BothCombinatorRollupHandler.java | 9 +- .../ContainDistinctFunctionRollupHandler.java | 133 ++++++ .../exploration/mv/rollup/DirectRollupHandler.java | 10 +- .../mv/rollup/MappingRollupHandler.java | 8 +- .../mv/rollup/SingleCombinatorRollupHandler.java | 9 +- .../mv/agg_variety/agg_variety.out | 141 ++++++ .../mv/agg_variety/agg_variety.groovy | 508 +++++++++++++++++++++ 9 files changed, 825 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java index 53b0c29bde1..0418f735ccd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.rules.exploration.mv.StructInfo.PlanSplitContext import org.apache.doris.nereids.rules.exploration.mv.mapping.SlotMapping; import org.apache.doris.nereids.rules.exploration.mv.rollup.AggFunctionRollUpHandler; import org.apache.doris.nereids.rules.exploration.mv.rollup.BothCombinatorRollupHandler; +import 
org.apache.doris.nereids.rules.exploration.mv.rollup.ContainDistinctFunctionRollupHandler; import org.apache.doris.nereids.rules.exploration.mv.rollup.DirectRollupHandler; import org.apache.doris.nereids.rules.exploration.mv.rollup.MappingRollupHandler; import org.apache.doris.nereids.rules.exploration.mv.rollup.SingleCombinatorRollupHandler; @@ -71,7 +72,8 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate ImmutableList.of(DirectRollupHandler.INSTANCE, MappingRollupHandler.INSTANCE, SingleCombinatorRollupHandler.INSTANCE, - BothCombinatorRollupHandler.INSTANCE); + BothCombinatorRollupHandler.INSTANCE, + ContainDistinctFunctionRollupHandler.INSTANCE); protected static final AggregateExpressionRewriter AGGREGATE_EXPRESSION_REWRITER = new AggregateExpressionRewriter(); @@ -114,19 +116,23 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate viewToQuerySlotMapping, true, queryStructInfo.getTableBitSet()); + boolean isRewrittenQueryExpressionValid = true; if (!rewrittenQueryExpressions.isEmpty()) { List<NamedExpression> projects = new ArrayList<>(); for (Expression expression : rewrittenQueryExpressions) { if (expression.containsType(AggregateFunction.class)) { + // record the reason and then try to roll up aggregate function materializationContext.recordFailReason(queryStructInfo, "rewritten expression contains aggregate functions when group equals aggregate rewrite", () -> String.format("aggregate functions = %s\n", rewrittenQueryExpressions)); - return null; + isRewrittenQueryExpressionValid = false; } projects.add(expression instanceof NamedExpression ? 
(NamedExpression) expression : new Alias(expression)); } - return new LogicalProject<>(projects, tempRewritedPlan); + if (isRewrittenQueryExpressionValid) { + return new LogicalProject<>(projects, tempRewritedPlan); + } } // if fails, record the reason and then try to roll up aggregate function materializationContext.recordFailReason(queryStructInfo, @@ -356,11 +362,12 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate expressionEntry.getValue()); for (AggFunctionRollUpHandler rollUpHandler : ROLL_UP_HANDLERS) { if (!rollUpHandler.canRollup(queryAggregateFunction, queryAggregateFunctionShuttled, - mvExprToMvScanExprQueryBasedPair)) { + mvExprToMvScanExprQueryBasedPair, mvExprToMvScanExprQueryBased)) { continue; } Function rollupFunction = rollUpHandler.doRollup(queryAggregateFunction, - queryAggregateFunctionShuttled, mvExprToMvScanExprQueryBasedPair); + queryAggregateFunctionShuttled, mvExprToMvScanExprQueryBasedPair, + mvExprToMvScanExprQueryBased); if (rollupFunction != null) { return rollupFunction; } @@ -544,7 +551,7 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate /** * AggregateExpressionRewriteContext */ - protected static class AggregateExpressionRewriteContext { + public static class AggregateExpressionRewriteContext { private boolean valid = true; private final ExpressionRewriteMode expressionRewriteMode; private final Map<Expression, Expression> mvExprToMvScanExprQueryBasedMapping; @@ -587,7 +594,7 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate /** * The expression rewrite mode, which decide how the expression in query is rewritten by mv */ - protected enum ExpressionRewriteMode { + public enum ExpressionRewriteMode { /** * Try to use the expression in mv directly, and doesn't handle aggregate function */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/AggFunctionRollUpHandler.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/AggFunctionRollUpHandler.java index 250d8a83c26..a96c272521a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/AggFunctionRollUpHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/AggFunctionRollUpHandler.java @@ -27,6 +27,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.RollUpTrait; import com.google.common.collect.ImmutableList; import java.util.List; +import java.util.Map; import java.util.Set; /** @@ -39,7 +40,8 @@ public abstract class AggFunctionRollUpHandler { */ public boolean canRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression viewExpression = mvExprToMvScanExprQueryBasedPair.key(); if (!(viewExpression instanceof RollUpTrait) || !((RollUpTrait) viewExpression).canRollUp()) { return false; @@ -54,7 +56,8 @@ public abstract class AggFunctionRollUpHandler { public abstract Function doRollup( AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair); + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap); /** * Extract the function arguments by functionWithAny pattern diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/BothCombinatorRollupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/BothCombinatorRollupHandler.java index b29b2668a7f..38c1dedcefe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/BothCombinatorRollupHandler.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/BothCombinatorRollupHandler.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunctio import org.apache.doris.nereids.trees.expressions.functions.agg.RollUpTrait; import org.apache.doris.nereids.trees.expressions.functions.combinator.Combinator; +import java.util.Map; import java.util.Objects; /** @@ -38,10 +39,11 @@ public class BothCombinatorRollupHandler extends AggFunctionRollUpHandler { @Override public boolean canRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression viewFunction = mvExprToMvScanExprQueryBasedPair.key(); if (!super.canRollup(queryAggregateFunction, queryAggregateFunctionShuttled, - mvExprToMvScanExprQueryBasedPair)) { + mvExprToMvScanExprQueryBasedPair, mvExprToMvScanExprQueryBasedMap)) { return false; } if (queryAggregateFunction instanceof Combinator && viewFunction instanceof Combinator) { @@ -57,7 +59,8 @@ public class BothCombinatorRollupHandler extends AggFunctionRollUpHandler { @Override public Function doRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression rollupParam = mvExprToMvScanExprQueryBasedPair.value(); return ((RollUpTrait) queryAggregateFunction).constructRollUp(rollupParam); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/ContainDistinctFunctionRollupHandler.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/ContainDistinctFunctionRollupHandler.java new file mode 100644 index 00000000000..4d9e6810ce4 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/ContainDistinctFunctionRollupHandler.java @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.exploration.mv.rollup; + +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.trees.expressions.Any; +import org.apache.doris.nereids.trees.expressions.BinaryArithmetic; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.Function; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Avg; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.Max; +import org.apache.doris.nereids.trees.expressions.functions.agg.Min; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.literal.Literal; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; + +import com.google.common.collect.ImmutableSet; + +import java.util.Map; +import java.util.Set; + +/** + * Try to roll up function which contains distinct, if the param in function is in + * materialized view group by dimension. + * For example + * materialized view def is select empid, deptno, count(salary) from distinctQuery group by empid, deptno; + * query is select deptno, count(distinct empid) from distinctQuery group by deptno; + * should rewrite successfully, count(distinct empid) should use the group by empid dimension in query. 
+ */ +public class ContainDistinctFunctionRollupHandler extends AggFunctionRollUpHandler { + + public static final ContainDistinctFunctionRollupHandler INSTANCE = new ContainDistinctFunctionRollupHandler(); + public static Set<AggregateFunction> SUPPORTED_AGGREGATE_FUNCTION_SET = ImmutableSet.of( + new Max(true, Any.INSTANCE), new Min(true, Any.INSTANCE), + new Max(false, Any.INSTANCE), new Min(false, Any.INSTANCE), + new Count(true, Any.INSTANCE), new Sum(true, Any.INSTANCE), + new Avg(true, Any.INSTANCE)); + + @Override + public boolean canRollup(AggregateFunction queryAggregateFunction, + Expression queryAggregateFunctionShuttled, + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBased) { + Set<AggregateFunction> queryAggregateFunctions = + queryAggregateFunctionShuttled.collectToSet(AggregateFunction.class::isInstance); + if (queryAggregateFunctions.size() > 1) { + return false; + } + for (AggregateFunction aggregateFunction : queryAggregateFunctions) { + if (SUPPORTED_AGGREGATE_FUNCTION_SET.stream() + .noneMatch(supportFunction -> Any.equals(supportFunction, aggregateFunction))) { + return false; + } + if (aggregateFunction.getArguments().size() > 1) { + return false; + } + } + Set<Expression> mvExpressionsQueryBased = mvExprToMvScanExprQueryBased.keySet(); + Set<Slot> aggregateFunctionParamSlots = queryAggregateFunctionShuttled.collectToSet(Slot.class::isInstance); + if (aggregateFunctionParamSlots.stream().anyMatch(slot -> !mvExpressionsQueryBased.contains(slot))) { + return false; + } + return true; + } + + @Override + public Function doRollup(AggregateFunction queryAggregateFunction, + Expression queryAggregateFunctionShuttled, Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { + Expression argument = queryAggregateFunction.children().get(0); + RollupResult<Boolean> rollupResult = RollupResult.of(true); + 
Expression rewrittenArgument = argument.accept(new DefaultExpressionRewriter<RollupResult<Boolean>>() { + @Override + public Expression visitSlot(Slot slot, RollupResult<Boolean> context) { + if (!mvExprToMvScanExprQueryBasedMap.containsKey(slot)) { + context.param = false; + return slot; + } + return mvExprToMvScanExprQueryBasedMap.get(slot); + } + + @Override + public Expression visit(Expression expr, RollupResult<Boolean> context) { + if (!context.param) { + return expr; + } + if (expr instanceof Literal || expr instanceof BinaryArithmetic || expr instanceof Slot) { + return super.visit(expr, context); + } + context.param = false; + return expr; + } + }, rollupResult); + if (!rollupResult.param) { + return null; + } + return (Function) queryAggregateFunction.withChildren(rewrittenArgument); + } + + private static class RollupResult<T> { + public T param; + + private RollupResult(T param) { + this.param = param; + } + + public static <T> RollupResult<T> of(T param) { + return new RollupResult<>(param); + } + + public T getParam() { + return param; + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/DirectRollupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/DirectRollupHandler.java index e0106705bcd..091a9d55545 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/DirectRollupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/DirectRollupHandler.java @@ -24,6 +24,8 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunctio import org.apache.doris.nereids.trees.expressions.functions.agg.RollUpTrait; import org.apache.doris.nereids.trees.expressions.functions.combinator.Combinator; +import java.util.Map; + /** * Roll up directly, for example, * query is select c1, sum(c2) from t1 group by c1 @@ -38,10 +40,11 @@ public class DirectRollupHandler extends 
AggFunctionRollUpHandler { public boolean canRollup( AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression viewExpression = mvExprToMvScanExprQueryBasedPair.key(); if (!super.canRollup(queryAggregateFunction, queryAggregateFunctionShuttled, - mvExprToMvScanExprQueryBasedPair)) { + mvExprToMvScanExprQueryBasedPair, mvExprToMvScanExprQueryBasedMap)) { return false; } return queryAggregateFunctionShuttled.equals(viewExpression) @@ -53,7 +56,8 @@ public class DirectRollupHandler extends AggFunctionRollUpHandler { @Override public Function doRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression rollupParam = mvExprToMvScanExprQueryBasedPair.value(); if (rollupParam == null) { return null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/MappingRollupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/MappingRollupHandler.java index cf14217c50d..f3f81235f3c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/MappingRollupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/MappingRollupHandler.java @@ -137,12 +137,13 @@ public class MappingRollupHandler extends AggFunctionRollUpHandler { @Override public boolean canRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> 
mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { // handle complex functions roll up by mapping and combinator expression // eg: query is count(distinct param), mv sql is bitmap_union(to_bitmap(param)) Expression viewExpression = mvExprToMvScanExprQueryBasedPair.key(); if (!super.canRollup(queryAggregateFunction, queryAggregateFunctionShuttled, - mvExprToMvScanExprQueryBasedPair)) { + mvExprToMvScanExprQueryBasedPair, mvExprToMvScanExprQueryBasedMap)) { return false; } Function viewFunction = (Function) viewExpression; @@ -174,7 +175,8 @@ public class MappingRollupHandler extends AggFunctionRollUpHandler { @Override public Function doRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression rollupParam = mvExprToMvScanExprQueryBasedPair.value(); return ((RollUpTrait) queryAggregateFunction).constructRollUp(rollupParam); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/SingleCombinatorRollupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/SingleCombinatorRollupHandler.java index d9677cbe6cc..4e7333f2140 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/SingleCombinatorRollupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/rollup/SingleCombinatorRollupHandler.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.expressions.functions.combinator.Combinato import org.apache.doris.nereids.trees.expressions.functions.combinator.StateCombinator; import org.apache.doris.nereids.trees.expressions.functions.combinator.UnionCombinator; +import java.util.Map; import java.util.Objects; /** @@ -44,10 +45,11 @@ public 
class SingleCombinatorRollupHandler extends AggFunctionRollUpHandler { @Override public boolean canRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { Expression viewFunction = mvExprToMvScanExprQueryBasedPair.key(); if (!super.canRollup(queryAggregateFunction, queryAggregateFunctionShuttled, - mvExprToMvScanExprQueryBasedPair)) { + mvExprToMvScanExprQueryBasedPair, mvExprToMvScanExprQueryBasedMap)) { return false; } if (!(queryAggregateFunction instanceof Combinator) @@ -62,7 +64,8 @@ public class SingleCombinatorRollupHandler extends AggFunctionRollUpHandler { @Override public Function doRollup(AggregateFunction queryAggregateFunction, Expression queryAggregateFunctionShuttled, - Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair) { + Pair<Expression, Expression> mvExprToMvScanExprQueryBasedPair, + Map<Expression, Expression> mvExprToMvScanExprQueryBasedMap) { FunctionRegistry functionRegistry = Env.getCurrentEnv().getFunctionRegistry(); String combinatorName = queryAggregateFunction.getName() + AggCombinerFunctionBuilder.MERGE_SUFFIX; Expression rollupParam = mvExprToMvScanExprQueryBasedPair.value(); diff --git a/regression-test/data/nereids_rules_p0/mv/agg_variety/agg_variety.out b/regression-test/data/nereids_rules_p0/mv/agg_variety/agg_variety.out new file mode 100644 index 00000000000..24060a546c9 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/agg_variety/agg_variety.out @@ -0,0 +1,141 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !query1_0_before -- +1 1 1 1.0 1.0 10 +1 2 2 2.0 2.0 100 +2 1 1 1.0 1.0 1 +2 1 1 1.0 1.0 11 +2 2 2 2.0 2.0 101 + +-- !query1_0_after -- +1 1 1 1.0 1.0 10 +1 2 2 2.0 2.0 100 +2 1 1 1.0 1.0 1 +2 1 1 1.0 1.0 11 +2 2 2 2.0 2.0 101 + +-- !query1_1_before -- +1 1 1 1.0 1.0 o 10 +1 2 2 2.0 2.0 o 100 +2 1 1 1.0 1.0 o 1 +2 1 1 1.0 1.0 o 11 +2 2 2 2.0 2.0 o 101 + +-- !query1_1_after -- +1 1 1 1.0 1.0 o 10 +1 2 2 2.0 2.0 o 100 +2 1 1 1.0 1.0 o 1 +2 1 1 1.0 1.0 o 11 +2 2 2 2.0 2.0 o 101 + +-- !query1_2_before -- +1 2 2 2.0 2.0 1 +1 4 4 4.0 4.0 2 +2 2 2 2.0 2.0 1 +2 2 2 2.0 2.0 1 +2 4 4 4.0 4.0 2 + +-- !query1_2_after -- +1 2 2 2.0 2.0 1 +1 4 4 4.0 4.0 2 +2 2 2 2.0 2.0 1 +2 2 2 2.0 2.0 1 +2 4 4 4.0 4.0 2 + +-- !query1_3_before -- +1 3 3 2.0 2.0 10 +1 6 6 4.0 4.0 100 +2 2 2 2.0 2.0 1 +2 4 4 2.0 2.0 11 +2 7 7 4.0 4.0 101 + +-- !query1_3_after -- +1 3 3 2.0 2.0 10 +1 6 6 4.0 4.0 100 +2 2 2 2.0 2.0 1 +2 4 4 2.0 2.0 11 +2 7 7 4.0 4.0 101 + +-- !query2_0_before -- +1 1 1 1.0 1.0 1 o 10 +1 2 2 2.0 2.0 2 o 100 +2 1 1 1.0 1.0 1 o 1 +2 1 1 1.0 1.0 1 o 11 +2 2 2 2.0 2.0 2 o 101 + +-- !query2_0_after -- +1 1 1 1.0 1.0 1 o 10 +1 2 2 2.0 2.0 2 o 100 +2 1 1 1.0 1.0 1 o 1 +2 1 1 1.0 1.0 1 o 11 +2 2 2 2.0 2.0 2 o 101 + +-- !query2_1_before -- +1 1 1 1.0 1.0 10 +1 2 2 2.0 2.0 100 +2 1 1 1.0 1.0 1 +2 1 1 1.0 1.0 11 +2 2 2 2.0 2.0 101 + +-- !query2_1_after -- +1 1 1 1.0 1.0 10 +1 2 2 2.0 2.0 100 +2 1 1 1.0 1.0 1 +2 1 1 1.0 1.0 11 +2 2 2 2.0 2.0 101 + +-- !query2_2_before -- +1 1 1 1.0 1.0 10 +1 2 2 2.0 2.0 100 +2 1 1 1.0 1.0 1 +2 1 1 1.0 1.0 11 +2 2 2 2.0 2.0 101 + +-- !query2_2_after -- +1 1 1 1.0 1.0 10 +1 2 2 2.0 2.0 100 +2 1 1 1.0 1.0 1 +2 1 1 1.0 1.0 11 +2 2 2 2.0 2.0 101 + +-- !query2_3_before -- +1 3 3 1.0 1.0 2 o 10 +1 6 6 2.0 2.0 4 o 100 +2 2 2 1.0 1.0 1 o 1 +2 4 4 1.0 1.0 3 o 11 +2 7 7 2.0 2.0 5 o 101 + +-- !query2_3_after -- +1 3 3 1.0 1.0 2 o 10 +1 6 6 2.0 2.0 4 o 100 +2 2 2 1.0 1.0 1 o 1 +2 4 4 1.0 1.0 3 o 11 +2 7 7 2.0 2.0 5 o 
101 + +-- !query2_4_before -- +1 3 3 1.0 1.0 2 o 1 10 +1 6 6 2.0 2.0 4 o 2 100 +2 2 2 1.0 1.0 1 o 1 1 +2 4 4 1.0 1.0 3 o 1 11 +2 7 7 2.0 2.0 5 o 2 101 + +-- !query2_4_after -- +1 3 3 1.0 1.0 2 o 1 10 +1 6 6 2.0 2.0 4 o 2 100 +2 2 2 1.0 1.0 1 o 1 1 +2 4 4 1.0 1.0 3 o 1 11 +2 7 7 2.0 2.0 5 o 2 101 + +-- !query2_5_before -- +1 3 3 1.0 1.0 2 o 1 10 +1 6 6 2.0 2.0 4 o 2 100 +2 2 2 1.0 1.0 1 o 1 1 +2 4 4 1.0 1.0 3 o 1 11 +2 7 7 2.0 2.0 5 o 2 101 + +-- !query2_5_after -- +1 3 3 1.0 1.0 2 o 1 10 +1 6 6 2.0 2.0 4 o 2 100 +2 2 2 1.0 1.0 1 o 1 1 +2 4 4 1.0 1.0 3 o 1 11 +2 7 7 2.0 2.0 5 o 2 101 + diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy new file mode 100644 index 00000000000..833d03c1eda --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy @@ -0,0 +1,508 @@ +package mv.agg_variety +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("agg_variety") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + + sql """ + drop table if exists orders + """ + + sql """ + CREATE TABLE IF NOT EXISTS orders ( + o_orderkey INTEGER NOT NULL, + o_custkey INTEGER NOT NULL, + o_orderstatus CHAR(1) NOT NULL, + o_totalprice DECIMALV3(15,2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority CHAR(15) NOT NULL, + o_clerk CHAR(15) NOT NULL, + o_shippriority INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL + ) + DUPLICATE KEY(o_orderkey, o_custkey) + PARTITION BY RANGE(o_orderdate) ( + PARTITION `day_2` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_3` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_4` VALUES LESS THAN ("2023-12-30") + ) + DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + insert into orders values + (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); + """ + + sql """ + drop table if exists lineitem + """ + + sql""" + CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey INTEGER NOT NULL, + l_partkey INTEGER NOT NULL, + l_suppkey INTEGER NOT NULL, + l_linenumber INTEGER NOT NULL, + l_quantity DECIMALV3(15,2) NOT NULL, + l_extendedprice DECIMALV3(15,2) NOT NULL, + l_discount DECIMALV3(15,2) NOT NULL, + l_tax DECIMALV3(15,2) NOT NULL, + l_returnflag CHAR(1) NOT NULL, + l_linestatus CHAR(1) NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct CHAR(25) NOT NULL, + l_shipmode 
CHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) ( + PARTITION `day_1` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_2` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_3` VALUES LESS THAN ("2023-12-30")) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ insert into lineitem values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx'); + """ + + sql """ + drop table if exists partsupp + """ + + sql """ + CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey INTEGER NOT NULL, + ps_suppkey INTEGER NOT NULL, + ps_availqty INTEGER NOT NULL, + ps_supplycost DECIMALV3(15,2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL + ) + DUPLICATE KEY(ps_partkey, ps_suppkey) + DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ + insert into partsupp values + (2, 3, 9, 10.01, 'supply1'), + (2, 3, 10, 11.01, 'supply2'); + """ + + sql """analyze table orders with sync;""" + sql """analyze table lineitem with sync;""" + sql """analyze table partsupp with sync;""" + + def check_rewrite_but_not_chose = { mv_sql, query_sql, mv_name -> + + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS ${mv_sql} + """ + + def 
job_name = getJobName(db, mv_name); + waitingMTMVTaskFinished(job_name) + explain { + sql("${query_sql}") + check {result -> + def splitResult = result.split("MaterializedViewRewriteFail") + splitResult.length == 2 ? splitResult[0].contains(mv_name) : false + } + } + } + + // query has fewer dimensions than mv + def mv1_0 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + def query1_0 = """ + select + count(o_totalprice), + max(distinct o_shippriority), + min(distinct o_shippriority), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority) + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + bin(o_orderkey); + """ + order_qt_query1_0_before "${query1_0}" + check_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0") + order_qt_query1_0_after "${query1_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" + + def mv1_1 = """ + select + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + def query1_1 = """ + select + count(o_shippriority), + max(distinct o_shippriority), + min(distinct o_shippriority), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + bin(o_orderkey); + """ + order_qt_query1_1_before "${query1_1}" + // contains aggregate function count without distinct, which is not supported, should fail + check_mv_rewrite_fail(db, mv1_1, query1_1, "mv1_1") + order_qt_query1_1_after "${query1_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_1""" + + + def mv1_2 = """ + select + count(o_totalprice), + o_orderkey, + o_custkey, + o_shippriority, + bin(o_orderkey) + from orders + group by + o_orderkey, + o_custkey, + o_shippriority, + bin(o_orderkey); + """ + def query1_2 = 
""" + select + count(o_totalprice), + max(distinct o_custkey + o_shippriority), + min(distinct o_custkey + o_shippriority), + avg(distinct o_custkey + o_shippriority), + sum(distinct o_custkey + o_shippriority) / count(distinct o_custkey + o_shippriority) + o_custkey, + o_shippriority + from orders + group by + o_custkey, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query1_2_before "${query1_2}" + // the arguments of the aggregate functions are complex expressions, should succeed + check_mv_rewrite_success(db, mv1_2, query1_2, "mv1_2") + order_qt_query1_2_after "${query1_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_2""" + + + + def mv1_3 = """ + select + count(o_totalprice), + o_custkey, + o_shippriority, + bin(o_orderkey) + from orders + group by + o_custkey, + o_shippriority, + bin(o_orderkey); + """ + def query1_3 = """ + select + count(o_totalprice), + max(distinct o_orderkey + o_shippriority), + min(distinct o_orderkey + o_shippriority), + avg(distinct o_custkey + o_shippriority), + sum(distinct o_custkey + o_shippriority) / count(distinct o_custkey + o_shippriority) + o_shippriority, + bin(o_orderkey) + from orders + group by + o_shippriority, + bin(o_orderkey); + """ + order_qt_query1_3_before "${query1_3}" + // the functions use a dimension which is not in the mv output, should fail + check_mv_rewrite_fail(db, mv1_3, query1_3, "mv1_3") + order_qt_query1_3_after "${query1_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_3""" + + + // query dimensions are equal to the mv dimensions + def mv2_0 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + def query2_0 = """ + select + count(o_totalprice), + max(distinct o_shippriority), + min(distinct o_shippriority), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + 
o_orderstatus, + 
o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_0_before "${query2_0}" + check_mv_rewrite_success(db, mv2_0, query2_0, "mv2_0") + order_qt_query2_0_after "${query2_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0""" + + + def mv2_1 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + // query selects fewer dimensions than it groups by + def query2_1 = """ + select + count(o_totalprice), + max(distinct o_shippriority), + min(distinct o_shippriority), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_1_before "${query2_1}" + check_mv_rewrite_success(db, mv2_1, query2_1, "mv2_1") + order_qt_query2_1_after "${query2_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_1""" + + + def mv2_2 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + def query2_2 = """ + select + count(o_shippriority), + max(distinct o_shippriority), + min(distinct o_shippriority), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + bin(o_orderkey) + from orders + group by + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_2_before "${query2_2}" + // contains aggregate function count without distinct, which is not supported, should fail + check_mv_rewrite_fail(db, mv2_2, query2_2, "mv2_2") + order_qt_query2_2_after "${query2_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_2""" + + + def mv2_3 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey), + o_orderkey + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + def query2_3 = """ 
+ select + count(o_totalprice), + max(distinct o_shippriority + o_orderkey), + min(distinct o_shippriority + o_orderkey), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + o_orderkey, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_3_before "${query2_3}" + // aggregate functions use complex expressions, should succeed + check_mv_rewrite_success(db, mv2_3, query2_3, "mv2_3") + order_qt_query2_3_after "${query2_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_3""" + + + def mv2_4 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey) + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + // mv outputs fewer dimensions than it groups by: o_orderkey is grouped on but not selected + def query2_4 = """ + select + count(o_totalprice), + max(distinct o_shippriority + o_orderkey), + min(distinct o_shippriority + o_orderkey), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey) + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_4_before "${query2_4}" + // the functions use a dimension which is not in the mv output, should fail + check_mv_rewrite_fail(db, mv2_4, query2_4, "mv2_4") + order_qt_query2_4_after "${query2_4}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_4""" + + + def mv2_5 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey), + o_orderkey + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + // query selects the same dimensions as it groups by + def query2_5 = """ + select + count(o_totalprice), + max(distinct o_shippriority + o_orderkey), + min(distinct o_shippriority + o_orderkey), + avg(distinct o_shippriority), + 
sum(distinct o_shippriority) / count(distinct o_shippriority), + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey) + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_5_before "${query2_5}" + // aggregate functions use complex expressions, should succeed + check_mv_rewrite_success(db, mv2_5, query2_5, "mv2_5") + order_qt_query2_5_after "${query2_5}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_5""" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org