englefly commented on code in PR #60757:
URL: https://github.com/apache/doris/pull/60757#discussion_r3392837049


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java:
##########
@@ -0,0 +1,319 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite.eageraggregation;
+
+import org.apache.doris.nereids.jobs.JobContext;
+import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
+import org.apache.doris.nereids.rules.rewrite.AdjustNullable;
+import org.apache.doris.nereids.trees.expressions.CaseWhen;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.Function;
+import 
org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
+import org.apache.doris.nereids.trees.expressions.functions.agg.RollUpTrait;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Nvl;
+import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
+import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
+import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter;
+import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.qe.SessionVariable;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * push down aggregation
+ */
+public class PushDownAggregation extends DefaultPlanRewriter<JobContext> 
implements CustomRewriter {
+    private static final Logger LOG = 
LoggerFactory.getLogger(PushDownAggregation.class);
+
+    public final EagerAggRewriter writer = new EagerAggRewriter();
+
+    private final Set<Class> pushDownAggFunctionSet = Sets.newHashSet(
+            Count.class,
+            Sum.class,
+            Max.class,
+            Min.class);
+
+    private final Set<Class> acceptNodeType = Sets.newHashSet(
+            LogicalUnion.class,
+            LogicalProject.class,
+            LogicalFilter.class,
+            LogicalRelation.class,
+            LogicalJoin.class);
+
+    @Override
+    public Plan rewriteRoot(Plan plan, JobContext jobContext) {
+        if (SessionVariable.isFeDebug()) {
+            try {
+                new AdjustNullable(false).rewriteRoot(plan, null);
+            } catch (Exception e) {
+                LOG.warn("(PushDownAggregation) input plan has nullable 
problem", e);
+                return plan;
+            }
+        }
+        int mode = SessionVariable.getEagerAggregationMode();
+        if (mode < 0) {
+            return plan;
+        } else {
+            Plan result = plan.accept(this, jobContext);
+            if (SessionVariable.isFeDebug()) {
+                result = new AdjustNullable(true).rewriteRoot(result, null);
+            }
+            return result;
+        }
+    }
+
+    @Override
+    public Plan visitLogicalAggregate(LogicalAggregate<? extends Plan> agg, 
JobContext context) {
+        Plan newChild = agg.child().accept(this, context);
+        if (newChild != agg.child()) {
+            return agg.withChildren(newChild);
+        }
+
+        if (agg.getSourceRepeat().isPresent()) {
+            return agg;
+        }
+
+        List<SlotReference> groupKeys = new ArrayList<>();
+        for (Expression groupKey : agg.getGroupByExpressions()) {
+            if (groupKey instanceof SlotReference) {
+                groupKeys.add((SlotReference) groupKey);
+            } else {
+                SessionVariable.throwAnalysisExceptionWhenFeDebug(
+                        "PushDownAggregation failed: agg is not normalized\n "
+                        + agg.treeString());
+                return agg;
+            }
+        }
+
+        Set<AggregateFunction> aggFunctions = Sets.newHashSet();
+        boolean hasDecomposedAggIf = false;
+        boolean hasCaseWhen = false;
+        Map<NamedExpression, List<AggregateFunction>> 
aggFunctionsForOutputExpressions = Maps.newHashMap();
+        for (NamedExpression aggOutput : agg.getOutputExpressions()) {
+            List<AggregateFunction> funcs = Lists.newArrayList();
+            aggFunctionsForOutputExpressions.put(aggOutput, funcs);
+            for (Object obj : 
aggOutput.collect(AggregateFunction.class::isInstance)) {
+                AggregateFunction aggFunction = (AggregateFunction) obj;
+                if (aggFunction.isDistinct()) {
+                    return agg;
+                }
+                if (pushDownAggFunctionSet.contains(aggFunction.getClass())) {
+                    // CaseWhen and If (which CASE WHEN is normalized into) 
must both be checked.
+                    // When an agg function contains an If/CaseWhen whose 
condition tests IS NULL
+                    // (e.g. count(if(col IS NULL, value, NULL))), pushing it 
to the nullable side
+                    // of an outer join produces wrong results: null-extended 
rows make "col IS NULL"
+                    // TRUE at the top level, but the pre-aggregated count 
slot becomes NULL after
+                    // null-extension, and ifnull(sum(NULL), 0) = 0 instead of 
the correct 1.
+                    if (!hasCaseWhen && aggFunction.anyMatch(e -> e instanceof 
CaseWhen || e instanceof If)) {
+                        hasCaseWhen = true;
+                    }

Review Comment:
   不会有问题. ifnull(x, y) 会作为一个整体下推.
   if(a>0, b, c) 拆开推的原因是 a 可以来自左表, b/c 可以来自右表,所以拆开后可以推的范围更大.
   ifnull(x, y) 相当于 if(x is null, y, x), 如果条件和 value 来自同一张表,那么也就没有拆开的必要了,只能当做整体下推.
   
   
   类似的函数也都整体下推
   IF(cond, a, b) | 条件为真返回 a,否则返回 b
   IFNULL(a, b) | a 不为 NULL 返回 a,否则返回 b(别名:NVL)
   CASE WHEN | 多分支条件逻辑,类似 switch-case
   COALESCE(a, b, ...) | 返回参数列表中第一个非 NULL 的值
   NULLIF(a, b) | 若 a = b 返回 NULL,否则返回 a
   LEAST(a, b, ...) | 返回多个参数中的最小值,任一为 NULL 则返回 NULL
   GREATEST(a, b, ...) | 返回多个参数中的最大值,任一为 NULL 则返回 NULL
   NULL_OR_EMPTY(str) | 字符串为 NULL 或空串时返回 true
   NOT_NULL_OR_EMPTY(str) | 字符串既不为 NULL 也不为空串时返回 true
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to