This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4322fdc96d [feature](Nereids): add or expansion in CBO(#22465)
4322fdc96d is described below

commit 4322fdc96d30351dbcab1db8c3e8ac8c4ced289c
Author: 谢健 <[email protected]>
AuthorDate: Thu Aug 3 13:29:33 2023 +0800

    [feature](Nereids): add or expansion in CBO(#22465)
---
 .../org/apache/doris/nereids/rules/RuleSet.java    |   3 +
 .../org/apache/doris/nereids/rules/RuleType.java   |   1 +
 .../nereids/rules/exploration/OrExpansion.java     | 158 +++++++++++++++++++++
 .../data/nereids_p0/union/or_expansion.out         |   9 ++
 .../suites/nereids_p0/union/or_expansion.groovy    |  37 +++++
 5 files changed, 208 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
index f3e9bf5c55..4ed887193f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
@@ -18,6 +18,7 @@
 package org.apache.doris.nereids.rules;
 
 import org.apache.doris.nereids.rules.exploration.MergeProjectsCBO;
+import org.apache.doris.nereids.rules.exploration.OrExpansion;
 import org.apache.doris.nereids.rules.exploration.TransposeAggSemiJoin;
 import org.apache.doris.nereids.rules.exploration.TransposeAggSemiJoinProject;
 import org.apache.doris.nereids.rules.exploration.join.InnerJoinLAsscom;
@@ -98,6 +99,7 @@ public class RuleSet {
 
     public static final List<Rule> EXPLORATION_RULES = planRuleFactories()
             .add(new MergeProjectsCBO())
+            .add(new OrExpansion())
             .build();
 
     public static final List<Rule> OTHER_REORDER_RULES = planRuleFactories()
@@ -196,6 +198,7 @@ public class RuleSet {
             .build();
 
     public static final List<Rule> DPHYP_REORDER_RULES = 
ImmutableList.<Rule>builder()
+            .addAll(EXPLORATION_RULES)
             .add(JoinCommute.BUSHY.build())
             .build();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index 114c0529fa..d43438ee85 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -250,6 +250,7 @@ public enum RuleType {
 
     // exploration rules
     TEST_EXPLORATION(RuleTypeClass.EXPLORATION),
+    OR_EXPANSION(RuleTypeClass.EXPLORATION),
     LOGICAL_JOIN_COMMUTE(RuleTypeClass.EXPLORATION),
     LOGICAL_INNER_JOIN_LASSCOM(RuleTypeClass.EXPLORATION),
     LOGICAL_INNER_JOIN_LASSCOM_PROJECT(RuleTypeClass.EXPLORATION),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/OrExpansion.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/OrExpansion.java
new file mode 100644
index 0000000000..fcb46525fa
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/OrExpansion.java
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.exploration;
+
+import org.apache.doris.common.Pair;
+import org.apache.doris.nereids.StatementContext;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.EqualTo;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.algebra.SetOperation.Qualifier;
+import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor;
+import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
+import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer;
+import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.nereids.util.JoinUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * 
https://blogs.oracle.com/optimizer/post/optimizer-transformations-or-expansion
+ * NLJ (cond1 or cond2)              UnionAll
+ *                      =>         /        \
+ *                           HJ(cond1) HJ(cond2 and !cond1)
+ */
+public class OrExpansion extends OneExplorationRuleFactory {
+
+    @Override
+    public Rule build() {
+        return logicalJoin()
+                .when(JoinUtils::shouldNestedLoopJoin)
+                .when(join -> join.getJoinType().isInnerJoin())
+                .thenApply(ctx -> {
+                    LogicalJoin<? extends Plan, ? extends Plan> join = 
ctx.root;
+                    
Preconditions.checkArgument(join.getHashJoinConjuncts().isEmpty(),
+                            "Only Expansion nest loop join without hashCond");
+                    List<Expression> disjunctions = null;
+                    List<Expression> otherConditions = 
Lists.newArrayList(join.getOtherJoinConjuncts());
+
+                    // We pick the first or condition that can be split to 
EqualTo expressions.
+                    for (Expression expr : otherConditions) {
+                        Pair<List<Expression>, List<Expression>> pair = 
expandExpr(expr, join);
+                        // TODO: Now we don't support expand condition with 
complex expression
+                        if (pair.second.isEmpty() && pair.first.stream()
+                                .noneMatch(e -> !((EqualTo) e).left().isSlot()
+                                        && !((EqualTo) e).right().isSlot())) {
+                            disjunctions = pair.first;
+                            otherConditions.remove(expr);
+                            break;
+                        }
+                    }
+                    // If there is non-EqualTo expression, it means there is 
nlj child
+                    // Therefore refuse this case
+                    if (disjunctions == null) {
+                        return join;
+                    }
+                    //Construct CTE with the children
+                    LogicalCTEProducer<? extends Plan> leftProducer = new 
LogicalCTEProducer<>(
+                            ctx.statementContext.getNextCTEId(), join.left());
+                    LogicalCTEProducer<? extends Plan> rightProducer = new 
LogicalCTEProducer<>(
+                            ctx.statementContext.getNextCTEId(), join.right());
+                    // expand join to hash join with CTE
+                    List<Plan> joins = expandJoin(ctx.statementContext, 
disjunctions, otherConditions, join,
+                            leftProducer,
+                            rightProducer);
+
+                    LogicalUnion union = new LogicalUnion(Qualifier.ALL, new 
ArrayList<>(join.getOutput()),
+                            ImmutableList.of(),
+                            false, joins);
+                    LogicalCTEAnchor<? extends Plan, ? extends Plan> 
intermediateAnchor = new LogicalCTEAnchor<>(
+                            rightProducer.getCteId(), rightProducer, union);
+                    return new LogicalCTEAnchor<Plan, 
Plan>(leftProducer.getCteId(), leftProducer, intermediateAnchor);
+                }).toRule(RuleType.OR_EXPANSION);
+    }
+
+    // extract disjunctions for this otherExpr and divide them into HashCond 
and OtherCond
+    // return hash conditions and other conditions
+    private Pair<List<Expression>, List<Expression>> expandExpr(Expression 
otherExpr,
+            LogicalJoin<? extends Plan, ? extends Plan> join) {
+        List<Expression> disjunctions = 
ExpressionUtils.extractDisjunction(otherExpr);
+        return 
JoinUtils.extractExpressionForHashTable(join.left().getOutput(), 
join.right().getOutput(), disjunctions);
+    }
+
+    private List<Plan> expandJoin(StatementContext ctx, List<Expression> 
disjunctions, List<Expression> otherConditions,
+            LogicalJoin<? extends Plan, ? extends Plan> join, 
LogicalCTEProducer<? extends Plan> leftProducer,
+            LogicalCTEProducer<? extends Plan> rightProducer) {
+        List<Expression> notExprs = 
disjunctions.stream().map(Not::new).collect(Collectors.toList());
+        List<Plan> joins = Lists.newArrayList();
+
+        for (int hashCondIdx = 0; hashCondIdx < disjunctions.size(); 
hashCondIdx++) {
+            // extract hash conditions and other condition
+            Pair<List<Expression>, List<Expression>> pair =
+                    extractHashAndOtherConditions(hashCondIdx, disjunctions, 
notExprs);
+            pair.second.addAll(otherConditions);
+
+            LogicalCTEConsumer left = new 
LogicalCTEConsumer(ctx.getNextRelationId(), leftProducer.getCteId(), "",
+                    leftProducer);
+            LogicalCTEConsumer right = new 
LogicalCTEConsumer(ctx.getNextRelationId(), rightProducer.getCteId(), "",
+                    rightProducer);
+
+            //rewrite conjuncts to replace the old slots with CTE slots
+            Map<Slot, Slot> replaced = new 
HashMap<>(left.getProducerToConsumerOutputMap());
+            replaced.putAll(right.getProducerToConsumerOutputMap());
+            List<Expression> hashCond = pair.first.stream()
+                    .map(e -> e.rewriteUp(s -> replaced.containsKey(s) ? 
replaced.get(s) : s))
+                    .collect(Collectors.toList());
+            List<Expression> otherCond = pair.second.stream()
+                    .map(e -> e.rewriteUp(s -> replaced.containsKey(s) ? 
replaced.get(s) : s))
+                    .collect(Collectors.toList());
+
+            // TODO: normalize join condition
+            LogicalJoin<? extends Plan, ? extends Plan> newJoin = 
join.withJoinConjuncts(hashCond, otherCond)
+                    .withChildren(Lists.newArrayList(left, right));
+            joins.add(newJoin);
+        }
+        return joins;
+    }
+
+    // join(a or b or c) = join(a) union join(b) union join(c)
+    //                   = join(a) union all (join b and !a) union all join(c 
and !b and !a)
+    // return hashConditions and otherConditions
+    private Pair<List<Expression>, List<Expression>> 
extractHashAndOtherConditions(int hashCondIdx,
+            List<Expression> equal, List<Expression> not) {
+        List<Expression> others = new ArrayList<>();
+        for (int i = 0; i < hashCondIdx; i++) {
+            others.add(not.get(i));
+        }
+        return Pair.of(Lists.newArrayList(equal.get(hashCondIdx)), others);
+    }
+}
diff --git a/regression-test/data/nereids_p0/union/or_expansion.out 
b/regression-test/data/nereids_p0/union/or_expansion.out
new file mode 100644
index 0000000000..a5fe6f155e
--- /dev/null
+++ b/regression-test/data/nereids_p0/union/or_expansion.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !nsj --
+false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727 false   1       1989    1001    
11011902        123.123 true    1989-03-21      1989-03-21T13:00        
wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
+false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727 false   2       1986    1001    
11011903        1243.500        false   1901-12-31      1989-03-21T13:00        
wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727        false   1       
1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727        false   2       
1986    1001    11011903        1243.500        false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0       false   3       1989    1002    11011905        
24453.325       false   2012-03-14      2000-01-01T00:00        yunlj8@nk       
78945.0 3654.0  string12345     0
+false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0       false   7       -32767  1002    7210457 3.141   false   
1988-03-21      1901-01-01T00:00        jiw3n4  0.0     6058.0  string12345     
-20220101
+
diff --git a/regression-test/suites/nereids_p0/union/or_expansion.groovy 
b/regression-test/suites/nereids_p0/union/or_expansion.groovy
new file mode 100644
index 0000000000..6528719667
--- /dev/null
+++ b/regression-test/suites/nereids_p0/union/or_expansion.groovy
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("or_expansion") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+    def db = "nereids_test_query_db"
+    sql "use ${db}"
+
+    explain {
+        sql("""select * from bigtable 
+            join baseall 
+            on baseall.k0 = bigtable.k0 
+            or baseall.k1 = bigtable.k1""")
+        contains "VHASH JOIN"
+    }
+    
+    order_qt_nsj """select * from test 
+            join baseall 
+            on baseall.k1 = test.k1 
+            or baseall.k3 = test.k3
+            """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to