This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 861d9c985c [refactor](Nereids): refactor Join Reorder Rule. (#17809)
861d9c985c is described below
commit 861d9c985c49c6ac158dc25920e17a90d9dd5002
Author: jakevin <[email protected]>
AuthorDate: Tue Mar 21 16:12:07 2023 +0800
[refactor](Nereids): refactor Join Reorder Rule. (#17809)
---
.../org/apache/doris/nereids/cost/CostWeight.java | 2 +-
.../nereids/jobs/cascades/CostAndEnforcerJob.java | 8 ++---
.../org/apache/doris/nereids/rules/RuleSet.java | 14 ++++----
.../exploration/join/InnerJoinLAsscomProject.java | 19 +++++-----
.../rules/exploration/join/JoinReorderUtils.java | 42 +++++-----------------
.../exploration/join/OuterJoinAssocProject.java | 6 ++--
.../exploration/join/OuterJoinLAsscomProject.java | 6 ++--
.../join/PushdownProjectThroughInnerJoin.java | 16 ++++++---
8 files changed, 48 insertions(+), 65 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java
index a21febefa6..d09c72f6dd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java
@@ -47,7 +47,7 @@ public class CostWeight {
* Except stats information, there are some special criteria in doris.
* For example, in hash join cluster, BE could build hash tables
* in parallel for left deep tree. And hence, we need to punish right deep
tree.
- * penalyWeight is the factor of punishment.
+ * penaltyWeight is the factor of punishment.
* The punishment is denoted by stats.penalty.
*/
final double penaltyWeight;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
index 51854cd072..c2cdddb23c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
@@ -57,20 +57,20 @@ public class CostAndEnforcerJob extends Job implements
Cloneable {
// List of request property to children
// Example: Physical Hash Join
- // [ child item: [leftProperties, rightPropertie]]
+ // [ child item: [leftProperties, rightProperties]]
// [ [Properties {"", ANY}, Properties {"", BROADCAST}],
// [Properties {"", SHUFFLE_JOIN}, Properties {"", SHUFFLE_JOIN}]]
private List<List<PhysicalProperties>> requestChildrenPropertiesList;
- private List<List<PhysicalProperties>> outputChildrenPropertiesList = new
ArrayList<>();
+ private final List<List<PhysicalProperties>> outputChildrenPropertiesList
= new ArrayList<>();
// index of List<request property to children>
private int requestPropertiesIndex = 0;
private final List<GroupExpression> lowestCostChildren =
Lists.newArrayList();
- // current child index of travsing all children
+ // current child index of traversing all children
private int curChildIndex = -1;
- // child index in the last time of travsing all children
+ // child index in the last time of traversing all children
private int prevChildIndex = -1;
public CostAndEnforcerJob(GroupExpression groupExpression, JobContext
context) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
index 83544b96ff..11b98fe257 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java
@@ -22,6 +22,8 @@ import
org.apache.doris.nereids.rules.exploration.join.InnerJoinLAsscomProject;
import org.apache.doris.nereids.rules.exploration.join.JoinCommute;
import org.apache.doris.nereids.rules.exploration.join.OuterJoinLAsscom;
import org.apache.doris.nereids.rules.exploration.join.OuterJoinLAsscomProject;
+import
org.apache.doris.nereids.rules.exploration.join.PushdownProjectThroughInnerJoin;
+import
org.apache.doris.nereids.rules.exploration.join.PushdownProjectThroughSemiJoin;
import
org.apache.doris.nereids.rules.exploration.join.SemiJoinLogicalJoinTranspose;
import
org.apache.doris.nereids.rules.exploration.join.SemiJoinLogicalJoinTransposeProject;
import
org.apache.doris.nereids.rules.exploration.join.SemiJoinSemiJoinTranspose;
@@ -86,12 +88,14 @@ public class RuleSet {
.add(SemiJoinLogicalJoinTransposeProject.LEFT_DEEP)
.add(SemiJoinSemiJoinTranspose.INSTANCE)
.add(SemiJoinSemiJoinTransposeProject.INSTANCE)
+ .add(PushdownProjectThroughInnerJoin.INSTANCE)
+ .add(PushdownProjectThroughSemiJoin.INSTANCE)
.build();
/**
* This rule need to be shadow in DPHyp
*/
- public static final List<Rule> JOINORDER_RULE = planRuleFactories()
+ public static final List<Rule> JOIN_REORDER_RULE = planRuleFactories()
.add(JoinCommute.ZIG_ZAG)
.add(InnerJoinLAsscom.INSTANCE)
.add(InnerJoinLAsscomProject.INSTANCE)
@@ -175,20 +179,18 @@ public class RuleSet {
public List<Rule> getExplorationRules() {
List<Rule> rules = new ArrayList<>();
- rules.addAll(JOINORDER_RULE);
+ rules.addAll(JOIN_REORDER_RULE);
rules.addAll(OTHER_REORDER_RULES);
rules.addAll(EXPLORATION_RULES);
return rules;
}
public List<Rule> getExplorationRulesWithoutReorder() {
- List<Rule> rules = new ArrayList<>();
- rules.addAll(EXPLORATION_RULES);
- return rules;
+ return new ArrayList<>(EXPLORATION_RULES);
}
public List<Rule> getJoinOrderRule() {
- return JOINORDER_RULE;
+ return JOIN_REORDER_RULE;
}
public List<Rule> getImplementationRules() {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java
index b48e429ccf..b55c135dee 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java
@@ -65,10 +65,11 @@ public class InnerJoinLAsscomProject extends
OneExplorationRuleFactory {
GroupPlan a = bottomJoin.left();
GroupPlan b = bottomJoin.right();
GroupPlan c = topJoin.right();
+ Set<ExprId> bExprIdSet = b.getOutputExprIdSet();
Set<ExprId> cExprIdSet = c.getOutputExprIdSet();
/* ********** Split projects ********** */
- Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProjection(projects, b);
+ Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProject(projects, bExprIdSet);
List<NamedExpression> aProjects = map.get(false);
List<NamedExpression> bProjects = map.get(true);
if (aProjects.isEmpty()) {
@@ -76,14 +77,13 @@ public class InnerJoinLAsscomProject extends
OneExplorationRuleFactory {
}
/* ********** split HashConjuncts ********** */
- Set<ExprId> bExprIdSet =
JoinReorderUtils.combineProjectAndChildExprId(b, bProjects);
- Map<Boolean, List<Expression>> splitHashConjuncts =
splitConjunctsWithAlias(
+ Map<Boolean, List<Expression>> splitHashConjuncts =
splitConjuncts(
topJoin.getHashJoinConjuncts(),
bottomJoin.getHashJoinConjuncts(), bExprIdSet);
List<Expression> newTopHashConjuncts =
splitHashConjuncts.get(true);
List<Expression> newBottomHashConjuncts =
splitHashConjuncts.get(false);
/* ********** split OtherConjuncts ********** */
- Map<Boolean, List<Expression>> splitOtherConjuncts =
splitConjunctsWithAlias(
+ Map<Boolean, List<Expression>> splitOtherConjuncts =
splitConjuncts(
topJoin.getOtherJoinConjuncts(),
bottomJoin.getOtherJoinConjuncts(), bExprIdSet);
List<Expression> newTopOtherConjuncts =
splitOtherConjuncts.get(true);
List<Expression> newBottomOtherConjuncts =
splitOtherConjuncts.get(false);
@@ -93,16 +93,15 @@ public class InnerJoinLAsscomProject extends
OneExplorationRuleFactory {
}
// Add all slots used by OnCondition when projects not
empty.
- Set<ExprId> aExprIdSet =
JoinReorderUtils.combineProjectAndChildExprId(a, aProjects);
Map<Boolean, Set<Slot>> abOnUsedSlots = Stream.concat(
newTopHashConjuncts.stream(),
newTopOtherConjuncts.stream())
.flatMap(onExpr -> onExpr.getInputSlots().stream())
.filter(slot ->
!cExprIdSet.contains(slot.getExprId()))
.collect(Collectors.partitioningBy(
- slot ->
aExprIdSet.contains(slot.getExprId()), Collectors.toSet()));
- JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true),
aProjects);
-
JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), bProjects);
+ slot ->
bExprIdSet.contains(slot.getExprId()), Collectors.toSet()));
+
JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), aProjects);
+ JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true),
bProjects);
aProjects.addAll(c.getOutput());
@@ -130,14 +129,14 @@ public class InnerJoinLAsscomProject extends
OneExplorationRuleFactory {
* True: contains B.
* False: just contains A C.
*/
- private Map<Boolean, List<Expression>>
splitConjunctsWithAlias(List<Expression> topConjuncts,
+ private Map<Boolean, List<Expression>> splitConjuncts(List<Expression>
topConjuncts,
List<Expression> bottomConjuncts, Set<ExprId> bExprIdSet) {
// top: (A B)(error) (A C) (B C) (A B C)
// Split topJoin Condition to two part according to include B.
Map<Boolean, List<Expression>> splitOn = topConjuncts.stream()
.collect(Collectors.partitioningBy(topHashOn -> {
Set<ExprId> usedExprIds = topHashOn.getInputSlotExprIds();
- return Utils.isIntersecting(bExprIdSet, usedExprIds);
+ return Utils.isIntersecting(usedExprIds, bExprIdSet);
}));
// * don't include B, just include (A C)
// we add it into newBottomJoin HashConjuncts.
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java
index 36c71cf01f..916ccdc6bb 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java
@@ -18,7 +18,6 @@
package org.apache.doris.nereids.rules.exploration.join;
import org.apache.doris.nereids.trees.expressions.ExprId;
-import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.GroupPlan;
@@ -26,13 +25,10 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
-import com.google.common.collect.ImmutableList;
-
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
-import java.util.stream.Stream;
/**
* Common
@@ -42,22 +38,19 @@ class JoinReorderUtils {
return project.getProjects().stream().allMatch(expr -> expr instanceof
Slot);
}
- static Map<Boolean, List<NamedExpression>>
splitProjection(List<NamedExpression> projects, Plan splitChild) {
- Set<ExprId> splitExprIds = splitChild.getOutputExprIdSet();
-
+ /**
+ * Split project according to whether namedExpr contains by
splitChildExprIds.
+ * Notice: projects must all be Slot.
+ */
+ static Map<Boolean, List<NamedExpression>>
splitProject(List<NamedExpression> projects,
+ Set<ExprId> splitChildExprIds) {
return projects.stream()
- .collect(Collectors.partitioningBy(projectExpr -> {
- Set<ExprId> usedExprIds =
projectExpr.getInputSlotExprIds();
- return splitExprIds.containsAll(usedExprIds);
+ .collect(Collectors.partitioningBy(expr -> {
+ Slot slot = (Slot) expr;
+ return splitChildExprIds.contains(slot.getExprId());
}));
}
- public static Set<ExprId> combineProjectAndChildExprId(Plan b,
List<NamedExpression> bProject) {
- return Stream.concat(
- b.getOutput().stream().map(NamedExpression::getExprId),
-
bProject.stream().map(NamedExpression::getExprId)).collect(Collectors.toSet());
- }
-
/**
* If projectExprs is empty or project output equal plan output, return
the original plan.
*/
@@ -76,23 +69,6 @@ class JoinReorderUtils {
return new LogicalProject<>(projectExprs, plan);
}
- /**
- * replace JoinConjuncts by using slots map.
- */
- public static List<Expression> replaceJoinConjuncts(List<Expression>
joinConjuncts,
- Map<ExprId, Slot> replaceMaps) {
- return joinConjuncts.stream()
- .map(expr ->
- expr.rewriteUp(e -> {
- if (e instanceof Slot &&
replaceMaps.containsKey(((Slot) e).getExprId())) {
- return replaceMaps.get(((Slot) e).getExprId());
- } else {
- return e;
- }
- })
- ).collect(ImmutableList.toImmutableList());
- }
-
/**
* When project not empty, we add all slots used by hashOnCondition into
projects.
*/
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java
index 06421c9c56..c86361f231 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java
@@ -66,9 +66,10 @@ public class OuterJoinAssocProject extends
OneExplorationRuleFactory {
GroupPlan a = bottomJoin.left();
GroupPlan b = bottomJoin.right();
GroupPlan c = topJoin.right();
+ Set<ExprId> aOutputExprIds = a.getOutputExprIdSet();
/* ********** Split projects ********** */
- Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProjection(projects, a);
+ Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProject(projects, aOutputExprIds);
List<NamedExpression> aProjects = map.get(true);
List<NamedExpression> bProjects = map.get(false);
if (bProjects.isEmpty()) {
@@ -84,13 +85,12 @@ public class OuterJoinAssocProject extends
OneExplorationRuleFactory {
}
// Add all slots used by OnCondition when projects not
empty.
- Set<ExprId> aExprIdSet =
JoinReorderUtils.combineProjectAndChildExprId(a, aProjects);
Map<Boolean, Set<Slot>> abOnUsedSlots = Stream.concat(
bottomJoin.getHashJoinConjuncts().stream(),
bottomJoin.getHashJoinConjuncts().stream())
.flatMap(onExpr -> onExpr.getInputSlots().stream())
.collect(Collectors.partitioningBy(
- slot ->
aExprIdSet.contains(slot.getExprId()), Collectors.toSet()));
+ slot ->
aOutputExprIds.contains(slot.getExprId()), Collectors.toSet()));
JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true),
aProjects);
JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), bProjects);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java
index 2661947189..96e77d7ab9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java
@@ -68,9 +68,10 @@ public class OuterJoinLAsscomProject extends
OneExplorationRuleFactory {
GroupPlan a = bottomJoin.left();
GroupPlan b = bottomJoin.right();
GroupPlan c = topJoin.right();
+ Set<ExprId> aOutputExprIds = a.getOutputExprIdSet();
/* ********** Split projects ********** */
- Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProjection(projects, a);
+ Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProject(projects, aOutputExprIds);
List<NamedExpression> aProjects = map.get(true);
if (aProjects.isEmpty()) {
return null;
@@ -86,13 +87,12 @@ public class OuterJoinLAsscomProject extends
OneExplorationRuleFactory {
}
// Add all slots used by OnCondition when projects not
empty.
- Set<ExprId> aExprIdSet =
JoinReorderUtils.combineProjectAndChildExprId(a, aProjects);
Map<Boolean, Set<Slot>> abOnUsedSlots = Stream.concat(
bottomJoin.getHashJoinConjuncts().stream(),
bottomJoin.getHashJoinConjuncts().stream())
.flatMap(onExpr -> onExpr.getInputSlots().stream())
.collect(Collectors.partitioningBy(
- slot ->
aExprIdSet.contains(slot.getExprId()), Collectors.toSet()));
+ slot ->
aOutputExprIds.contains(slot.getExprId()), Collectors.toSet()));
JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true),
aProjects);
JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), bProjects);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java
index c46f2252a2..6e9fa8f130 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java
@@ -32,7 +32,6 @@ import com.google.common.collect.ImmutableList.Builder;
import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@@ -52,6 +51,7 @@ public class PushdownProjectThroughInnerJoin extends
OneExplorationRuleFactory {
@Override
public Rule build() {
return logicalProject(logicalJoin())
+ .whenNot(JoinReorderUtils::isAllSlotProject)
.when(project -> project.child().getJoinType().isInnerJoin())
.whenNot(project -> project.child().hasJoinHint())
.then(project -> {
@@ -68,10 +68,16 @@ public class PushdownProjectThroughInnerJoin extends
OneExplorationRuleFactory {
return null;
}
- Map<Boolean, List<NamedExpression>> map =
JoinReorderUtils.splitProjection(project.getProjects(),
- join.left());
- List<NamedExpression> aProjects = map.get(true);
- List<NamedExpression> bProjects = map.get(false);
+ List<NamedExpression> aProjects = new ArrayList<>();
+ List<NamedExpression> bProjects = new ArrayList<>();
+ for (NamedExpression namedExpression : project.getProjects()) {
+ Set<ExprId> usedExprIds =
namedExpression.getInputSlotExprIds();
+ if (aOutputExprIdSet.containsAll(usedExprIds)) {
+ aProjects.add(namedExpression);
+ } else {
+ bProjects.add(namedExpression);
+ }
+ }
boolean leftContains = aProjects.stream().anyMatch(e -> !(e
instanceof Slot));
boolean rightContains = bProjects.stream().anyMatch(e -> !(e
instanceof Slot));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]