This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 76620c21aa [improvement](nereids) prune hash join output slot ids list (#20789)
76620c21aa is described below
commit 76620c21aa5a0086565ba5c52ad78f733583356d
Author: xzj7019 <[email protected]>
AuthorDate: Wed Jun 28 17:28:18 2023 +0800
[improvement](nereids) prune hash join output slot ids list (#20789)
1. Prune the hash join's output slot id list down to the slot ids required by
the projection and the other conjuncts, to reduce the work done on the BE side.
2. Also support this pruning for semi/anti joins.
---
.../glue/translator/PhysicalPlanTranslator.java | 57 +++++++++++++++++-----
.../org/apache/doris/planner/HashJoinNode.java | 21 +++++++-
2 files changed, 65 insertions(+), 13 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 1f504af02f..4aec102644 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1067,6 +1067,10 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
sd =
context.getDescTable().copySlotDescriptor(intermediateDescriptor,
leftSlotDescriptor);
} else {
sd = context.createSlotDesc(intermediateDescriptor, sf);
+ if (hashOutputSlotReferenceMap.get(sf.getExprId()) !=
null) {
+
hashJoinNode.addSlotIdToHashOutputSlotIds(leftSlotDescriptor.getId());
+
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(),
leftSlotDescriptor.getId());
+ }
}
leftIntermediateSlotDescriptor.add(sd);
}
@@ -1083,6 +1087,10 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
sd =
context.getDescTable().copySlotDescriptor(intermediateDescriptor,
rightSlotDescriptor);
} else {
sd = context.createSlotDesc(intermediateDescriptor, sf);
+ if (hashOutputSlotReferenceMap.get(sf.getExprId()) !=
null) {
+
hashJoinNode.addSlotIdToHashOutputSlotIds(rightSlotDescriptor.getId());
+
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(),
rightSlotDescriptor.getId());
+ }
}
rightIntermediateSlotDescriptor.add(sd);
}
@@ -1100,6 +1108,7 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
sd = context.createSlotDesc(intermediateDescriptor, sf);
if (hashOutputSlotReferenceMap.get(sf.getExprId()) !=
null) {
hashJoinNode.addSlotIdToHashOutputSlotIds(leftSlotDescriptor.getId());
+
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(),
leftSlotDescriptor.getId());
}
}
leftIntermediateSlotDescriptor.add(sd);
@@ -1117,6 +1126,7 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
sd = context.createSlotDesc(intermediateDescriptor, sf);
if (hashOutputSlotReferenceMap.get(sf.getExprId()) !=
null) {
hashJoinNode.addSlotIdToHashOutputSlotIds(rightSlotDescriptor.getId());
+
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(),
rightSlotDescriptor.getId());
}
}
rightIntermediateSlotDescriptor.add(sd);
@@ -1124,8 +1134,15 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
}
if (hashJoin.getMarkJoinSlotReference().isPresent()) {
-
outputSlotReferences.add(hashJoin.getMarkJoinSlotReference().get());
- context.createSlotDesc(intermediateDescriptor,
hashJoin.getMarkJoinSlotReference().get());
+ SlotReference sf = hashJoin.getMarkJoinSlotReference().get();
+ outputSlotReferences.add(sf);
+ context.createSlotDesc(intermediateDescriptor, sf);
+ if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) {
+ SlotRef markJoinSlotId = context.findSlotRef(sf.getExprId());
+ Preconditions.checkState(markJoinSlotId != null);
+
hashJoinNode.addSlotIdToHashOutputSlotIds(markJoinSlotId.getSlotId());
+ hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(),
markJoinSlotId.getSlotId());
+ }
}
// set slots as nullable for outer join
@@ -1390,6 +1407,15 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
.map(NamedExpression::toSlot)
.collect(Collectors.toList());
+ List<Expr> predicateList = inputPlanNode.getConjuncts();
+ Set<SlotId> requiredSlotIdSet = Sets.newHashSet();
+ for (Expr expr : execExprList) {
+ extractExecSlot(expr, requiredSlotIdSet);
+ }
+ Set<SlotId> requiredByProjectSlotIdSet =
Sets.newHashSet(requiredSlotIdSet);
+ for (Expr expr : predicateList) {
+ extractExecSlot(expr, requiredSlotIdSet);
+ }
// For hash join node, use vSrcToOutputSMap to describe the expression
calculation, use
// vIntermediateTupleDescList as input, and set vOutputTupleDesc as
the final output.
// TODO: HashJoinNode's be implementation is not support projection
yet, remove this after when supported.
@@ -1398,17 +1424,26 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
JoinNodeBase hashJoinNode = (JoinNodeBase) inputPlanNode;
hashJoinNode.setvOutputTupleDesc(tupleDescriptor);
hashJoinNode.setvSrcToOutputSMap(execExprList);
+ // prune the hashOutputSlotIds
+ if (hashJoinNode instanceof HashJoinNode) {
+ ((HashJoinNode) hashJoinNode).getHashOutputSlotIds().clear();
+ Set<ExprId> requiredExprIds = Sets.newHashSet();
+ Set<SlotId> requiredOtherConjunctsSlotIdSet =
Sets.newHashSet();
+ List<Expr> otherConjuncts = ((HashJoinNode)
hashJoinNode).getOtherJoinConjuncts();
+ for (Expr expr : otherConjuncts) {
+ extractExecSlot(expr, requiredOtherConjunctsSlotIdSet);
+ }
+ requiredOtherConjunctsSlotIdSet.forEach(e ->
requiredExprIds.add(context.findExprId(e)));
+ requiredSlotIdSet.forEach(e ->
requiredExprIds.add(context.findExprId(e)));
+ for (ExprId exprId : requiredExprIds) {
+ SlotId slotId = ((HashJoinNode)
hashJoinNode).getHashOutputExprSlotIdMap().get(exprId);
+ Preconditions.checkState(slotId != null);
+ ((HashJoinNode)
hashJoinNode).addSlotIdToHashOutputSlotIds(slotId);
+ }
+ }
return inputFragment;
}
- List<Expr> predicateList = inputPlanNode.getConjuncts();
- Set<SlotId> requiredSlotIdSet = Sets.newHashSet();
- for (Expr expr : execExprList) {
- extractExecSlot(expr, requiredSlotIdSet);
- }
- Set<SlotId> requiredByProjectSlotIdSet =
Sets.newHashSet(requiredSlotIdSet);
- for (Expr expr : predicateList) {
- extractExecSlot(expr, requiredSlotIdSet);
- }
+
if (inputPlanNode instanceof TableFunctionNode) {
TableFunctionNode tableFunctionNode = (TableFunctionNode)
inputPlanNode;
tableFunctionNode.setOutputSlotIds(Lists.newArrayList(requiredSlotIdSet));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
index 1f816db3df..02418fc01a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.CheckedMath;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
+import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.statistics.StatisticalType;
import org.apache.doris.thrift.TEqJoinCondition;
import org.apache.doris.thrift.TExplainLevel;
@@ -47,12 +48,14 @@ import org.apache.doris.thrift.TPlanNodeType;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -76,7 +79,9 @@ public class HashJoinNode extends JoinNodeBase {
private boolean isColocate = false; //the flag for colocate join
private String colocateReason = ""; // if can not do colocate join, set
reason here
- private List<SlotId> hashOutputSlotIds = new ArrayList<>(); //init for
nereids
+ private Set<SlotId> hashOutputSlotIds = Sets.newHashSet(); //init for
nereids
+
+ private Map<ExprId, SlotId> hashOutputExprSlotIdMap = Maps.newHashMap();
/**
* Constructor of HashJoinNode.
@@ -230,7 +235,15 @@ public class HashJoinNode extends JoinNodeBase {
Expr.getIds(otherJoinConjuncts, null, otherAndConjunctSlotIds);
Expr.getIds(conjuncts, null, otherAndConjunctSlotIds);
hashOutputSlotIdSet.addAll(otherAndConjunctSlotIds);
- hashOutputSlotIds = new ArrayList<>(hashOutputSlotIdSet);
+ hashOutputSlotIds = new HashSet<>(hashOutputSlotIdSet);
+ }
+
+ public Map<ExprId, SlotId> getHashOutputExprSlotIdMap() {
+ return hashOutputExprSlotIdMap;
+ }
+
+ public Set<SlotId> getHashOutputSlotIds() {
+ return hashOutputSlotIds;
}
@Override
@@ -808,6 +821,10 @@ public class HashJoinNode extends JoinNodeBase {
this.otherJoinConjuncts = otherJoinConjuncts;
}
+ public List<Expr> getOtherJoinConjuncts() {
+ return otherJoinConjuncts;
+ }
+
SlotRef getMappedInputSlotRef(SlotRef slotRef) {
if (outputSmap != null) {
Expr mappedExpr = outputSmap.mappingForRhsExpr(slotRef);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]