This is an automated email from the ASF dual-hosted git repository. hyuan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push: new 640da7c [CALCITE-4097] Avoid requesting unnecessary trait request when deriving traits 640da7c is described below commit 640da7c6d85c3e83fe38fb45d7f23ef5e1000c4e Author: Haisheng Yuan <h.y...@alibaba-inc.com> AuthorDate: Tue Jun 30 20:12:25 2020 -0500 [CALCITE-4097] Avoid requesting unnecessary trait request when deriving traits If the child subset is used to derive new traits for current relnode, the subset will be marked REQUIRED when registering the new derived relnode and later will add enforcers between other delivered subsets. e.g. a MergeJoin request both inputs hash distributed by [a,b] sorted by [a,b]. If the left input R1 happens to be distributed by [a], the MergeJoin can derive new traits from this input and request both input to be distributed by [a] sorted by [a,b]. In case there is a alternative R2 with ANY distribution in the left input's RelSet, we end up with requesting hash distribution [a] on alternative R2, which is unnecessary and waste, because we request distribution by [a] because of R1 can deliver the exact same distribution and we don't need to enforce properties on other subsets that can't satisfy the specific trait requirement. --- .../apache/calcite/plan/volcano/OptimizeTask.java | 29 +++++++++++++++++++++- .../org/apache/calcite/plan/volcano/RelSet.java | 8 +++--- .../org/apache/calcite/plan/volcano/RelSubset.java | 17 +++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java b/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java index 71fb037..c72e2b8 100644 --- a/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java +++ b/core/src/main/java/org/apache/calcite/plan/volcano/OptimizeTask.java @@ -199,7 +199,6 @@ abstract class OptimizeTask { RelSubset subset = input.set.subsets.get(j); if (!subset.isDelivered() || equalsSansConvention( subset.getTraitSet(), rel.getCluster().traitSet())) { - // TODO: should use matching type to determine // Ideally we should stop deriving new relnodes when the // subset's traitSet equals with input traitSet, but // in case someone manually builds a physical relnode @@ -227,6 +226,34 @@ abstract class OptimizeTask { } else { RelNode newRel = rel.derive(subset.getTraitSet(), childId); if (newRel != null && !planner.isRegistered(newRel)) { + RelNode newInput = newRel.getInput(childId); + assert newInput instanceof RelSubset; + if (newInput == subset) { + // If the child subset is used to derive new traits for + // current relnode, the subset will be marked REQUIRED + // when registering the new derived relnode and later + // will add enforcers between other delivered subsets. + // e.g. a MergeJoin request both inputs hash distributed + // by [a,b] sorted by [a,b]. If the left input R1 happens to + // be distributed by [a], the MergeJoin can derive new + // traits from this input and request both input to be + // distributed by [a] sorted by [a,b]. In case there is a + // alternative R2 with ANY distribution in the left input's + // RelSet, we may end up with requesting hash distribution + // [a] on alternative R2, which is unnecessary and waste, + // because we request distribution by [a] because of R1 can + // deliver the exact same distribution and we don't need to + // enforce properties on other subsets that can't satisfy + // the specific trait requirement. + // Here we add a constraint that {@code newInput == subset}, + // because if the delivered child subset is HASH[a], but + // we require HASH[a].SORT[a,b], we still need to enable + // property enforcement on the required subset. Otherwise, + // we need to restrict enforcement between HASH[a].SORT[a,b] + // and HASH[a] only, which will make things a little bit + // complicated. We might optimize it in the future. + subset.disableEnforcing(); + } RelSubset relSubset = planner.register(newRel, node); assert relSubset.set == planner.getSubset(node).set; } diff --git a/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java b/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java index 833aeba..5d20104 100644 --- a/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java +++ b/core/src/main/java/org/apache/calcite/plan/volcano/RelSet.java @@ -236,9 +236,11 @@ class RelSet { to = subset; } - if (from == to || useAbstractConverter - && !from.getConvention().useAbstractConvertersForConversion( - from.getTraitSet(), to.getTraitSet())) { + if (from == to + || to.isEnforceDisabled() + || useAbstractConverter + && !from.getConvention().useAbstractConvertersForConversion( + from.getTraitSet(), to.getTraitSet())) { continue; } diff --git a/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java b/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java index 72d423b..f77b5c7 100644 --- a/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java +++ b/core/src/main/java/org/apache/calcite/plan/volcano/RelSubset.java @@ -120,6 +120,14 @@ public class RelSubset extends AbstractRelNode { */ boolean triggerRule = false; + /** + * When the subset state is REQUIRED, whether enable property enforcing + * between this subset and other delivered subsets. When it is true, + * no enforcer operators will be added even if the other subset can't + * satisfy current subset's required traitSet. + */ + private boolean enforceDisabled = false; + //~ Constructors ----------------------------------------------------------- RelSubset( @@ -179,6 +187,15 @@ public class RelSubset extends AbstractRelNode { return (state & REQUIRED) == REQUIRED; } + void disableEnforcing() { + assert isDelivered(); + enforceDisabled = true; + } + + boolean isEnforceDisabled() { + return enforceDisabled; + } + public RelNode getBest() { return best; }