DRILL-1023: Fix issue where joins are over estimated causing excess parallelization. Use correct interfaces for retrieving row counts.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/a88ebb27 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/a88ebb27 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/a88ebb27 Branch: refs/heads/master Commit: a88ebb27d405305f41a72efe2fbee7305dc25ba8 Parents: 0dec032 Author: Jacques Nadeau <[email protected]> Authored: Tue Jun 17 16:02:54 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Wed Jun 18 21:37:46 2014 -0700 ---------------------------------------------------------------------- .../exec/planner/common/DrillJoinRelBase.java | 9 +++++++-- .../exec/planner/physical/AggPruleBase.java | 2 +- .../planner/physical/PhysicalPlanCreator.java | 2 +- .../exec/planner/physical/PlannerSettings.java | 6 ++++++ .../visitor/ExcessiveExchangeIdentifier.java | 3 +-- .../exec/server/options/SystemOptionManager.java | 1 + .../exec/server/options/TypeValidators.java | 19 +++++++++++++++++++ 7 files changed, 36 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java index 80f767c..3b3aa1a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillJoinRelBase.java @@ -22,6 +22,7 @@ import java.util.HashSet; import java.util.List; import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory; +import org.apache.drill.exec.planner.physical.PrelUtil; import org.eigenbase.rel.InvalidRelException; import org.eigenbase.rel.JoinRelBase; import org.eigenbase.rel.JoinRelType; @@ -41,10 +42,12 @@ import com.google.common.collect.Lists; public abstract class DrillJoinRelBase extends JoinRelBase implements DrillRelNode { protected List<Integer> leftKeys = Lists.newArrayList(); protected List<Integer> rightKeys = Lists.newArrayList() ; + private final double joinRowFactor; public DrillJoinRelBase(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, RexNode condition, JoinRelType joinType) throws InvalidRelException { super(cluster, traits, left, right, condition, joinType, Collections.<String> emptySet()); + this.joinRowFactor = PrelUtil.getPlannerSettings(cluster.getPlanner()).getRowCountEstimateFactor(); } @Override @@ -55,8 +58,10 @@ public abstract class DrillJoinRelBase extends JoinRelBase implements DrillRelNo return super.computeSelfCost(planner); } - - + @Override + public double getRows() { + return joinRowFactor * Math.max(this.getLeft().getRows(), this.getRight().getRows()); + } /** * Returns whether there are any elements in common between left and right. http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java index 1b1cc94..7b7e3b7 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/AggPruleBase.java @@ -62,7 +62,7 @@ public abstract class AggPruleBase extends Prule { protected boolean create2PhasePlan(RelOptRuleCall call, DrillAggregateRel aggregate) { PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner()); RelNode child = call.rel(0).getInputs().get(0); - boolean smallInput = child.computeSelfCost(call.getPlanner()).getRows() < settings.getSliceTarget(); + boolean smallInput = child.getRows() < settings.getSliceTarget(); if (! settings.isMultiPhaseAggEnabled() || settings.isSingleMode() || smallInput) { return false; } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java index bf1e51a..130ac87 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PhysicalPlanCreator.java @@ -60,7 +60,7 @@ public class PhysicalPlanCreator { public PhysicalOperator addMetadata(Prel originalPrel, PhysicalOperator op){ op.setOperatorId(opIdMap.get(originalPrel).getAsSingleInt()); - op.setCost(originalPrel.computeSelfCost(originalPrel.getCluster().getPlanner()).getRows()); + op.setCost(originalPrel.getRows()); return op; } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java index edad125..e10b620 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PlannerSettings.java @@ -24,6 +24,7 @@ import org.apache.drill.exec.server.options.OptionManager; import org.apache.drill.exec.server.options.OptionValidator; import org.apache.drill.exec.server.options.TypeValidators.BooleanValidator; import org.apache.drill.exec.server.options.TypeValidators.PositiveLongValidator; +import org.apache.drill.exec.server.options.TypeValidators.RangeDoubleValidator; public class PlannerSettings implements FrameworkContext{ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlannerSettings.class); @@ -41,6 +42,7 @@ public class PlannerSettings implements FrameworkContext{ public static final OptionValidator MULTIPHASE = new BooleanValidator("planner.enable_multiphase_agg", true); public static final OptionValidator BROADCAST = new BooleanValidator("planner.enable_broadcast_join", true); public static final OptionValidator BROADCAST_THRESHOLD = new PositiveLongValidator("planner.broadcast_threshold", MAX_BROADCAST_THRESHOLD, 1000000); + public static final OptionValidator JOIN_ROW_COUNT_ESTIMATE_FACTOR = new RangeDoubleValidator("planner.join.row_count_estimate_factor", 0, 100, 1.0d); public OptionManager options = null; @@ -56,6 +58,10 @@ public class PlannerSettings implements FrameworkContext{ return numEndPoints; } + public double getRowCountEstimateFactor(){ + return options.getOption(JOIN_ROW_COUNT_ESTIMATE_FACTOR.getOptionName()).float_val; + } + public boolean useDefaultCosting() { return useDefaultCosting; } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java index ae4d661..168fd28 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/ExcessiveExchangeIdentifier.java @@ -88,8 +88,7 @@ public class ExcessiveExchangeIdentifier extends BasePrelVisitor<Prel, Excessive private int maxWidth = Integer.MAX_VALUE; public void add(Prel prel){ - RelOptCost cost = prel.computeSelfCost(prel.getCluster().getPlanner()); - maxRows = Math.max(cost.getRows(), maxRows); + maxRows = Math.max(prel.getRows(), maxRows); } public void setSingular(){ http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java index c950c5f..8503197 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java @@ -53,6 +53,7 @@ public class SystemOptionManager implements OptionManager{ PlannerSettings.MULTIPHASE, PlannerSettings.BROADCAST, PlannerSettings.BROADCAST_THRESHOLD, + PlannerSettings.JOIN_ROW_COUNT_ESTIMATE_FACTOR, ExecConstants.OUTPUT_FORMAT_VALIDATOR, ExecConstants.PARQUET_BLOCK_SIZE_VALIDATOR, ExecConstants.SLICE_TARGET_OPTION, http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/a88ebb27/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java index bc6a9d3..a90807c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/TypeValidators.java @@ -45,6 +45,25 @@ public class TypeValidators { } } + public static class RangeDoubleValidator extends DoubleValidator { + private final double min; + private final double max; + + public RangeDoubleValidator(String name, double def, double min, double max) { + super(name, def); + this.min = min; + this.max = max; + } + + @Override + public void extraValidate(OptionValue v) throws ExpressionParsingException { + if (v.float_val > max || v.float_val < min) + throw new ExpressionParsingException(String.format("Option %s must be between %d and %d.", getOptionName(), min, + max)); + } + + } + public static class BooleanValidator extends TypeValidator{ public BooleanValidator(String name, boolean def){ super(name, Kind.BOOLEAN, OptionValue.createBoolean(OptionType.SYSTEM, name, def));
