HIVE-11197 : While extracting join conditions, follow Hive rules for type conversion instead of Calcite (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20f2c29f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20f2c29f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20f2c29f Branch: refs/heads/spark Commit: 20f2c29f42725c0dd82acc5e3d170d7423003b47 Parents: b61e6b5 Author: Ashutosh Chauhan <hashut...@apache.org> Authored: Fri Jul 10 08:40:00 2015 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Fri Jul 10 08:40:00 2015 -0700 ---------------------------------------------------------------------- .../ql/optimizer/calcite/HiveCalciteUtil.java | 25 ++++---- .../ql/optimizer/calcite/HiveRelOptUtil.java | 36 +++++------ .../calcite/cost/HiveOnTezCostModel.java | 25 ++++++-- .../calcite/reloperators/HiveJoin.java | 11 ++-- .../calcite/reloperators/HiveMultiJoin.java | 13 +++- .../rules/HiveInsertExchange4JoinRule.java | 13 +++- .../calcite/rules/HiveJoinAddNotNullRule.java | 16 +++-- .../calcite/rules/HiveJoinToMultiJoinRule.java | 64 +++++++------------- .../calcite/stats/HiveRelMdSelectivity.java | 11 ++-- 9 files changed, 120 insertions(+), 94 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 024097e..0200506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -339,25 +339,25 @@ public class HiveCalciteUtil { return this.mapOfProjIndxInJoinSchemaToLeafPInfo; } - public static JoinPredicateInfo constructJoinPredicateInfo(Join j) { + public 
static JoinPredicateInfo constructJoinPredicateInfo(Join j) throws CalciteSemanticException { return constructJoinPredicateInfo(j, j.getCondition()); } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj) { + public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj) throws CalciteSemanticException { return constructJoinPredicateInfo(mj, mj.getCondition()); } - public static JoinPredicateInfo constructJoinPredicateInfo(Join j, RexNode predicate) { + public static JoinPredicateInfo constructJoinPredicateInfo(Join j, RexNode predicate) throws CalciteSemanticException { return constructJoinPredicateInfo(j.getInputs(), j.getSystemFieldList(), predicate); } - public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj, RexNode predicate) { + public static JoinPredicateInfo constructJoinPredicateInfo(HiveMultiJoin mj, RexNode predicate) throws CalciteSemanticException { final List<RelDataTypeField> systemFieldList = ImmutableList.of(); return constructJoinPredicateInfo(mj.getInputs(), systemFieldList, predicate); } public static JoinPredicateInfo constructJoinPredicateInfo(List<RelNode> inputs, - List<RelDataTypeField> systemFieldList, RexNode predicate) { + List<RelDataTypeField> systemFieldList, RexNode predicate) throws CalciteSemanticException { JoinPredicateInfo jpi = null; JoinLeafPredicateInfo jlpi = null; List<JoinLeafPredicateInfo> equiLPIList = new ArrayList<JoinLeafPredicateInfo>(); @@ -504,7 +504,7 @@ public class HiveCalciteUtil { // split accordingly. If the join condition is not part of the equi-join predicate, // the returned object will be typed as SQLKind.OTHER. 
private static JoinLeafPredicateInfo constructJoinLeafPredicateInfo(List<RelNode> inputs, - List<RelDataTypeField> systemFieldList, RexNode pe) { + List<RelDataTypeField> systemFieldList, RexNode pe) throws CalciteSemanticException { JoinLeafPredicateInfo jlpi = null; List<Integer> filterNulls = new ArrayList<Integer>(); List<List<RexNode>> joinExprs = new ArrayList<List<RexNode>>(); @@ -513,7 +513,7 @@ public class HiveCalciteUtil { } // 1. Split leaf join predicate to expressions from left, right - RexNode otherConditions = HiveRelOptUtil.splitJoinCondition(systemFieldList, inputs, pe, + RexNode otherConditions = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, inputs, pe, joinExprs, filterNulls, null); if (otherConditions.isAlwaysTrue()) { @@ -689,7 +689,7 @@ public class HiveCalciteUtil { public static ImmutableList<RexNode> getInputRef(List<Integer> inputRefs, RelNode inputRel) { ImmutableList.Builder<RexNode> bldr = ImmutableList.<RexNode> builder(); for (int i : inputRefs) { - bldr.add(new RexInputRef(i, (RelDataType) inputRel.getRowType().getFieldList().get(i).getType())); + bldr.add(new RexInputRef(i, inputRel.getRowType().getFieldList().get(i).getType())); } return bldr.build(); } @@ -697,7 +697,7 @@ public class HiveCalciteUtil { public static ExprNodeDesc getExprNode(Integer inputRefIndx, RelNode inputRel, ExprNodeConverter exprConv) { ExprNodeDesc exprNode = null; - RexNode rexInputRef = new RexInputRef(inputRefIndx, (RelDataType) inputRel.getRowType() + RexNode rexInputRef = new RexInputRef(inputRefIndx, inputRel.getRowType() .getFieldList().get(inputRefIndx).getType()); exprNode = rexInputRef.accept(exprConv); @@ -723,9 +723,9 @@ public class HiveCalciteUtil { for (Integer iRef : inputRefs) { fieldNames.add(schemaNames.get(iRef)); } - + return fieldNames; - } + } /** * Walks over an expression and determines whether it is constant. 
@@ -789,12 +789,13 @@ public class HiveCalciteUtil { private static class InputRefsCollector extends RexVisitorImpl<Void> { - private Set<Integer> inputRefSet = new HashSet<Integer>(); + private final Set<Integer> inputRefSet = new HashSet<Integer>(); private InputRefsCollector(boolean deep) { super(deep); } + @Override public Void visitInputRef(RexInputRef inputRef) { inputRefSet.add(inputRef.getIndex()); return null; http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index 9ebb24f..ab793f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -7,7 +7,6 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; @@ -17,11 +16,13 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.Util; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import 
com.google.common.collect.ImmutableList; public class HiveRelOptUtil extends RelOptUtil { @@ -48,14 +49,15 @@ public class HiveRelOptUtil extends RelOptUtil { * join predicate are at the end of the key lists * returned * @return What's left, never null + * @throws CalciteSemanticException */ - public static RexNode splitJoinCondition( + public static RexNode splitHiveJoinCondition( List<RelDataTypeField> sysFieldList, List<RelNode> inputs, RexNode condition, List<List<RexNode>> joinKeys, List<Integer> filterNulls, - List<SqlOperator> rangeOp) { + List<SqlOperator> rangeOp) throws CalciteSemanticException { final List<RexNode> nonEquiList = new ArrayList<>(); splitJoinCondition( @@ -79,11 +81,10 @@ public class HiveRelOptUtil extends RelOptUtil { List<List<RexNode>> joinKeys, List<Integer> filterNulls, List<SqlOperator> rangeOp, - List<RexNode> nonEquiList) { + List<RexNode> nonEquiList) throws CalciteSemanticException { final int sysFieldCount = sysFieldList.size(); final RelOptCluster cluster = inputs.get(0).getCluster(); final RexBuilder rexBuilder = cluster.getRexBuilder(); - final RelDataTypeFactory typeFactory = cluster.getTypeFactory(); final ImmutableBitSet[] inputsRange = new ImmutableBitSet[inputs.size()]; int totalFieldCount = 0; @@ -199,24 +200,25 @@ public class HiveRelOptUtil extends RelOptUtil { RelDataType rightKeyType = rightKey.getType(); if (leftKeyType != rightKeyType) { - // perform casting - RelDataType targetKeyType = - typeFactory.leastRestrictive( - ImmutableList.of(leftKeyType, rightKeyType)); + // perform casting using Hive rules + TypeInfo rType = TypeConverter.convert(rightKeyType); + TypeInfo lType = TypeConverter.convert(leftKeyType); + TypeInfo tgtType = FunctionRegistry.getCommonClassForComparison(lType, rType); - if (targetKeyType == null) { - throw Util.newInternal( + if (tgtType == null) { + throw new CalciteSemanticException( "Cannot find common type for join keys " - + leftKey + " (type " + leftKeyType + ") and " - + rightKey + 
" (type " + rightKeyType + ")"); + + leftKey + " (type " + leftKeyType + ") and " + + rightKey + " (type " + rightKeyType + ")"); } + RelDataType targetKeyType = TypeConverter.convert(tgtType, rexBuilder.getTypeFactory()); - if (leftKeyType != targetKeyType) { + if (leftKeyType != targetKeyType && TypeInfoUtils.isConversionRequiredForComparison(tgtType, lType)) { leftKey = rexBuilder.makeCast(targetKeyType, leftKey); } - if (rightKeyType != targetKeyType) { + if (rightKeyType != targetKeyType && TypeInfoUtils.isConversionRequiredForComparison(tgtType, rType)) { rightKey = rexBuilder.makeCast(targetKeyType, rightKey); } http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java index fb67309..e9f1d96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveOnTezCostModel.java @@ -29,7 +29,10 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.ImmutableIntList; import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; @@ -48,6 +51,8 @@ public class HiveOnTezCostModel extends HiveCostModel { private static 
HiveAlgorithmsUtil algoUtils; + private static transient final Log LOG = LogFactory.getLog(HiveOnTezCostModel.class); + synchronized public static HiveOnTezCostModel getCostModel(HiveConf conf) { if (INSTANCE == null) { INSTANCE = new HiveOnTezCostModel(conf); @@ -136,7 +141,13 @@ public class HiveOnTezCostModel extends HiveCostModel { add(leftRCount). add(rightRCount). build(); - final double cpuCost = algoUtils.computeSortMergeCPUCost(cardinalities, join.getSortedInputs()); + double cpuCost; + try { + cpuCost = algoUtils.computeSortMergeCPUCost(cardinalities, join.getSortedInputs()); + } catch (CalciteSemanticException e) { + LOG.trace("Failed to compute sort merge cpu cost ", e); + return null; + } // 3. IO cost = cost of writing intermediary results to local FS + // cost of reading from local FS for transferring to join + // cost of transferring map outputs to Join operator @@ -183,7 +194,7 @@ public class HiveOnTezCostModel extends HiveCostModel { if (memoryWithinPhase == null || splitCount == null) { return null; } - + return memoryWithinPhase / splitCount; } @@ -289,7 +300,7 @@ public class HiveOnTezCostModel extends HiveCostModel { if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION || join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) { return null; - } + } return HiveAlgorithmsUtil.getJoinDistribution(join.getJoinPredicateInfo(), join.getStreamingSide()); } @@ -521,7 +532,13 @@ public class HiveOnTezCostModel extends HiveCostModel { for (int i=0; i<join.getInputs().size(); i++) { RelNode input = join.getInputs().get(i); // Is smbJoin possible? 
We need correct order - boolean orderFound = join.getSortedInputs().get(i); + boolean orderFound; + try { + orderFound = join.getSortedInputs().get(i); + } catch (CalciteSemanticException e) { + LOG.trace("Not possible to do SMB Join ",e); + return false; + } if (!orderFound) { return false; } http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java index 668960e..6814df6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java @@ -41,6 +41,7 @@ import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.ImmutableIntList; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; @@ -51,7 +52,7 @@ import com.google.common.collect.ImmutableList; //TODO: Should we convert MultiJoin to be a child of HiveJoin public class HiveJoin extends Join implements HiveRelNode { - + public static final JoinFactory HIVE_JOIN_FACTORY = new HiveJoinFactoryImpl(); public enum MapJoinStreamingRelation { @@ -71,14 +72,14 @@ public class HiveJoin extends Join implements HiveRelNode { HiveJoin join = new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped, DefaultJoinAlgorithm.INSTANCE, leftSemiJoin); return join; - } catch (InvalidRelException e) { + } catch 
(InvalidRelException | CalciteSemanticException e) { throw new RuntimeException(e); } } protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, Set<String> variablesStopped, - JoinAlgorithm joinAlgo, boolean leftSemiJoin) throws InvalidRelException { + JoinAlgorithm joinAlgo, boolean leftSemiJoin) throws InvalidRelException, CalciteSemanticException { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType, variablesStopped); this.joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this); @@ -97,7 +98,7 @@ public class HiveJoin extends Join implements HiveRelNode { Set<String> variablesStopped = Collections.emptySet(); return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType, variablesStopped, joinAlgorithm, leftSemiJoin); - } catch (InvalidRelException e) { + } catch (InvalidRelException | CalciteSemanticException e) { // Semantic error not possible. Must be a bug. Convert to // internal error. throw new AssertionError(e); @@ -170,7 +171,7 @@ public class HiveJoin extends Join implements HiveRelNode { return smallInput; } - public ImmutableBitSet getSortedInputs() { + public ImmutableBitSet getSortedInputs() throws CalciteSemanticException { ImmutableBitSet.Builder sortedInputsBuilder = new ImmutableBitSet.Builder(); JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo. 
constructJoinPredicateInfo(this); http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java index 911ceda..7a43f29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java @@ -31,6 +31,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; @@ -60,7 +61,7 @@ public final class HiveMultiJoin extends AbstractRelNode { * @param inputs inputs into this multi-join * @param condition join filter applicable to this join node * @param rowType row type of the join result of this node - * @param joinInputs + * @param joinInputs * @param joinTypes the join type corresponding to each input; if * an input is null-generating in a left or right * outer join, the entry indicates the type of @@ -84,7 +85,11 @@ public final class HiveMultiJoin extends AbstractRelNode { this.joinTypes = ImmutableList.copyOf(joinTypes); this.outerJoin = containsOuter(); - this.joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this); + try { + this.joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this); + } catch (CalciteSemanticException e) { + throw new 
RuntimeException(e); + } } @@ -105,6 +110,7 @@ public final class HiveMultiJoin extends AbstractRelNode { joinTypes); } + @Override public RelWriter explainTerms(RelWriter pw) { List<String> joinsString = new ArrayList<String>(); for (int i = 0; i < joinInputs.size(); i++) { @@ -122,10 +128,12 @@ public final class HiveMultiJoin extends AbstractRelNode { .item("joinsDescription", joinsString); } + @Override public RelDataType deriveRowType() { return rowType; } + @Override public List<RelNode> getInputs() { return inputs; } @@ -134,6 +142,7 @@ public final class HiveMultiJoin extends AbstractRelNode { return ImmutableList.of(condition); } + @Override public RelNode accept(RexShuttle shuttle) { RexNode joinFilter = shuttle.apply(this.condition); http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java index c5ab055..39c69a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java @@ -32,6 +32,7 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rex.RexNode; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; @@ -75,10 +76,18 @@ public class HiveInsertExchange4JoinRule extends 
RelOptRule { JoinPredicateInfo joinPredInfo; if (call.rel(0) instanceof HiveMultiJoin) { HiveMultiJoin multiJoin = call.rel(0); - joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(multiJoin); + try { + joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(multiJoin); + } catch (CalciteSemanticException e) { + throw new RuntimeException(e); + } } else if (call.rel(0) instanceof Join) { Join join = call.rel(0); - joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + try { + joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + } catch (CalciteSemanticException e) { + throw new RuntimeException(e); + } } else { return; } http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index a4484ec..c4a40bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -39,6 +39,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; @@ -46,12 +47,13 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; 
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; import org.apache.hadoop.hive.ql.parse.SemanticException; +import com.esotericsoftware.minlog.Log; import com.google.common.collect.ImmutableList; public final class HiveJoinAddNotNullRule extends RelOptRule { private static final String NOT_NULL_FUNC_NAME = "isnotnull"; - + /** The singleton. */ public static final HiveJoinAddNotNullRule INSTANCE = new HiveJoinAddNotNullRule(HiveFilter.DEFAULT_FILTER_FACTORY); @@ -72,6 +74,7 @@ public final class HiveJoinAddNotNullRule extends RelOptRule { //~ Methods ---------------------------------------------------------------- + @Override public void onMatch(RelOptRuleCall call) { final Join join = call.rel(0); RelNode leftInput = call.rel(1); @@ -85,8 +88,13 @@ public final class HiveJoinAddNotNullRule extends RelOptRule { return; } - JoinPredicateInfo joinPredInfo = - HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + JoinPredicateInfo joinPredInfo; + try { + joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join); + } catch (CalciteSemanticException e) { + Log.trace("Failed to add is not null filter on join ", e); + return; + } Set<Integer> joinLeftKeyPositions = new HashSet<Integer>(); Set<Integer> joinRightKeyPositions = new HashSet<Integer>(); @@ -133,7 +141,7 @@ public final class HiveJoinAddNotNullRule extends RelOptRule { call.transformTo(newJoin); } - + private static Map<String,RexNode> getNotNullConditions(RelOptCluster cluster, RexBuilder rexBuilder, RelNode input, Set<Integer> inputKeyPositions) { http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java index c5e0e11..a0144f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinToMultiJoinRule.java @@ -22,7 +22,6 @@ import java.util.List; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; @@ -35,6 +34,9 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; @@ -56,6 +58,7 @@ public class HiveJoinToMultiJoinRule extends RelOptRule { private final ProjectFactory projectFactory; + private static transient final Log LOG = LogFactory.getLog(HiveJoinToMultiJoinRule.class); //~ Constructors ----------------------------------------------------------- @@ -142,8 +145,14 @@ public class HiveJoinToMultiJoinRule extends RelOptRule { leftJoinTypes = hmj.getJoinTypes(); } - boolean combinable = isCombinablePredicate(join, join.getCondition(), - leftCondition); + boolean combinable; + try { + combinable = isCombinablePredicate(join, join.getCondition(), + leftCondition); + } catch (CalciteSemanticException e) { + LOG.trace("Failed to merge joins", e); + combinable = false; + } if (combinable) { newJoinFilters.add(leftCondition); for (int i = 0; i < 
leftJoinInputs.size(); i++) { @@ -172,8 +181,14 @@ public class HiveJoinToMultiJoinRule extends RelOptRule { for (int i=0; i<newInputs.size(); i++) { joinKeyExprs.add(new ArrayList<RexNode>()); } - RexNode otherCondition = HiveRelOptUtil.splitJoinCondition(systemFieldList, newInputs, join.getCondition(), - joinKeyExprs, filterNulls, null); + RexNode otherCondition; + try { + otherCondition = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), + joinKeyExprs, filterNulls, null); + } catch (CalciteSemanticException e) { + LOG.trace("Failed to merge joins", e); + return null; + } // If there are remaining parts in the condition, we bail out if (!otherCondition.isAlwaysTrue()) { return null; @@ -221,7 +236,7 @@ public class HiveJoinToMultiJoinRule extends RelOptRule { } private static boolean isCombinablePredicate(Join join, - RexNode condition, RexNode otherCondition) { + RexNode condition, RexNode otherCondition) throws CalciteSemanticException { final JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo. constructJoinPredicateInfo(join, condition); final JoinPredicateInfo otherJoinPredInfo = HiveCalciteUtil.JoinPredicateInfo. @@ -236,41 +251,4 @@ public class HiveJoinToMultiJoinRule extends RelOptRule { } return true; } - - /** - * Shifts a filter originating from the right child of the LogicalJoin to the - * right, to reflect the filter now being applied on the resulting - * MultiJoin. 
- * - * @param joinRel the original LogicalJoin - * @param left the left child of the LogicalJoin - * @param right the right child of the LogicalJoin - * @param rightFilter the filter originating from the right child - * @return the adjusted right filter - */ - private static RexNode shiftRightFilter( - Join joinRel, - RelNode left, - RelNode right, - RexNode rightFilter) { - if (rightFilter == null) { - return null; - } - - int nFieldsOnLeft = left.getRowType().getFieldList().size(); - int nFieldsOnRight = right.getRowType().getFieldList().size(); - int[] adjustments = new int[nFieldsOnRight]; - for (int i = 0; i < nFieldsOnRight; i++) { - adjustments[i] = nFieldsOnLeft; - } - rightFilter = - rightFilter.accept( - new RelOptUtil.RexInputConverter( - joinRel.getCluster().getRexBuilder(), - right.getRowType().getFieldList(), - joinRel.getRowType().getFieldList(), - adjustments)); - return rightFilter; - } - } http://git-wip-us.apache.org/repos/asf/hive/blob/20f2c29f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 960ec40..715f24f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -32,6 +32,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; 
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; @@ -57,14 +58,14 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { return 1.0; } - public Double getSelectivity(HiveJoin j, RexNode predicate) { + public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { if (j.getJoinType().equals(JoinRelType.INNER)) { return computeInnerJoinSelectivity(j, predicate); } return 1.0; } - private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) { + private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { double ndvCrossProduct = 1; Pair<Boolean, RexNode> predInfo = getCombinedPredicateForJoin(j, predicate); @@ -183,7 +184,7 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { } /** - * + * * @param j * @param additionalPredicate * @return if predicate is the join condition return (true, joinCond) @@ -206,7 +207,7 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { /** * Compute Max NDV to determine Join Selectivity. - * + * * @param jlpi * @param colStatMap * Immutable Map of Projection Index (in Join Schema) to Column Stat @@ -238,5 +239,5 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { return maxNDVSoFar; } - + }