http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java index a231543..d7a83f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; @@ -68,10 +69,16 @@ public class GenMRFileSink1 implements NodeProcessor { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; - Task<? extends Serializable> currTask = ctx.getCurrTask(); + // we should take the task of fsOp's parent as the current task. + FileSinkOperator fsOp = (FileSinkOperator) nd; + Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx + .getMapCurrCtx(); + GenMapRedCtx mapredCtx = mapCurrCtx.get(fsOp.getParentOperators().get(0)); + Task<? extends Serializable> currTask = mapredCtx.getCurrTask(); + + ctx.setCurrTask(currTask); ctx.addRootIfPossible(currTask); - FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE GenMapRedUtils.isInsertInto(parseCtx, fsOp); HiveConf hconf = parseCtx.getConf();
http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 38157a6..87fff3e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -131,11 +131,7 @@ import com.google.common.collect.Interner; * map-reduce tasks. */ public final class GenMapRedUtils { - private static Logger LOG; - - static { - LOG = LoggerFactory.getLogger("org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils"); - } + private static final Logger LOG = LoggerFactory.getLogger("org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils"); public static boolean needsTagging(ReduceWork rWork) { return rWork != null && (rWork.getReducer().getClass() == JoinOperator.class || http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index b2893e7..85d46f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -59,7 +59,6 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.GenMapRedWalker; -import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -80,8 +79,6 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import com.clearspring.analytics.util.Lists; - /** * Implementation of one of the rule-based map join optimization. User passes hints to specify * map-joins and during this optimization, all user specified map joins are converted to MapJoins - @@ -434,7 +431,8 @@ public class MapJoinProcessor extends Transform { smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(), smbJoinDesc.getOutputColumnNames(), bigTablePos, smbJoinDesc.getConds(), - smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix()); + smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix(), + smbJoinDesc.getNoConditionalTaskSize()); mapJoinDesc.setStatistics(smbJoinDesc.getStatistics()); @@ -1187,7 +1185,7 @@ public class MapJoinProcessor extends Transform { MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs, valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op - .getConf().getNoOuterJoin(), dumpFilePrefix); + .getConf().getNoOuterJoin(), dumpFilePrefix, op.getConf().getNoConditionalTaskSize()); mapJoinDescriptor.setStatistics(op.getConf().getStatistics()); mapJoinDescriptor.setTagOrder(tagOrder); mapJoinDescriptor.setNullSafes(desc.getNullSafes()); http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index a3a19f4..92225ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -210,7 +210,7 @@ public class Optimizer { if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCORRELATION) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME) && - !isTezExecEngine) { + !isTezExecEngine && !isSparkExecEngine) { transformations.add(new CorrelationOptimizer()); } if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java index 60a8604..b51af55 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.stats.StatsUtils; import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.AUTOPARALLEL; import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; +import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED; /** * SetReducerParallelism determines how many reducers should @@ -106,6 +107,7 @@ public class SetReducerParallelism implements NodeProcessor { } } else { LOG.info("Number of reducers determined to be: "+desc.getNumReducers()); + desc.setReducerTraits(EnumSet.of(FIXED)); // usually controlled by bucketing } return false; http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java index 5b2c9c0..8c43774 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java @@ -31,10 +31,11 @@ public class CalciteSemanticException extends SemanticException { public enum UnsupportedFeature { Distinct_without_an_aggreggation, Duplicates_in_RR, Filter_expression_with_non_boolean_return_type, - Having_clause_without_any_groupby, Hint, Invalid_column_reference, Invalid_decimal, + Having_clause_without_any_groupby, Invalid_column_reference, Invalid_decimal, Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions, Schema_less_table, Select_alias_in_having_clause, Select_transform, Subquery, - Table_sample_clauses, UDTF, Union_type, Unique_join + Table_sample_clauses, UDTF, Union_type, Unique_join, + HighPrecissionTimestamp // CALCITE-1690 }; private UnsupportedFeature unsupportedFeature; http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 6ccd879..e339d0a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -48,6 +48,7 @@ import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexLocalRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexPatternFieldRef; import org.apache.calcite.rex.RexRangeRef; import 
org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; @@ -1074,6 +1075,11 @@ public class HiveCalciteUtil { // it seems that it is not used by anything. return false; } + + @Override + public Boolean visitPatternFieldRef(RexPatternFieldRef fieldRef) { + return false; + } } public static Set<Integer> getInputRefs(RexNode expr) { http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java index 9a65de3..d0b1757 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.plan.Context; import org.apache.calcite.rel.RelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; @@ -27,11 +28,14 @@ import java.util.Set; public class HivePlannerContext implements Context { private HiveAlgorithmsConf algoConfig; private HiveRulesRegistry registry; + private CalciteConnectionConfig calciteConfig; private Set<RelNode> corrScalarRexSQWithAgg; - public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, Set<RelNode> corrScalarRexSQWithAgg) { + public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, + CalciteConnectionConfig calciteConfig, Set<RelNode> corrScalarRexSQWithAgg) { this.algoConfig = algoConfig; this.registry = registry; + this.calciteConfig = calciteConfig; // this is to keep track if a subquery is correlated and contains aggregate // this is computed in CalcitePlanner while 
planning and is later required by subuery remove rule // hence this is passed using HivePlannerContext @@ -45,6 +49,9 @@ public class HivePlannerContext implements Context { if (clazz.isInstance(registry)) { return clazz.cast(registry); } + if (clazz.isInstance(calciteConfig)) { + return clazz.cast(calciteConfig); + } if(clazz.isInstance(corrScalarRexSQWithAgg)) { return clazz.cast(corrScalarRexSQWithAgg); } http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java index 2aadf50..8e52d88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelShuttleImpl.java @@ -27,6 +27,7 @@ import org.apache.calcite.rel.logical.LogicalExchange; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalIntersect; import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalMatch; import org.apache.calcite.rel.logical.LogicalMinus; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalSort; @@ -140,6 +141,10 @@ public class HiveRelShuttleImpl implements HiveRelShuttle { public RelNode visit(RelNode other) { return visitChildren(other); } + + public RelNode visit(LogicalMatch match) { + return visitChildren(match); + } } // End RelShuttleImpl.java http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java index 4edc4df..0b94b8a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java @@ -24,6 +24,7 @@ import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlTypeTransforms; import com.google.common.collect.Sets; @@ -42,9 +43,10 @@ public class HiveExtractDate extends SqlFunction { Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND); private HiveExtractDate(String name) { - super(name, SqlKind.EXTRACT, ReturnTypes.INTEGER_NULLABLE, null, - OperandTypes.INTERVALINTERVAL_INTERVALDATETIME, - SqlFunctionCategory.SYSTEM); + super(name, SqlKind.EXTRACT, + ReturnTypes.cascade(ReturnTypes.INTEGER, SqlTypeTransforms.FORCE_NULLABLE), null, + OperandTypes.INTERVALINTERVAL_INTERVALDATETIME, + SqlFunctionCategory.SYSTEM); } } http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidProjectFilterTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidProjectFilterTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidProjectFilterTransposeRule.java new file mode 100644 index 0000000..dd39056 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveDruidProjectFilterTransposeRule.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.adapter.druid.DruidQuery; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.rules.ProjectFilterTransposeRule; +import org.apache.calcite.rel.rules.PushProjector; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; + +/** + * Rule to push a {@link org.apache.calcite.rel.core.Project} + * past a {@link org.apache.calcite.rel.core.Filter} + * when {@code Filter} is on top of a {@link DruidQuery}. 
+ * TODO: Replace this class with calcite DruidRules.DruidFilterProjectTransposeRule + * once we upgrade to calcite 1.13 + */ +public class HiveDruidProjectFilterTransposeRule + extends ProjectFilterTransposeRule { + + public static final HiveDruidProjectFilterTransposeRule INSTANCE = + new HiveDruidProjectFilterTransposeRule(); + + private HiveDruidProjectFilterTransposeRule() { + super( + operand(Project.class, + operand(Filter.class, operand(DruidQuery.class, none()))), + PushProjector.ExprCondition.FALSE, + HiveRelFactories.HIVE_BUILDER); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java index 4b8568e..a4da6db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java @@ -106,57 +106,6 @@ public abstract class HiveFilterJoinRule extends FilterJoinRule { } } - /* - * Any predicates pushed down to joinFilters that aren't equality conditions: - * put them back as aboveFilters because Hive doesn't support not equi join - * conditions. 
- */ - @Override - protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters, - Join join, JoinRelType joinType) { - if (joinType.equals(JoinRelType.INNER)) { - ListIterator<RexNode> filterIter = joinFilters.listIterator(); - while (filterIter.hasNext()) { - RexNode exp = filterIter.next(); - - if (exp instanceof RexCall) { - RexCall c = (RexCall) exp; - boolean validHiveJoinFilter = false; - - if ((c.getOperator().getKind() == SqlKind.EQUALS)) { - validHiveJoinFilter = true; - for (RexNode rn : c.getOperands()) { - // NOTE: Hive dis-allows projections from both left & right side - // of join condition. Example: Hive disallows - // (r1.x +r2.x)=(r1.y+r2.y) on join condition. - if (filterRefersToBothSidesOfJoin(rn, join)) { - validHiveJoinFilter = false; - break; - } - } - } else if ((c.getOperator().getKind() == SqlKind.LESS_THAN) - || (c.getOperator().getKind() == SqlKind.GREATER_THAN) - || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL) - || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) { - validHiveJoinFilter = true; - // NOTE: Hive dis-allows projections from both left & right side of - // join in in equality condition. Example: Hive disallows (r1.x < - // r2.x) on join condition. 
- if (filterRefersToBothSidesOfJoin(c, join)) { - validHiveJoinFilter = false; - } - } - - if (validHiveJoinFilter) - continue; - } - - aboveFilters.add(exp); - filterIter.remove(); - } - } - } - private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) { boolean refersToBothSides = false; http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java index 4cfe782..f3d7293 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java @@ -55,36 +55,89 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; + + +public abstract class HivePointLookupOptimizerRule extends RelOptRule { /** - * This optimization will take a Filter expression, and if its predicate contains + * This optimization will take a Filter or expression, and if its predicate contains * an OR operator whose children are constant equality expressions, it will try * to generate an IN clause (which is more efficient). If the OR operator contains * AND operator children, the optimization might generate an IN clause that uses * structs. 
*/ -public class HivePointLookupOptimizerRule extends RelOptRule { + public static class FilterCondition extends HivePointLookupOptimizerRule { + public FilterCondition (int minNumORClauses) { + super(operand(Filter.class, any()), minNumORClauses); + } - protected static final Log LOG = LogFactory.getLog(HivePointLookupOptimizerRule.class); + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + analyzeCondition(call , rexBuilder, filter, condition); + } + @Override protected RelNode copyNode(AbstractRelNode node, RexNode newCondition) { + final Filter filter = (Filter) node; + return filter.copy(filter.getTraitSet(), filter.getInput(), newCondition); + } + } - // Minimum number of OR clauses needed to transform into IN clauses - private final int minNumORClauses; +/** + * This optimization will take a Join or expression, and if its join condition contains + * an OR operator whose children are constant equality expressions, it will try + * to generate an IN clause (which is more efficient). If the OR operator contains + * AND operator children, the optimization might generate an IN clause that uses + * structs. 
+ */ + public static class JoinCondition extends HivePointLookupOptimizerRule { + public JoinCondition (int minNumORClauses) { + super(operand(Join.class, any()), minNumORClauses); + } + + public void onMatch(RelOptRuleCall call) { + final Join join = call.rel(0); + final RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + final RexNode condition = RexUtil.pullFactors(rexBuilder, join.getCondition()); + analyzeCondition(call , rexBuilder, join, condition); + } - public HivePointLookupOptimizerRule(int minNumORClauses) { - super(operand(Filter.class, any())); - this.minNumORClauses = minNumORClauses; + @Override protected RelNode copyNode(AbstractRelNode node, RexNode newCondition) { + final Join join = (Join) node; + return join.copy(join.getTraitSet(), + newCondition, + join.getLeft(), + join.getRight(), + join.getJoinType(), + join.isSemiJoinDone()); + } } - public void onMatch(RelOptRuleCall call) { - final Filter filter = call.rel(0); + protected static final Log LOG = LogFactory.getLog(HivePointLookupOptimizerRule.class); - final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + // Minimum number of OR clauses needed to transform into IN clauses + protected final int minNumORClauses; + + protected abstract RelNode copyNode(AbstractRelNode node, RexNode newCondition); + + protected HivePointLookupOptimizerRule( + RelOptRuleOperand operand, int minNumORClauses) { + super(operand); + this.minNumORClauses = minNumORClauses; + } - final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + public void analyzeCondition(RelOptRuleCall call, + RexBuilder rexBuilder, + AbstractRelNode node, + RexNode condition) { // 1. 
We try to transform possible candidates - RexTransformIntoInClause transformIntoInClause = new RexTransformIntoInClause(rexBuilder, filter, + RexTransformIntoInClause transformIntoInClause = new RexTransformIntoInClause(rexBuilder, node, minNumORClauses); RexNode newCondition = transformIntoInClause.apply(condition); @@ -97,10 +150,10 @@ public class HivePointLookupOptimizerRule extends RelOptRule { return; } - // 4. We create the filter with the new condition - RelNode newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newCondition); + // 4. We create the Filter/Join with the new condition + RelNode newNode = copyNode(node, newCondition); - call.transformTo(newFilter); + call.transformTo(newNode); } @@ -109,11 +162,11 @@ public class HivePointLookupOptimizerRule extends RelOptRule { */ protected static class RexTransformIntoInClause extends RexShuttle { private final RexBuilder rexBuilder; - private final Filter filterOp; + private final AbstractRelNode nodeOp; private final int minNumORClauses; - RexTransformIntoInClause(RexBuilder rexBuilder, Filter filterOp, int minNumORClauses) { - this.filterOp = filterOp; + RexTransformIntoInClause(RexBuilder rexBuilder, AbstractRelNode nodeOp, int minNumORClauses) { + this.nodeOp = nodeOp; this.rexBuilder = rexBuilder; this.minNumORClauses = minNumORClauses; } @@ -129,7 +182,7 @@ public class HivePointLookupOptimizerRule extends RelOptRule { if (operand.getKind() == SqlKind.OR) { try { newOperand = transformIntoInClauseCondition(rexBuilder, - filterOp.getRowType(), operand, minNumORClauses); + nodeOp.getRowType(), operand, minNumORClauses); if (newOperand == null) { newOperand = operand; } @@ -147,7 +200,7 @@ public class HivePointLookupOptimizerRule extends RelOptRule { case OR: try { node = transformIntoInClauseCondition(rexBuilder, - filterOp.getRowType(), call, minNumORClauses); + nodeOp.getRowType(), call, minNumORClauses); if (node == null) { return call; } 
http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java index fd19d99..1487ed4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptRuleOperand; @@ -25,7 +26,12 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCallBinding; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.validate.SqlMonotonicity; import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -59,16 +65,28 @@ public class HiveProjectSortTransposeRule extends RelOptRule { public void onMatch(RelOptRuleCall call) { final HiveProject project = call.rel(0); final HiveSortLimit sort = call.rel(1); + final RelOptCluster cluster = project.getCluster(); // Determine mapping between project input and output fields. If sort // relies on non-trivial expressions, we can't push. 
final Mappings.TargetMapping map = - RelOptUtil.permutation( + RelOptUtil.permutationIgnoreCast( project.getProjects(), project.getInput().getRowType()).inverse(); for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { if (map.getTarget(fc.getFieldIndex()) < 0) { return; } + final RexNode node = project.getProjects().get(map.getTarget(fc.getFieldIndex())); + if (node.isA(SqlKind.CAST)) { + // Check whether it is a monotonic preserving cast, otherwise we cannot push + final RexCall cast = (RexCall) node; + final RexCallBinding binding = + RexCallBinding.create(cluster.getTypeFactory(), cast, + ImmutableList.of(RexUtil.apply(map, sort.getCollation()))); + if (cast.getOperator().getMonotonicity(binding) == SqlMonotonicity.NOT_MONOTONIC) { + return; + } + } } // Create new collation http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java index fe29850..d1be4bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java @@ -17,23 +17,13 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; -import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelOptRuleOperand; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollationTraitDef; -import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rex.RexUtil; -import 
org.apache.calcite.util.mapping.Mappings; +import org.apache.calcite.rel.rules.SortProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; -import com.google.common.collect.ImmutableList; - -public class HiveSortProjectTransposeRule extends RelOptRule { +public class HiveSortProjectTransposeRule extends SortProjectTransposeRule { public static final HiveSortProjectTransposeRule INSTANCE = new HiveSortProjectTransposeRule(); @@ -50,10 +40,6 @@ public class HiveSortProjectTransposeRule extends RelOptRule { operand(HiveProject.class, any()))); } - protected HiveSortProjectTransposeRule(RelOptRuleOperand operand) { - super(operand); - } - //~ Methods ---------------------------------------------------------------- @Override @@ -68,34 +54,4 @@ public class HiveSortProjectTransposeRule extends RelOptRule { return true; } - // implement RelOptRule - public void onMatch(RelOptRuleCall call) { - final HiveSortLimit sort = call.rel(0); - final HiveProject project = call.rel(1); - - // Determine mapping between project input and output fields. If sort - // relies on non-trivial expressions, we can't push. 
- final Mappings.TargetMapping map = - RelOptUtil.permutation( - project.getProjects(), project.getInput().getRowType()); - for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { - if (map.getTargetOpt(fc.getFieldIndex()) < 0) { - return; - } - } - - // Create new collation - final RelCollation newCollation = - RelCollationTraitDef.INSTANCE.canonize( - RexUtil.apply(map, sort.getCollation())); - - // New operators - final HiveSortLimit newSort = sort.copy(sort.getTraitSet().replace(newCollation), - project.getInput(), newCollation, sort.offset, sort.fetch); - final RelNode newProject = project.copy(sort.getTraitSet(), - ImmutableList.<RelNode>of(newSort)); - - call.transformTo(newProject); - } - } http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 76e0780..c692cc0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rex.LogicVisitor; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexSubQuery; @@ -43,10 +44,12 @@ import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Util; +import 
com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -72,49 +75,26 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; */ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ - public static final HiveSubQueryRemoveRule PROJECT = - new HiveSubQueryRemoveRule( - operand(Project.class, null, RexUtil.SubQueryFinder.PROJECT_PREDICATE, - any()), - HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Project") { - public void onMatch(RelOptRuleCall call) { - final Project project = call.rel(0); - //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released - final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null); - final RexSubQuery e = - RexUtil.SubQueryFinder.find(project.getProjects()); - assert e != null; - final RelOptUtil.Logic logic = - LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, - project.getProjects(), e); - builder.push(project.getInput()); - final int fieldCount = builder.peek().getRowType().getFieldCount(); - final RexNode target = apply(e, HiveFilter.getVariablesSet(e), - logic, builder, 1, fieldCount, false); - final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); - builder.project(shuttle.apply(project.getProjects()), - project.getRowType().getFieldNames()); - call.transformTo(builder.build()); - } - }; - - public static final HiveSubQueryRemoveRule FILTER = - new HiveSubQueryRemoveRule( - operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE, - any()), - HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter") { - public void onMatch(RelOptRuleCall call) { + public static final HiveSubQueryRemoveRule REL_NODE = + new HiveSubQueryRemoveRule( + operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE, + any()), + 
HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter") { + public void onMatch(RelOptRuleCall call) { + final RelNode relNode = call.rel(0); + //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released + final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null); + + // if subquery is in FILTER + if(relNode instanceof Filter) { final Filter filter = call.rel(0); - //final RelBuilder builder = call.builder(); - //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released - final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null); final RexSubQuery e = - RexUtil.SubQueryFinder.find(filter.getCondition()); + RexUtil.SubQueryFinder.find(filter.getCondition()); assert e != null; final RelOptUtil.Logic logic = - LogicVisitor.find(RelOptUtil.Logic.TRUE, - ImmutableList.of(filter.getCondition()), e); + LogicVisitor.find(RelOptUtil.Logic.TRUE, + ImmutableList.of(filter.getCondition()), e); builder.push(filter.getInput()); final int fieldCount = builder.peek().getRowType().getFieldCount(); @@ -123,13 +103,37 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ boolean isCorrScalarQuery = corrScalarQueries.contains(e.rel); final RexNode target = apply(e, HiveFilter.getVariablesSet(e), logic, - builder, 1, fieldCount, isCorrScalarQuery); + builder, 1, fieldCount, isCorrScalarQuery); final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); builder.filter(shuttle.apply(filter.getCondition())); builder.project(fields(builder, filter.getRowType().getFieldCount())); call.transformTo(builder.build()); } - }; + // if subquery is in PROJECT + else if(relNode instanceof Project) { + final Project project = call.rel(0); + final RexSubQuery e = + RexUtil.SubQueryFinder.find(project.getProjects()); + assert e != null; + + final RelOptUtil.Logic logic = + 
LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, + project.getProjects(), e); + builder.push(project.getInput()); + final int fieldCount = builder.peek().getRowType().getFieldCount(); + + Set<RelNode> corrScalarQueries = project.getCluster().getPlanner().getContext().unwrap(Set.class); + boolean isCorrScalarQuery = corrScalarQueries.contains(e.rel); + + final RexNode target = apply(e, HiveFilter.getVariablesSet(e), + logic, builder, 1, fieldCount, isCorrScalarQuery); + final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); + builder.project(shuttle.apply(project.getProjects()), + project.getRowType().getFieldNames()); + call.transformTo(builder.build()); + } + } + }; private HiveSubQueryRemoveRule(RelOptRuleOperand operand, RelBuilderFactory relBuilderFactory, @@ -164,6 +168,25 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ boolean isCorrScalarAgg) { switch (e.getKind()) { case SCALAR_QUERY: + builder.push(e.rel); + // returns single row/column + builder.aggregate(builder.groupKey(), + builder.count(false, "cnt")); + + SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); + + // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer + // ends up getting rid of Project since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + builder.call(countCheck, builder.field("cnt")), + builder.literal(1))); + if( !variablesSet.isEmpty()) + { + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } + else + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); if(isCorrScalarAgg) { // Transformation : // Outer Query Left Join (inner query) on correlated predicate and preserve rows only from left side. 
@@ -181,7 +204,9 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ final ImmutableList.Builder<RexNode> operands = ImmutableList.builder(); RexNode literal; if(isAggZeroOnEmpty(e)) { - literal = builder.literal(0); + // since count has a return type of BIG INT we need to make a literal of type big int + // relbuilder's literal doesn't allow this + literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0)); } else { literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e)); @@ -193,26 +218,7 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ //Transformation is to left join for correlated predicates and inner join otherwise, // but do a count on inner side before that to make sure it generates atmost 1 row. - builder.push(e.rel); - // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, "cnt")); - - SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, - InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); - // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer - // ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - builder.call(countCheck, builder.field("cnt")), - builder.literal(1))); - - if( !variablesSet.isEmpty()) - { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } - else - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); builder.push(e.rel); builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); offset++; @@ -455,6 +461,72 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ return RexUtil.eq(subQuery, this.subQuery) ? 
replacement : subQuery; } } + + // TODO: + // Following HiveSubQueryFinder has been copied from RexUtil::SubQueryFinder + // since there is BUG in there (CALCITE-1726). + // Once CALCITE-1726 is fixed we should get rid of the following code + /** Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if + * applied to an expression that contains a {@link RexSubQuery}. */ + public static class HiveSubQueryFinder extends RexVisitorImpl<Void> { + public static final HiveSubQueryFinder INSTANCE = new HiveSubQueryFinder(); + + /** Returns whether a {@link Project} contains a sub-query. */ + public static final Predicate<RelNode> RELNODE_PREDICATE= + new Predicate<RelNode>() { + public boolean apply(RelNode relNode) { + if (relNode instanceof Project) { + Project project = (Project)relNode; + for (RexNode node : project.getProjects()) { + try { + node.accept(INSTANCE); + } catch (Util.FoundOne e) { + return true; + } + } + return false; + } + else if (relNode instanceof Filter) { + try { + ((Filter)relNode).getCondition().accept(INSTANCE); + return false; + } catch (Util.FoundOne e) { + return true; + } + } + return false; + } + }; + + private HiveSubQueryFinder() { + super(true); + } + + @Override public Void visitSubQuery(RexSubQuery subQuery) { + throw new Util.FoundOne(subQuery); + } + + public static RexSubQuery find(Iterable<RexNode> nodes) { + for (RexNode node : nodes) { + try { + node.accept(INSTANCE); + } catch (Util.FoundOne e) { + return (RexSubQuery) e.getNode(); + } + } + return null; + } + + public static RexSubQuery find(RexNode node) { + try { + node.accept(INSTANCE); + return null; + } catch (Util.FoundOne e) { + return (RexSubQuery) e.getNode(); + } + } + } + } // End SubQueryRemoveRule.java http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java index 38d7906..81de33f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewFilterScanRule.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.List; import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptMaterializations; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; @@ -77,7 +78,7 @@ public class HiveMaterializedViewFilterScanRule extends RelOptRule { // Costing is done in transformTo(), so we call it repeatedly with all applicable // materialized views and cheapest one will be picked List<RelOptMaterialization> applicableMaterializations = - VolcanoPlanner.getApplicableMaterializations(root, materializations); + RelOptMaterializations.getApplicableMaterializations(root, materializations); for (RelOptMaterialization materialization : applicableMaterializations) { List<RelNode> subs = new MaterializedViewSubstitutionVisitor( materialization.queryRel, root, relBuilderFactory).go(materialization.tableRel); http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index 6f26d7d..a25b58b 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlKind; @@ -301,4 +302,15 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> { return op; } + + public Double visitLiteral(RexLiteral literal) { + if (literal.isAlwaysFalse()) { + return 0.0; + } else if (literal.isAlwaysTrue()) { + return 1.0; + } else { + assert false; + } + return null; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index 69e157e..9bcdd0c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -165,7 +165,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred rexBuilder.makeInputRef(project, expr.i), expr.e)); } } - return RelOptPredicateList.of(projectPullUpPredicates); + return RelOptPredicateList.of(rexBuilder, projectPullUpPredicates); } /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. 
*/ @@ -202,6 +202,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred final RelNode input = agg.getInput(); final RelOptPredicateList inputInfo = mq.getPulledUpPredicates(input); final List<RexNode> aggPullUpPredicates = new ArrayList<>(); + final RexBuilder rexBuilder = agg.getCluster().getRexBuilder(); ImmutableBitSet groupKeys = agg.getGroupSet(); Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION, @@ -219,7 +220,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred aggPullUpPredicates.add(r); } } - return RelOptPredicateList.of(aggPullUpPredicates); + return RelOptPredicateList.of(rexBuilder, aggPullUpPredicates); } /** @@ -271,7 +272,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred if (!disjPred.isAlwaysTrue()) { preds.add(disjPred); } - return RelOptPredicateList.of(preds); + return RelOptPredicateList.of(rB, preds); } /** @@ -411,6 +412,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred final JoinRelType joinType = joinRel.getJoinType(); final List<RexNode> leftPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(leftChildPredicates)); final List<RexNode> rightPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(rightChildPredicates)); + final RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); switch (joinType) { case INNER: case LEFT: @@ -476,13 +478,13 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred pulledUpPredicates = Iterables.concat(leftPreds, rightPreds, RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates); } - return RelOptPredicateList.of( + return RelOptPredicateList.of(rexBuilder, pulledUpPredicates, leftInferredPredicates, rightInferredPredicates); case LEFT: - return RelOptPredicateList.of( + return RelOptPredicateList.of(rexBuilder, leftPreds, EMPTY_LIST, rightInferredPredicates); case RIGHT: - return RelOptPredicateList.of( + return 
RelOptPredicateList.of(rexBuilder, rightPreds, leftInferredPredicates, EMPTY_LIST); default: assert inferredPredicates.size() == 0; http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java index 0dc0c24..a43d2be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -37,6 +37,8 @@ import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseDriver; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; public class ASTBuilder { @@ -269,19 +271,23 @@ public class ASTBuilder { type = ((Boolean) val).booleanValue() ? 
HiveParser.KW_TRUE : HiveParser.KW_FALSE; break; case DATE: { - val = literal.getValue(); + //Calcite Calendar is always GMT, Hive atm uses JVM local + final Calendar c = (Calendar) literal.getValue(); + final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone())); type = HiveParser.TOK_DATELITERAL; DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); - val = df.format(((Calendar) val).getTime()); + val = df.format(dt.toDateTime(DateTimeZone.getDefault()).toDate()); val = "'" + val + "'"; } break; case TIME: case TIMESTAMP: { - val = literal.getValue(); + //Calcite Calendar is always GMT, Hive atm uses JVM local + final Calendar c = (Calendar) literal.getValue(); + final DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone())); type = HiveParser.TOK_TIMESTAMPLITERAL; DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); - val = df.format(((Calendar) val).getTime()); + val = df.format(dt.toDateTime(DateTimeZone.getDefault()).toDate()); val = "'" + val + "'"; } break; http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 27990a2..165f8c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -24,7 +24,6 @@ import java.util.List; import java.util.Map; import org.apache.calcite.adapter.druid.DruidQuery; -import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelVisitor; 
http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index e840938..b1efbbd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.translator; import java.math.BigDecimal; -import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Calendar; @@ -75,6 +74,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -291,16 +292,17 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> { case DOUBLE: return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, Double.valueOf(((Number) literal.getValue3()).doubleValue())); - case DATE: + case DATE: { + final Calendar c = (Calendar) literal.getValue(); return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, - new Date(((Calendar)literal.getValue()).getTimeInMillis())); + new java.sql.Date(c.getTimeInMillis())); + } case TIME: case TIMESTAMP: { - Object value = literal.getValue3(); - if (value instanceof Long) { - value = new Timestamp((Long)value); - } - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); + final Calendar c = (Calendar) literal.getValue(); + final 
DateTime dt = new DateTime(c.getTimeInMillis(), DateTimeZone.forTimeZone(c.getTimeZone())); + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + new Timestamp(dt.getMillis())); } case BINARY: return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index 73a9b0f..b9b600d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.translator; +import org.apache.hadoop.hive.ql.parse.*; + import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -72,19 +74,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; -import org.apache.hadoop.hive.ql.parse.JoinCond; -import org.apache.hadoop.hive.ql.parse.JoinType; -import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; -import org.apache.hadoop.hive.ql.parse.PTFTranslator; -import org.apache.hadoop.hive.ql.parse.ParseUtils; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; -import 
org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.UnparseTranslator; -import org.apache.hadoop.hive.ql.parse.WindowingComponentizer; -import org.apache.hadoop.hive.ql.parse.WindowingSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -348,6 +339,7 @@ public class HiveOpConverter { // through Hive String[] baseSrc = new String[joinRel.getInputs().size()]; String tabAlias = getHiveDerivedTableAlias(); + // 1. Convert inputs OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()]; List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size()); @@ -726,7 +718,7 @@ public class HiveOpConverter { List<String> keepColNames) throws SemanticException { // 1. Generate RS operator // 1.1 Prune the tableNames, only count the tableNames that are not empty strings - // as empty string in table aliases is only allowed for virtual columns. + // as empty string in table aliases is only allowed for virtual columns. String tableAlias = null; Set<String> tableNames = input.getSchema().getTableNames(); for (String tableName : tableNames) { @@ -885,7 +877,8 @@ public class HiveOpConverter { private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, - String[] baseSrc, String tabAlias) throws SemanticException { + String[] baseSrc, String tabAlias) + throws SemanticException { // 1. Extract join type JoinCondDesc[] joinCondns; @@ -1010,7 +1003,7 @@ public class HiveOpConverter { // 4. 
We create the join operator with its descriptor JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, - filters, joinExpressions); + filters, joinExpressions, 0); desc.setReversedExprs(reversedExprs); desc.setFilterMap(filterMap); http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index a05b89c..52ca3b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -23,11 +23,12 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; -import java.util.GregorianCalendar; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; +import org.apache.calcite.avatica.util.DateTimeUtils; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.RelOptCluster; @@ -38,8 +39,8 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; @@ -76,8 +77,10 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; @@ -96,6 +99,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; @@ -248,6 +253,8 @@ public class RexNodeConverter { boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase; boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && func.getChildren().size() != 0; + boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween; + boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn; if (isNumeric) { tgtDT = func.getTypeInfo(); @@ -266,15 +273,33 @@ public class RexNodeConverter { } else if (isTransformableTimeStamp) { // unix_timestamp(args) -> to_unix_timestamp(args) func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren()); + } else if (isBetween) { + assert func.getChildren().size() == 4; + // We skip first child as is not involved (is the revert boolean) + // The target type needs to account for all 3 operands + tgtDT = FunctionRegistry.getCommonClassForComparison( + 
func.getChildren().get(1).getTypeInfo(), + FunctionRegistry.getCommonClassForComparison( + func.getChildren().get(2).getTypeInfo(), + func.getChildren().get(3).getTypeInfo())); + } else if (isIN) { + // We're only considering the first element of the IN list for the type + assert func.getChildren().size() > 1; + tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0) + .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); } - for (ExprNodeDesc childExpr : func.getChildren()) { + for (int i =0; i < func.getChildren().size(); ++i) { + ExprNodeDesc childExpr = func.getChildren().get(i); tmpExprNode = childExpr; if (tgtDT != null && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) { - if (isCompare) { + if (isCompare || isBetween || isIN) { // For compare, we will convert requisite children - tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT); + // For BETWEEN skip the first child (the revert boolean) + if (!isBetween || i > 0) { + tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT); + } } else if (isNumeric) { // For numeric, we'll do minimum necessary cast - if we cast to the type // of expression, bad things will happen. 
@@ -634,20 +659,40 @@ public class RexNodeConverter { calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value)); break; case DATE: - Calendar cal = new GregorianCalendar(); - cal.setTime((Date) value); - calciteLiteral = rexBuilder.makeDateLiteral(cal); - break; - case TIMESTAMP: - Calendar c = null; - if (value instanceof Calendar) { - c = (Calendar)value; - } else { - c = Calendar.getInstance(); - c.setTimeInMillis(((Timestamp)value).getTime()); - } - calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED); - break; + // The Calcite literal is in GMT, this will be converted back to JVM locale + // by ASTBuilder.literal during Calcite->Hive plan conversion + final Calendar cal = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault()); + cal.setTime((Date) value); + calciteLiteral = rexBuilder.makeDateLiteral(cal); + break; + case TIMESTAMP: + // The Calcite literal is in GMT, this will be converted back to JVM locale + // by ASTBuilder.literal during Calcite->Hive plan conversion + final Calendar calt = Calendar.getInstance(DateTimeUtils.GMT_ZONE, Locale.getDefault()); + if (value instanceof Calendar) { + final Calendar c = (Calendar) value; + long timeMs = c.getTimeInMillis(); + calt.setTimeInMillis(timeMs); + } else { + final Timestamp ts = (Timestamp) value; + // CALCITE-1690 + // Calcite cannot represent TIMESTAMP literals with precision higher than 3 + if (ts.getNanos() % 1000000 != 0) { + throw new CalciteSemanticException( + "High Precision Timestamp: " + String.valueOf(ts), + UnsupportedFeature.HighPrecissionTimestamp); + } + calt.setTimeInMillis(ts.getTime()); + } + // Must call makeLiteral, not makeTimestampLiteral + // to have the RexBuilder.roundTime logic kick in + calciteLiteral = rexBuilder.makeLiteral( + calt, + rexBuilder.getTypeFactory().createSqlType( + SqlTypeName.TIMESTAMP, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)), + false); + break; 
case INTERVAL_YEAR_MONTH: // Calcite year-month literal value is months as BigDecimal BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths()); http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 85450c9..10f5eb3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -352,6 +352,7 @@ public class SqlFunctionConverter { registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL")); registerFunction("isnull", SqlStdOperatorTable.IS_NULL, hToken(HiveParser.TOK_ISNULL, "TOK_ISNULL")); + registerFunction("is not distinct from", SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, hToken(HiveParser.EQUAL_NS, "<=>")); registerFunction("when", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); // timebased http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 2b075be..701bde4 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -194,7 +194,7 @@ public class ReduceSinkDeDuplication extends Transform { ReduceSinkDesc cRSc = cRS.getConf(); for (ReduceSinkOperator pRSNs : pRSs) { ReduceSinkDesc pRSNc = pRSNs.getConf(); - if (cRSc.getKeyCols().size() < pRSNc.getKeyCols().size()) { + if (cRSc.getKeyCols().size() != pRSNc.getKeyCols().size()) { return false; } if (cRSc.getPartitionCols().size() != pRSNc.getPartitionCols().size()) { http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPrunerUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPrunerUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPrunerUtils.java index 4d3e74e..88b8119 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPrunerUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPrunerUtils.java @@ -37,10 +37,10 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; public final class ListBucketingPrunerUtils { /* Default list bucketing directory name. internal use only not for client. */ - public static String HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME = + public static final String HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME = "HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME"; /* Default list bucketing directory key. internal use only not for client. 
*/ - public static String HIVE_LIST_BUCKETING_DEFAULT_KEY = "HIVE_DEFAULT_LIST_BUCKETING_KEY"; + public static final String HIVE_LIST_BUCKETING_DEFAULT_KEY = "HIVE_DEFAULT_LIST_BUCKETING_KEY"; /** * Decide if pruner skips the skewed directory http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java index 6d0ee92..2143718 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java @@ -281,7 +281,8 @@ public final class GenMRSkewJoinProcessor { MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,joinDescriptor .getOutputColumnNames(), i, joinDescriptor.getConds(), - joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix); + joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, + joinDescriptor.getNoConditionalTaskSize()); mapJoinDescriptor.setTagOrder(tags); mapJoinDescriptor.setHandleSkewJoin(false); mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes()); @@ -383,11 +384,11 @@ public final class GenMRSkewJoinProcessor { return true; } - private static String skewJoinPrefix = "hive_skew_join"; - private static String UNDERLINE = "_"; - private static String BIGKEYS = "bigkeys"; - private static String SMALLKEYS = "smallkeys"; - private static String RESULTS = "results"; + private static final String skewJoinPrefix = "hive_skew_join"; + private static final String UNDERLINE = "_"; + private static final String BIGKEYS = "bigkeys"; + private static final String SMALLKEYS = 
"smallkeys"; + private static final String RESULTS = "results"; static Path getBigKeysDir(Path baseDir, Byte srcTbl) { return StringInternUtils.internUriStringsInPath( http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java index 38bb847..b705f5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java @@ -240,7 +240,8 @@ public class GenSparkSkewJoinProcessor { MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor .getOutputColumnNames(), i, joinDescriptor.getConds(), - joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix); + joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, + joinDescriptor.getNoConditionalTaskSize()); mapJoinDescriptor.setTagOrder(tags); mapJoinDescriptor.setHandleSkewJoin(false); mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes()); http://git-wip-us.apache.org/repos/asf/hive/blob/187eb760/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java new file mode 100644 index 0000000..a5ed308 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java @@ -0,0 +1,132 @@ +/** + * 
Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.util.concurrent.ExecutionException;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.llap.registry.ServiceInstance;
+import org.apache.hadoop.hive.llap.registry.ServiceInstanceSet;
+import org.apache.hadoop.hive.llap.registry.impl.InactiveServiceInstance;
+import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Best-effort snapshot of the executor capacity reported by the LLAP registry, for use
+ * during query compilation.
+ *
+ * <p>Instances are obtained through {@link #getClusterInfo(Configuration)} and are shared
+ * through a static cache keyed by registry user and daemon service hosts. A shared instance
+ * keeps the {@link Configuration} of the caller that first created it.
+ *
+ * <p>Thread-safety: {@link #initClusterInfo()} is serialized on the instance lock. The
+ * getters take no lock; they read volatile fields, and the refresh timestamp is written
+ * last, so once {@link #hasClusterInfo()} returns {@code true} the counts of at least one
+ * completed refresh are visible.
+ */
+public class LlapClusterStateForCompile {
+  protected static final Logger LOG = LoggerFactory.getLogger(LlapClusterStateForCompile.class);
+
+  private static final long CLUSTER_UPDATE_INTERVAL_NS = 120 * 1000000000L; // 2 minutes.
+
+  // Snapshot state. Written only by initClusterInfo() (under the instance lock), read by the
+  // lock-free getters, hence volatile. lastClusterUpdateNs == null means "never refreshed".
+  private volatile Long lastClusterUpdateNs;
+  private volatile int noConfigNodeCount;
+  private volatile int executorCount;
+  private volatile int numExecutorsPerNode = -1;
+  // Registry client; created lazily and only touched inside initClusterInfo().
+  private LlapRegistryService svc;
+  private final Configuration conf;
+
+  // It's difficult to impossible to pass global things to compilation, so we have a static cache.
+  private static final Cache<String, LlapClusterStateForCompile> CACHE =
+      CacheBuilder.newBuilder().initialCapacity(10).maximumSize(100).build();
+
+  /**
+   * Returns the cached cluster state for the registry user and daemon service hosts found in
+   * {@code conf}, creating and caching a new, not yet initialized instance when none exists.
+   *
+   * @param conf configuration the cache key is read from; also retained by a newly created
+   *        instance for later registry access
+   * @return the shared cluster state for that user/hosts pair
+   */
+  public static LlapClusterStateForCompile getClusterInfo(final Configuration conf) {
+    final String nodes = HiveConf.getTrimmedVar(conf, HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
+    final String userName = HiveConf.getVar(
+        conf, ConfVars.LLAP_ZK_REGISTRY_USER, LlapRegistryService.currentUser());
+    final String cacheKey = userName + ":" + nodes;
+    Callable<LlapClusterStateForCompile> generator = new Callable<LlapClusterStateForCompile>() {
+      @Override
+      public LlapClusterStateForCompile call() throws Exception {
+        LOG.info("Creating cluster info for {}", cacheKey);
+        return new LlapClusterStateForCompile(conf);
+      }
+    };
+    try {
+      return CACHE.get(cacheKey, generator);
+    } catch (ExecutionException e) {
+      throw new RuntimeException(e); // Should never happen... ctor is just assignments.
+    }
+  }
+
+  private LlapClusterStateForCompile(Configuration conf) {
+    this.conf = conf;
+  }
+
+  /**
+   * @return {@code true} once at least one call to {@link #initClusterInfo()} has completed a
+   *         refresh; until then the count getters only return their initial values
+   */
+  public boolean hasClusterInfo() {
+    return lastClusterUpdateNs != null;
+  }
+
+  /**
+   * @return sum of the executor counts parsed from the instances seen by the last refresh;
+   *         {@code 0} if no refresh has completed yet (check {@link #hasClusterInfo()})
+   */
+  public int getKnownExecutorCount() {
+    return executorCount;
+  }
+
+  /**
+   * @return number of instances in the last refresh that had no properties or no parseable
+   *         executor count; {@code 0} if no refresh has completed yet
+   */
+  public int getNodeCountWithUnknownExecutors() {
+    return noConfigNodeCount;
+  }
+
+  /**
+   * @return executor count of the first instance whose count could be parsed, or {@code -1}
+   *         if none has been seen so far; once set, later refreshes do not change it
+   */
+  public int getNumExecutorsPerNode() {
+    return numExecutorsPerNode;
+  }
+
+  /**
+   * Refreshes the snapshot from the registry unless the previous refresh is less than
+   * {@code CLUSTER_UPDATE_INTERVAL_NS} old. This is best-effort: if the registry client
+   * cannot be created or the instances cannot be fetched, the failure is logged and the
+   * previous state (possibly "no info") is kept.
+   */
+  public synchronized void initClusterInfo() {
+    Long lastUpdateNs = lastClusterUpdateNs;
+    if (lastUpdateNs != null) {
+      long elapsed = System.nanoTime() - lastUpdateNs;
+      if (elapsed < CLUSTER_UPDATE_INTERVAL_NS) {
+        return;
+      }
+    }
+    if (svc == null) {
+      try {
+        svc = LlapRegistryService.getClient(conf);
+      } catch (Throwable t) {
+        LOG.info("Cannot create the client; ignoring", t);
+        return; // Don't fail; this is best-effort.
+      }
+    }
+    ServiceInstanceSet instances;
+    try {
+      // NOTE(review): the argument 10 is presumably a wait/timeout value - confirm the unit.
+      instances = svc.getInstances(10);
+    } catch (IOException e) {
+      LOG.info("Cannot update cluster information; ignoring", e);
+      return; // Don't wait for the cluster if not started; this is best-effort.
+    }
+    int executorsLocal = 0;
+    int noConfigNodesLocal = 0;
+    int executorsPerNodeLocal = numExecutorsPerNode;
+    for (ServiceInstance si : instances.getAll()) {
+      if (si instanceof InactiveServiceInstance) {
+        continue; // Shouldn't happen in getAll.
+      }
+      Map<String, String> props = si.getProperties();
+      if (props == null) {
+        ++noConfigNodesLocal;
+        continue;
+      }
+      try {
+        // A missing property yields parseInt(null), which also throws NumberFormatException.
+        int numExecutors = Integer.parseInt(props.get(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname));
+        executorsLocal += numExecutors;
+        if (executorsPerNodeLocal == -1) {
+          executorsPerNodeLocal = numExecutors;
+        }
+      } catch (NumberFormatException e) {
+        ++noConfigNodesLocal;
+      }
+    }
+    // Publish the counts first and the timestamp last: the volatile write of
+    // lastClusterUpdateNs is what makes hasClusterInfo() true, so a lock-free reader that
+    // sees it is guaranteed to also see the counts assigned above.
+    noConfigNodeCount = noConfigNodesLocal;
+    executorCount = executorsLocal;
+    numExecutorsPerNode = executorsPerNodeLocal;
+    lastClusterUpdateNs = System.nanoTime();
+  }
+}