HIVE-11461 : Transform flat AND/OR into IN struct clause (Jesus Camacho Rodriguez, Ashutosh Chauhan via Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/139101d6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/139101d6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/139101d6 Branch: refs/heads/hbase-metastore Commit: 139101d6cf3be23d9ec8a88a9a75b5969434607b Parents: 5b67f35 Author: Ashutosh Chauhan <hashut...@apache.org> Authored: Thu Aug 13 09:16:28 2015 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Thu Aug 13 09:16:28 2015 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../hadoop/hive/ql/lib/PreOrderOnceWalker.java | 44 +++ .../hadoop/hive/ql/optimizer/Optimizer.java | 6 + .../hive/ql/optimizer/PointLookupOptimizer.java | 280 +++++++++++++++++++ .../ql/optimizer/pcr/PcrExprProcFactory.java | 3 +- .../hive/ql/optimizer/ppr/OpProcFactory.java | 3 +- .../apache/hadoop/hive/ql/plan/FilterDesc.java | 14 +- .../annotate_stats_deep_filters.q | 3 +- .../alter_partition_coltype.q.out | 12 +- .../clientpositive/annotate_stats_filter.q.out | 8 +- .../results/clientpositive/flatten_and_or.q.out | 8 +- ql/src/test/results/clientpositive/pcr.q.out | 12 +- .../results/clientpositive/ppd_transform.q.out | 12 +- .../test/results/clientpositive/spark/pcr.q.out | 12 +- .../clientpositive/spark/ppd_transform.q.out | 12 +- .../clientpositive/spark/vectorized_case.q.out | 2 +- .../clientpositive/tez/explainuser_1.q.out | 2 +- .../clientpositive/tez/vectorized_case.q.out | 2 +- .../clientpositive/vectorized_case.q.out | 9 +- 19 files changed, 397 insertions(+), 49 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index d1cb5fb..11b9f78 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1186,6 +1186,8 @@ public class HiveConf extends Configuration { "Whether to transitively replicate predicate filters over equijoin conditions."), HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true, "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."), + HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true, + "Whether to transform OR clauses in Filter operators into IN clauses"), // Constant propagation optimizer HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java new file mode 100644 index 0000000..d891fc2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java @@ -0,0 +1,44 @@ +package org.apache.hadoop.hive.ql.lib; + +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * Graph walker that takes a list of starting nodes and walks them in pre-order. + * If a rule fires against a given node, we do not try to apply the rule + * to its children. + */ +public class PreOrderOnceWalker extends PreOrderWalker { + + public PreOrderOnceWalker(Dispatcher disp) { + super(disp); + } + + /** + * Walk the current operator and its descendants. 
+ * + * @param nd + * current operator in the graph + * @throws SemanticException + */ + @Override + public void walk(Node nd) throws SemanticException { + opStack.push(nd); + dispatch(nd, opStack); + + // The rule has been applied, we bail out + if (retMap.get(nd) != null) { + opStack.pop(); + return; + } + + // no rule fired for this node: recursively walk its children in pre-order + if (nd.getChildren() != null) { + for (Node n : nd.getChildren()) { + walk(n); + } + } + + opStack.pop(); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index c4e11b9..14f362f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -81,6 +81,12 @@ public class Optimizer { // are combined and may become eligible for reduction (like is not null filter). transformations.add(new ConstantPropagate()); } + + // Try to transform OR predicates in Filter into IN clauses. 
+ if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { + transformations.add(new PointLookupOptimizer()); + } + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { transformations.add(new PartitionPruner()); transformations.add(new PartitionConditionRemover()); http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java new file mode 100644 index 0000000..6a8acec --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.calcite.util.Pair; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.ForwardWalker; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.TypeRule; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ListMultimap; + +/** + * This optimization will take a Filter expression, and if its predicate contains + * an OR 
operator whose children are constant equality expressions, it will try + * to generate an IN clause (which is more efficient). If the OR operator contains + * AND operator children, the optimization might generate an IN clause that uses + * structs. + */ +public class PointLookupOptimizer implements Transform { + + private static final Log LOG = LogFactory.getLog(PointLookupOptimizer.class); + private static final String IN_UDF = + GenericUDFIn.class.getAnnotation(Description.class).name(); + private static final String STRUCT_UDF = + GenericUDFStruct.class.getAnnotation(Description.class).name(); + + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + // 1. Trigger transformation + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), new FilterTransformer()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); + GraphWalker ogw = new ForwardWalker(disp); + + List<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + return pctx; + } + + private class FilterTransformer implements NodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + FilterOperator filterOp = (FilterOperator) nd; + ExprNodeDesc predicate = filterOp.getConf().getPredicate(); + + // Try to rewrite the OR predicate as an equivalent IN clause + ExprNodeDesc newPredicate = generateInClause(predicate); + if (newPredicate != null) { + // Replace the predicate of the current FIL with the new one, keeping the original + if (LOG.isDebugEnabled()) { + LOG.debug("Generated new predicate with IN clause: " + newPredicate); + } + filterOp.getConf().setOrigPredicate(predicate); + filterOp.getConf().setPredicate(newPredicate); + } + + return null; + } + + private ExprNodeDesc generateInClause(ExprNodeDesc predicate) throws SemanticException { + Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>(); + exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new OrExprProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null); + GraphWalker egw = new PreOrderOnceWalker(disp); + + List<Node> startNodes = new ArrayList<Node>(); + startNodes.add(predicate); + + HashMap<Node, Object> outputMap = new HashMap<Node, Object>(); + egw.startWalking(startNodes, outputMap); + return (ExprNodeDesc) outputMap.get(predicate); + } + } + + private class OrExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd; + + // 1. If it is not an OR operator, we bail out. + if (!FunctionRegistry.isOpOr(fd)) { + return null; + } + + // 2. 
It is an OR operator + List<ExprNodeDesc> children = fd.getChildren(); + ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap = + ArrayListMultimap.create(); + boolean modeAnd = false; + for (int i = 0; i < children.size(); i++) { + ExprNodeDesc child = children.get(i); + + // - If the child is an AND operator, extract its children + // - Otherwise, take the child itself + final List<ExprNodeDesc> conjunctions; + if (FunctionRegistry.isOpAnd(child)) { + // If it is the first child, we set the mode variable value + // Otherwise, if the mode we are working on is different, we + // bail out + if (i == 0) { + modeAnd = true; + } else { + if (!modeAnd) { + return null; + } + } + + // Multiple children + conjunctions = child.getChildren(); + } else { + // If it is the first child, we set the mode variable value + // Otherwise, if the mode we are working on is different, we + // bail out + if (i == 0) { + modeAnd = false; + } else { + if (modeAnd) { + return null; + } + } + + // One child + conjunctions = new ArrayList<ExprNodeDesc>(1); + conjunctions.add(child); + } + + // 3. We will extract the literals to introduce in the IN clause. + // If the patterns OR-AND-EqOp or OR-EqOp are not matched, we bail out + for (ExprNodeDesc conjunction: conjunctions) { + if (!(conjunction instanceof ExprNodeGenericFuncDesc)) { + return null; + } + + ExprNodeGenericFuncDesc conjCall = (ExprNodeGenericFuncDesc) conjunction; + Class<? 
extends GenericUDF> genericUdfClass = conjCall.getGenericUDF().getClass(); + if(GenericUDFOPEqual.class == genericUdfClass) { + if (conjCall.getChildren().get(0) instanceof ExprNodeColumnDesc && + conjCall.getChildren().get(1) instanceof ExprNodeConstantDesc) { + ExprNodeColumnDesc ref = (ExprNodeColumnDesc) conjCall.getChildren().get(0); + String refString = ref.toString(); + columnConstantsMap.put(refString, + new Pair<ExprNodeColumnDesc,ExprNodeConstantDesc>( + ref, (ExprNodeConstantDesc) conjCall.getChildren().get(1))); + if (columnConstantsMap.get(refString).size() != i+1) { + // If we have not added to this column desc before, we bail out + return null; + } + } else if (conjCall.getChildren().get(1) instanceof ExprNodeColumnDesc && + conjCall.getChildren().get(0) instanceof ExprNodeConstantDesc) { + ExprNodeColumnDesc ref = (ExprNodeColumnDesc) conjCall.getChildren().get(1); + String refString = ref.toString(); + columnConstantsMap.put(refString, + new Pair<ExprNodeColumnDesc,ExprNodeConstantDesc>( + ref, (ExprNodeConstantDesc) conjCall.getChildren().get(0))); + if (columnConstantsMap.get(refString).size() != i+1) { + // If we have not added to this column desc before, we bail out + return null; + } + } else { + // We bail out + return null; + } + } else { + // We bail out + return null; + } + } + } + + // 4. 
We build the new predicate and return it + ExprNodeDesc newPredicate = null; + List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>(children.size()); + // 4.1 Create structs + List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>(); + List<String> names = new ArrayList<String>(); + List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + for (int i = 0; i < children.size(); i++) { + List<ExprNodeDesc> constantFields = new ArrayList<ExprNodeDesc>(children.size()); + + for (String keyString : columnConstantsMap.keySet()) { + Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = + columnConstantsMap.get(keyString).get(i); + if (i == 0) { + columns.add(columnConstant.left); + names.add(columnConstant.left.getColumn()); + typeInfos.add(columnConstant.left.getTypeInfo()); + } + constantFields.add(columnConstant.right); + } + + if (i == 0) { + ExprNodeDesc columnsRefs; + if (columns.size() == 1) { + columnsRefs = columns.get(0); + } else { + columnsRefs = new ExprNodeGenericFuncDesc( + TypeInfoFactory.getStructTypeInfo(names, typeInfos), + FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), + columns); + } + newChildren.add(columnsRefs); + } + ExprNodeDesc values; + if (constantFields.size() == 1) { + values = constantFields.get(0); + } else { + values = new ExprNodeGenericFuncDesc( + TypeInfoFactory.getStructTypeInfo(names, typeInfos), + FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), + constantFields); + } + newChildren.add(values); + } + newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren); + + return newPredicate; + } + + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 71a6c73..825938a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** @@ -379,7 +380,7 @@ public final class PcrExprProcFactory { } } - if (has_part_col) { + if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) { // we need to evaluate result for every pruned partition if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) { // if the return type of the GenericUDF is boolean and all partitions agree on http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java index fd51628..7262164 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java @@ -55,7 +55,8 @@ public final class OpProcFactory extends PrunerOperatorFactory { TableScanOperator top) throws SemanticException, UDFArgumentException { OpWalkerCtx owc = (OpWalkerCtx) procCtx; // Otherwise this is not a sampling predicate and we need to - ExprNodeDesc predicate = fop.getConf().getPredicate(); + ExprNodeDesc predicate = fop.getConf().getOrigPredicate(); + predicate = predicate == null ? 
fop.getConf().getPredicate() : predicate; String alias = top.getConf().getAlias(); // Generate the partition pruning predicate http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index 5408dc8..6a31689 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -70,14 +70,16 @@ public class FilterDesc extends AbstractOperatorDesc { SampleDesc desc = new SampleDesc(numerator, denominator, null, inputPruning); return desc; } - + + @Override public String toString() { - return inputPruning ? "BUCKET " + numerator + " OUT OF " + denominator: null; + return inputPruning ? "BUCKET " + numerator + " OUT OF " + denominator: null; } } private static final long serialVersionUID = 1L; private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate; + private transient ExprNodeDesc origPredicate; private boolean isSamplingPred; private transient SampleDesc sampleDescr; //Is this a filter that should perform a comparison for sorted searches @@ -149,6 +151,14 @@ public class FilterDesc extends AbstractOperatorDesc { this.isSortedFilter = isSortedFilter; } + public void setOrigPredicate(ExprNodeDesc origPredicate) { + this.origPredicate = origPredicate; + } + + public ExprNodeDesc getOrigPredicate() { + return origPredicate; + } + /** * Some filters are generated or implied, which means it is not in the query. * It is added by the analyzer. 
For example, when we do an inner join, we add http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q index c027532..e01a654 100644 --- a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q +++ b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q @@ -20,6 +20,7 @@ analyze table over1k compute statistics; analyze table over1k compute statistics for columns; set hive.stats.fetch.column.stats=true; +set hive.optimize.point.lookup=false; explain select count(*) from over1k where ( (t=1 and si=2) or (t=2 and si=3) @@ -63,4 +64,4 @@ or (t=17 and si=18) or (t=27 and si=28) or (t=37 and si=38) or (t=47 and si=48) -or (t=52 and si=53)); \ No newline at end of file +or (t=52 and si=53)); http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/alter_partition_coltype.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 9fc3c8d..06515da 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -1134,11 +1134,15 @@ STAGE PLANS: alias: alterdynamic_part_table Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 + Filter Operator + isSamplingPred: false + predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean) Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: 
NONE - ListSink + Select Operator + expressions: intcol (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/annotate_stats_filter.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 492e302..af1e1c3 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -678,15 +678,15 @@ STAGE PLANS: alias: loc_orc Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((state = 'OH') or (state = 'CA')) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (state) IN ('OH', 'CA') (type: boolean) + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/flatten_and_or.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out index 9c51ff3..5f25daa 100644 --- a/ql/src/test/results/clientpositive/flatten_and_or.q.out +++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out @@ -44,15 +44,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/pcr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index d7c40a3..4c9ea77 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -2475,16 +2475,16 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2588,13 +2588,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/ppd_transform.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out index 17248e4..f536767 100644 --- a/ql/src/test/results/clientpositive/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/ppd_transform.q.out @@ -390,21 +390,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('a', 'b') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('c', 'd') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/spark/pcr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index fb08f10..5aa0df8 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2534,16 +2534,16 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2648,13 +2648,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: 
int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/spark/ppd_transform.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out index 52a847a..a6e6e38 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -405,21 +405,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('a', 'b') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('c', 'd') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/spark/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c2250e6..54003c3 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: alltypesorc Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/tez/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out 
b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index 9756b0c..e8a9786 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -2909,7 +2909,7 @@ Stage-0 Select Operator [SEL_2] outputColumnNames:["_col0"] Filter Operator [FIL_4] - predicate:((c_int = -6) or (c_int = 6)) (type: boolean) + predicate:(c_int) IN (-6, 6) (type: boolean) TableScan [TS_0] alias:cbo_t1 http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/tez/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out index c2250e6..54003c3 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: alltypesorc Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out index 73bf12d..9e47014 100644 --- a/ql/src/test/results/clientpositive/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/vectorized_case.q.out @@ -46,20 +46,19 @@ STAGE PLANS: alias: 
alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator