HIVE-11634 : Support partition pruning for IN(STRUCT(partcol, nonpartcol..)...) (Hari Subramaniyan, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c9246f44 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c9246f44 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c9246f44 Branch: refs/heads/master-fixed Commit: c9246f44ead401b9121c3badbfbdb07cc9227a0a Parents: 55a24f0 Author: Hari Subramaniyan <harisan...@apache.org> Authored: Mon Nov 2 11:34:49 2015 -0800 Committer: Hari Subramaniyan <harisan...@apache.org> Committed: Mon Nov 2 11:34:49 2015 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +- .../apache/hadoop/hive/conf/HiveConf.java.orig | 5 +- .../hadoop/hive/ql/optimizer/Optimizer.java | 10 +- .../ql/optimizer/PartitionColumnsSeparator.java | 525 ++++ .../hive/ql/optimizer/PointLookupOptimizer.java | 90 +- .../ql/optimizer/pcr/PcrExprProcFactory.java | 33 + .../hive/ql/optimizer/ppr/OpProcFactory.java | 3 +- .../apache/hadoop/hive/ql/plan/FilterDesc.java | 9 - ql/src/test/queries/clientpositive/pcs.q | 66 + .../test/queries/clientpositive/pointlookup.q | 6 +- .../test/queries/clientpositive/pointlookup2.q | 2 +- .../test/queries/clientpositive/pointlookup3.q | 2 +- .../dynpart_sort_optimization_acid.q.out | 4 +- .../llap/dynamic_partition_pruning.q.out | 45 - .../vectorized_dynamic_partition_pruning.q.out | 45 - ql/src/test/results/clientpositive/pcs.q.out | 2249 ++++++++++++++++++ .../results/clientpositive/pointlookup.q.out | 8 +- .../tez/dynamic_partition_pruning.q.out | 45 - .../vectorized_dynamic_partition_pruning.q.out | 45 - 19 files changed, 2896 insertions(+), 300 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git 
a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b214344..5198bb5 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1263,8 +1263,8 @@ public class HiveConf extends Configuration { "Whether to transform OR clauses in Filter operators into IN clauses"), HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31, "Minimum number of OR clauses needed to transform into IN clauses"), - HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true, - "Extract partial expressions when optimizing point lookup IN clauses"), + HIVEPARTITIONCOLUMNSEPARATOR("hive.optimize.partition.columns.separate", true, + "Extract partition columns from IN clauses"), // Constant propagation optimizer HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig index f05f224..b214344 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig @@ -2206,7 +2206,10 @@ public class HiveConf extends Configuration { "Exceeding this will trigger a flush irrelevant of memory pressure condition."), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1, "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."), - + 
HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true, + "This flag should be set to true to enable the new vectorization\n" + + "of queries using ReduceSink.\ni" + + "The default value is true."), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " + "partition operations including but not limited to insert, such as alter, describe etc."), http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 7ee5081..6347872 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -84,11 +84,11 @@ public class Optimizer { if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { final int min = HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); - final boolean extract = HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT); - final boolean testMode = HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_IN_TEST); - transformations.add(new PointLookupOptimizer(min, extract, testMode)); + transformations.add(new PointLookupOptimizer(min)); + } + + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPARTITIONCOLUMNSEPARATOR)) { + transformations.add(new PartitionColumnsSeparator()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java new file mode 100644 index 0000000..f71f37c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java @@ -0,0 +1,525 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.ForwardWalker; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.TypeRule; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +/** + * This optimization will take a Filter expression, and if its predicate contains + * an IN operator whose children are constant structs or structs containing constant fields, + * it will try to generate predicate with IN clauses containing 
only partition columns. + * This predicate is in turn used by the partition pruner to prune the partitions that cannot + * satisfy the original IN(STRUCT(..)..) predicate. + */ +public class PartitionColumnsSeparator implements Transform { + + private static final Log LOG = LogFactory.getLog(PointLookupOptimizer.class); + private static final String IN_UDF = + GenericUDFIn.class.getAnnotation(Description.class).name(); + private static final String STRUCT_UDF = + GenericUDFStruct.class.getAnnotation(Description.class).name(); + private static final String AND_UDF = + GenericUDFOPAnd.class.getAnnotation(Description.class).name(); + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + // 1. Trigger transformation + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), new StructInTransformer()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); + GraphWalker ogw = new ForwardWalker(disp); + + List<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + return pctx; + } + + private class StructInTransformer implements NodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + FilterOperator filterOp = (FilterOperator) nd; + ExprNodeDesc predicate = filterOp.getConf().getPredicate(); + + // Generate the partition pruning predicate as separate IN clauses (one per table alias) + // containing only the partitioning columns. 
+ ExprNodeDesc newPredicate = generateInClauses(predicate); + if (newPredicate != null) { + // AND the original predicate with the newly generated one in the current FIL + if (LOG.isDebugEnabled()) { + LOG.debug("Generated new predicate with IN clause: " + newPredicate); + } + final List<ExprNodeDesc> subExpr = + new ArrayList<ExprNodeDesc>(2); + subExpr.add(predicate); + subExpr.add(newPredicate); + ExprNodeGenericFuncDesc newFilterPredicate = new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr); + filterOp.getConf().setPredicate(newFilterPredicate); + } + + return null; + } + + private ExprNodeDesc generateInClauses(ExprNodeDesc predicate) throws SemanticException { + Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>(); + exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new StructInExprProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null); + GraphWalker egw = new PreOrderOnceWalker(disp); + + List<Node> startNodes = new ArrayList<Node>(); + startNodes.add(predicate); + + HashMap<Node, Object> outputMap = new HashMap<Node, Object>(); + egw.startWalking(startNodes, outputMap); + return (ExprNodeDesc) outputMap.get(predicate); + } + } + + /** + * The StructInExprProcessor processes the IN clauses of the following format : + * STRUCT(T1.a, T1.b, T2.b, T2.c) IN (STRUCT(1, 2, 3, 4) , STRUCT(2, 3, 4, 5)) + * where T1.a, T1.b, T2.c are all partition columns and T2.b is a non-partition + * column. The resulting additional predicate generated after + * StructInExprProcessor.process() looks like : + * STRUCT(T1.a, T1.b) IN (STRUCT(1, 2), STRUCT(2, 3)) + * AND + * STRUCT(T2.c) IN (STRUCT(4), STRUCT(5)) + * The additional predicate generated is used to prune the partitions that are + * part of the given query. 
Once the partitions are pruned, the partition condition + * remover is expected to remove the redundant predicates from the plan. + */ + private class StructInExprProcessor implements NodeProcessor { + + /** TableInfo is populated in PASS 1 of process(). It contains the information required + * to generate an IN clause of the following format: + * STRUCT(T1.a, T1.b) IN (const STRUCT(1, 2), const STRUCT(2, 3)) + * In the above e.g. please note that all elements of the struct come from the same table. + * The populated TableStructInfo is used to generate the IN clause in PASS 2 of process(). + * The table struct information class has the following fields: + * 1. Expression Node Descriptor for the Left Hand Side of the IN clause for the table + * 2. 2-D List of expression node descriptors which corresponds to the elements of IN clause + */ + class TableInfo { + List<ExprNodeDesc> exprNodeLHSDescriptor; + List<List<ExprNodeDesc>> exprNodeRHSStructs; + + public TableInfo() { + exprNodeLHSDescriptor = new ArrayList<ExprNodeDesc>(); + exprNodeRHSStructs = new ArrayList<List<ExprNodeDesc>>(); + } + } + + // Mapping from expression node to is an expression containing only + // partition or virtual column or constants + private Map<ExprNodeDesc, Boolean> exprNodeToPartOrVirtualColOrConstExpr = + new IdentityHashMap<ExprNodeDesc, Boolean>(); + + /** + * This function iterates through the entire subtree under a given expression node + * and makes sure that the expression contain only constant nodes or + * partition/virtual columns as leaf nodes. + * @param en Expression Node Descriptor for the root node. + * @return true if the subtree rooted under en has only partition/virtual columns or + * constant values as the leaf nodes. Else, return false. 
+ */ + private boolean exprContainsOnlyPartitionColOrVirtualColOrConstants(ExprNodeDesc en) { + if (en == null) { + return true; + } + if (exprNodeToPartOrVirtualColOrConstExpr.containsKey(en)) { + return exprNodeToPartOrVirtualColOrConstExpr.get(en); + } + if (en instanceof ExprNodeColumnDesc) { + boolean ret = ((ExprNodeColumnDesc)en).getIsPartitionColOrVirtualCol(); + exprNodeToPartOrVirtualColOrConstExpr.put(en, ret); + return ret; + } + if (en.getChildren() != null) { + for (ExprNodeDesc cn : en.getChildren()) { + if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(cn)) { + exprNodeToPartOrVirtualColOrConstExpr.put(en, false); + return false; + } + } + } + exprNodeToPartOrVirtualColOrConstExpr.put(en, true); + return true; + } + + + /** + * Check if the expression node satisfies the following : + * Has at least one subexpression that contains only partition/virtual columns or constants + * and refers to exactly one table alias. + * @param en Expression Node Descriptor + * @return true if there is at least one subexpression with only partition/virtual columns + * or constants that refers to exactly one table alias. If not, return false. + */ + private boolean hasAtleastOneSubExprWithPartColOrVirtualColWithOneTableAlias(ExprNodeDesc en) { + if (en == null || en.getChildren() == null) { + return false; + } + for (ExprNodeDesc cn : en.getChildren()) { + if (exprContainsOnlyPartitionColOrVirtualColOrConstants(cn) && getTableAlias(cn) != null) { + return true; + } + } + return false; + } + + + /** + * Check if the expression node satisfies the following : + * All subexpressions contain only constants or partition/virtual columns coming from the + * same table + * @param en Expression Node Descriptor + * @return true/false based on the condition specified in the above description. 
+ */ + private boolean hasAllSubExprWithConstOrPartColOrVirtualColWithOneTableAlias(ExprNodeDesc en) { + if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) { + return false; + } + + Set<String> s = new HashSet<String>(); + Set<ExprNodeDesc> visited = new HashSet<ExprNodeDesc>(); + + return getTableAliasHelper(en, s, visited); + } + + + /** + * Return the expression node descriptor if the input expression node is a GenericUDFIn. + * Else, return null. + * @param en Expression Node Descriptor + * @return The expression node descriptor if the input expression node represents an IN clause. + * Else, return null. + */ + private ExprNodeGenericFuncDesc getInExprNode(ExprNodeDesc en) { + if (en == null) { + return null; + } + + if (en instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc)(en)).getGenericUDF() + instanceof GenericUDFIn) { + return (ExprNodeGenericFuncDesc) en; + } + return null; + } + + + /** + * Helper used by getTableAlias + * @param en Expression Node Descriptor + * @param s Set of the table Aliases associated with the current Expression node. + * @param visited Visited ExpressionNode set. + * @return true if en has at most one table associated with it, else return false. + */ + private boolean getTableAliasHelper(ExprNodeDesc en, Set<String> s, Set<ExprNodeDesc> visited) { + visited.add(en); + + // The current expression node is a column, see if the column alias is already a part of + // the return set, s. If not and we already have an entry in set s, this is an invalid expression + // and return false. + if (en instanceof ExprNodeColumnDesc) { + if (s.size() > 0 && + !s.contains(((ExprNodeColumnDesc)en).getTabAlias())) { + return false; + } + if (s.size() == 0) { + s.add(((ExprNodeColumnDesc)en).getTabAlias()); + } + return true; + } + if (en.getChildren() == null) { + return true; + } + + // Iterative through the children in a DFS manner to see if there is more than 1 table alias + // referenced by the current expression node. 
+ for (ExprNodeDesc cn : en.getChildren()) { + if (visited.contains(cn)) { + continue; + } + if (cn instanceof ExprNodeColumnDesc) { + s.add(((ExprNodeColumnDesc) cn).getTabAlias()); + } else if (!(cn instanceof ExprNodeConstantDesc)) { + if (!getTableAliasHelper(cn, s, visited)) { + return false; + } + } + } + return true; + } + + + /** + * If the given expression has just a single table associated with it, + * return the table alias associated with it. Else, return null. + * @param en + * @return The table alias associated with the expression if there is a single table + * reference. Else, return null. + */ + private String getTableAlias(ExprNodeDesc en) { + Set<String> s = new HashSet<String>(); + Set<ExprNodeDesc> visited = new HashSet<ExprNodeDesc>(); + boolean singleTableAlias = getTableAliasHelper(en, s, visited); + + if (!singleTableAlias || s.size() == 0) { + return null; + } + StringBuilder ans = new StringBuilder(); + for (String st : s) { + ans.append(st); + } + return ans.toString(); + } + + + /** + * The main process method for StructInExprProcessor to generate additional predicates + * containing only partition columns. + */ + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ExprNodeGenericFuncDesc fd = getInExprNode((ExprNodeDesc)nd); + + /***************************************************************************************\ + BEGIN : Early terminations for Partition Column Separator + /***************************************************************************************/ + // 1. If the input node is not an IN operator, we bail out. + if (fd == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + ", is not IN operator : "); + } + return null; + } + + // 2. 
Check if the input is an IN operator with struct children + List<ExprNodeDesc> children = fd.getChildren(); + if (!(children.get(0) instanceof ExprNodeGenericFuncDesc) || + (!(((ExprNodeGenericFuncDesc) children.get(0)).getGenericUDF() + instanceof GenericUDFStruct))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + ", children size " + + children.size() + ", child expression : " + children.get(0).getExprString()); + } + return null; + } + + // 3. See if the IN (STRUCT(EXP1, EXP2,..) has atleast one expression with partition + // column with single table alias. If not bail out. + // We might have expressions containing only partitioning columns, say, T1.A + T2.B + // where T1.A and T2.B are both partitioning columns. + // However, these expressions should not be considered as valid expressions for separation. + if (!hasAtleastOneSubExprWithPartColOrVirtualColWithOneTableAlias(children.get(0))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + + ", there are no expression containing partition columns in struct fields"); + } + return null; + } + + // 4. See if all the field expressions of the left hand side of IN are expressions + // containing constants or only partition columns coming from same table. + // If so, we need not perform this optimization and we should bail out. 
+ if (hasAllSubExprWithConstOrPartColOrVirtualColWithOneTableAlias(children.get(0))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + + ", all fields are expressions containing constants or only partition columns" + + "coming from same table"); + } + return null; + } + + /***************************************************************************************\ + END : Early terminations for Partition Column Separator + /***************************************************************************************/ + + + /***************************************************************************************\ + BEGIN : Actual processing of the IN (STRUCT(..)) expression. + /***************************************************************************************/ + Map<String, TableInfo> tableAliasToInfo = + new HashMap<>(); + ExprNodeGenericFuncDesc originalStructDesc = ((ExprNodeGenericFuncDesc) children.get(0)); + List<ExprNodeDesc> originalDescChildren = originalStructDesc.getChildren(); + /** + * PASS 1 : Iterate through the original IN(STRUCT(..)) and populate the tableAlias to + * predicate information inside tableAliasToInfo. + */ + for (int i = 0; i < originalDescChildren.size(); i++) { + ExprNodeDesc en = originalDescChildren.get(i); + String tabAlias = null; + + // If the current expression node does not have a virtual/partition column or + // single table alias reference, ignore it and move to the next expression node. + if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(en) || + (tabAlias = getTableAlias(en)) == null) { + continue; + } + + TableInfo currTableInfo = null; + + // If the table alias to information map already contains the current table, + // use the existing TableInfo object. Else, create a new one. 
+ if (tableAliasToInfo.containsKey(tabAlias)) { + currTableInfo = tableAliasToInfo.get(tabAlias); + } else { + currTableInfo = new TableInfo(); + } + currTableInfo.exprNodeLHSDescriptor.add(en); + + // Iterate through the children nodes of the IN clauses starting from index 1, + // which corresponds to the right hand side of the IN list. + // Insert the value corresponding to the current expression in currTableInfo.exprNodeRHSStructs. + for (int j = 1; j < children.size(); j++) { + ExprNodeDesc currChildStructExpr = children.get(j); + ExprNodeDesc newConstStructElement = null; + + // 1. Get the constant value associated with the current element in the struct. + // If the current child struct expression is a constant struct. + if (currChildStructExpr instanceof ExprNodeConstantDesc) { + List<Object> cnCols = (List<Object>)(((ExprNodeConstantDesc) (children.get(j))).getValue()); + newConstStructElement = new ExprNodeConstantDesc(cnCols.get(i)); + } else { + // This better be a generic struct with constant values as the children. + List<ExprNodeDesc> cnChildren = ((ExprNodeGenericFuncDesc) children.get(j)).getChildren(); + newConstStructElement = new ExprNodeConstantDesc( + (((ExprNodeConstantDesc) (cnChildren.get(i))).getValue())); + } + + // 2. Insert the current constant value into the exprNodeRHSStructs list. + // If there is no struct corresponding to the current element, create a new one, insert + // the constant value into it and add the struct as part of exprNodeRHSStructs. + if (currTableInfo.exprNodeRHSStructs.size() < j) { + List<ExprNodeDesc> newConstStructList = new ArrayList<ExprNodeDesc>(); + newConstStructList.add(newConstStructElement); + currTableInfo.exprNodeRHSStructs.add(newConstStructList); + } else { + // We already have a struct node for the current index. Insert the constant value + // into the corresponding struct node. 
+ currTableInfo.exprNodeRHSStructs.get(j-1).add(newConstStructElement); + } + } + + // Insert the current table alias entry into the map if not already present in tableAliasToInfo. + if (!tableAliasToInfo.containsKey(tabAlias)) { + tableAliasToInfo.put(tabAlias, currTableInfo); + } + } + + /** + * PASS 2 : Iterate through the tableAliasToInfo populated via PASS 1 + * to generate the new expression. + */ + // subExpr is the list containing generated IN clauses as a result of this optimization. + final List<ExprNodeDesc> subExpr = + new ArrayList<ExprNodeDesc>(originalDescChildren.size()+1); + + for (Entry<String, TableInfo> entry : + tableAliasToInfo.entrySet()) { + TableInfo currTableInfo = entry.getValue(); + List<List<ExprNodeDesc>> currConstStructList = currTableInfo.exprNodeRHSStructs; + + // IN(STRUCT(..)..) ExprNodeDesc list for the current table alias. + List<ExprNodeDesc> currInStructExprList = new ArrayList<ExprNodeDesc>(); + + // Add the left hand side of the IN clause which contains the struct definition. + currInStructExprList.add(ExprNodeGenericFuncDesc.newInstance + (FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), + STRUCT_UDF, + currTableInfo.exprNodeLHSDescriptor)); + + // Generate the right hand side of the IN clause + for (int i = 0; i < currConstStructList.size(); i++) { + List<ExprNodeDesc> currConstStruct = currConstStructList.get(i); + + // Add the current constant struct to the right hand side of the IN clause. + currInStructExprList.add(ExprNodeGenericFuncDesc.newInstance + (FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), + STRUCT_UDF, + currConstStruct)); + } + + // Add the newly generated IN clause to subExpr. + subExpr.add(new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, FunctionRegistry. 
+ getFunctionInfo(IN_UDF).getGenericUDF(), currInStructExprList)); + } + /***************************************************************************************\ + END : Actual processing of the IN (STRUCT(..)) expression. + /***************************************************************************************/ + + // If there is only 1 table ALIAS, return it + if (subExpr.size() == 1) { + // Return the new expression containing only partition columns + return subExpr.get(0); + } + // Return the new expression containing only partition columns + // after concatenating them with AND operator + return new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java index 4799b4d..a1a49cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java @@ -18,14 +18,10 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.Stack; import org.apache.calcite.util.Pair; @@ -50,18 +46,15 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import 
org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.ListMultimap; /** @@ -78,48 +71,14 @@ public class PointLookupOptimizer implements Transform { GenericUDFIn.class.getAnnotation(Description.class).name(); private static final String STRUCT_UDF = GenericUDFStruct.class.getAnnotation(Description.class).name(); - private static final String AND_UDF = - GenericUDFOPAnd.class.getAnnotation(Description.class).name(); - // these are closure-bound for all the walkers in context public final int minOrExpr; - public final boolean extract; - public final boolean testMode; /* * Pass in configs and pre-create a parse context */ - public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) { + public PointLookupOptimizer(final int min) { this.minOrExpr = min; - this.extract = extract; - this.testMode = testMode; - } - - // Hash Set iteration isn't ordered, but force string sorted order - // to get a consistent test run. 
- private Collection<ExprNodeDescEqualityWrapper> sortForTests( - Set<ExprNodeDescEqualityWrapper> valuesExpr) { - if (!testMode) { - // normal case - sorting is wasted for an IN() - return valuesExpr; - } - final Collection<ExprNodeDescEqualityWrapper> sortedValues; - - sortedValues = ImmutableSortedSet.copyOf( - new Comparator<ExprNodeDescEqualityWrapper>() { - @Override - public int compare(ExprNodeDescEqualityWrapper w1, - ExprNodeDescEqualityWrapper w2) { - // fail if you find nulls (this is a test-code section) - if (w1.equals(w2)) { - return 0; - } - return w1.getExprNodeDesc().getExprString() - .compareTo(w2.getExprNodeDesc().getExprString()); - } - }, valuesExpr); - - return sortedValues; } @Override @@ -152,9 +111,6 @@ public class PointLookupOptimizer implements Transform { if (LOG.isDebugEnabled()) { LOG.debug("Generated new predicate with IN clause: " + newPredicate); } - if (!extract) { - filterOp.getConf().setOrigPredicate(predicate); - } filterOp.getConf().setPredicate(newPredicate); } @@ -326,50 +282,6 @@ public class PointLookupOptimizer implements Transform { newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren); - if (extract && columns.size() > 1) { - final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1); - - // extract pre-conditions for the tuple expressions - // (a,b) IN ((1,2),(2,3)) -> - // ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3)) - - for (String keyString : columnConstantsMap.keySet()) { - final Set<ExprNodeDescEqualityWrapper> valuesExpr = - new HashSet<ExprNodeDescEqualityWrapper>(children.size()); - final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial = - columnConstantsMap.get(keyString); - for (int i = 0; i < children.size(); i++) { - Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial - .get(i); - valuesExpr - .add(new 
ExprNodeDescEqualityWrapper(columnConstant.right)); - } - ExprNodeColumnDesc lookupCol = partial.get(0).left; - // generate a partial IN clause, if the column is a partition column - if (lookupCol.getIsPartitionColOrVirtualCol() - || valuesExpr.size() < children.size()) { - // optimize only nDV reductions - final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>(); - inExpr.add(lookupCol); - for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) { - inExpr.add(value.getExprNodeDesc()); - } - subExpr.add(new ExprNodeGenericFuncDesc( - TypeInfoFactory.booleanTypeInfo, FunctionRegistry - .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr)); - } - } - // loop complete, inspect the sub expressions generated - if (subExpr.size() > 0) { - // add the newPredicate to the end & produce an AND clause - subExpr.add(newPredicate); - newPredicate = new ExprNodeGenericFuncDesc( - TypeInfoFactory.booleanTypeInfo, FunctionRegistry - .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr); - } - // else, newPredicate is unmodified - } - return newPredicate; } http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 7cdc730..2ab1575 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -48,9 +48,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** @@ -364,6 +367,36 @@ public final class PcrExprProcFactory { return getResultWrapFromResults(results, fd, newNodeOutputs); } return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs)); + } else if (fd.getGenericUDF() instanceof GenericUDFIn) { + List<ExprNodeDesc> children = fd.getChildren(); + boolean removePredElem = false; + ExprNodeDesc lhs = children.get(0); + + if (lhs instanceof ExprNodeGenericFuncDesc) { + // Make sure that the generic udf is deterministic + if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs) + .getGenericUDF())) { + boolean hasOnlyPartCols = true; + for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) { + // Check if the current field expression contains only + // partition column or a virtual column or constants. + // If yes, this filter predicate is a candidate for this optimization. + if (!(ed instanceof ExprNodeColumnDesc && + ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) { + hasOnlyPartCols = false; + break; + } + } + removePredElem = hasOnlyPartCols; + } + } + + // If removePredElem is set to true, return true as this is a potential candidate + // for partition condition remover. Else, set the WalkState for this node to unknown. + return removePredElem ? 
+ new NodeInfoWrapper(WalkState.TRUE, null, + new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) : + new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ; } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) { // If it's a non-deterministic UDF, set unknown to true return new NodeInfoWrapper(WalkState.UNKNOWN, null, http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java index 7262164..fd51628 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java @@ -55,8 +55,7 @@ public final class OpProcFactory extends PrunerOperatorFactory { TableScanOperator top) throws SemanticException, UDFArgumentException { OpWalkerCtx owc = (OpWalkerCtx) procCtx; // Otherwise this is not a sampling predicate and we need to - ExprNodeDesc predicate = fop.getConf().getOrigPredicate(); - predicate = predicate == null ? 
fop.getConf().getPredicate() : predicate; + ExprNodeDesc predicate = fop.getConf().getPredicate(); String alias = top.getConf().getAlias(); // Generate the partition pruning predicate http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index 6a31689..ccc4bb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -79,7 +79,6 @@ public class FilterDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate; - private transient ExprNodeDesc origPredicate; private boolean isSamplingPred; private transient SampleDesc sampleDescr; //Is this a filter that should perform a comparison for sorted searches @@ -151,14 +150,6 @@ public class FilterDesc extends AbstractOperatorDesc { this.isSortedFilter = isSortedFilter; } - public void setOrigPredicate(ExprNodeDesc origPredicate) { - this.origPredicate = origPredicate; - } - - public ExprNodeDesc getOrigPredicate() { - return origPredicate; - } - /** * Some filters are generated or implied, which means it is not in the query. * It is added by the analyzer. 
For example, when we do an inner join, we add http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/queries/clientpositive/pcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/pcs.q b/ql/src/test/queries/clientpositive/pcs.q new file mode 100644 index 0000000..4b35a4d --- /dev/null +++ b/ql/src/test/queries/clientpositive/pcs.q @@ -0,0 +1,66 @@ +drop table pcs_t1; +drop table pcs_t2; + +create table pcs_t1 (key int, value string) partitioned by (ds string); +insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key; +insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key; +insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key; + +analyze table pcs_t1 partition(ds) compute statistics; +analyze table pcs_t1 partition(ds) compute statistics for columns; + +set hive.optimize.point.lookup = true; +set hive.optimize.point.lookup.min = 1; + +explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds; +select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds; + +set hive.optimize.point.lookup = false; +set hive.optimize.partition.columns.separate=true; +set hive.optimize.ppd=true; + +explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); +select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); + +explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)); +select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)); + +explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on 
a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')); + +select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')); + +explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)); +select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)); + +explain select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); +select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); + +EXPLAIN EXTENDED +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds; + +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds; + +explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)); +select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)); + +explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3)); + +explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)); +select ds from pcs_t1 where 
struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)); + +explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); +select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/queries/clientpositive/pointlookup.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q index 1aef2ef..c460f39 100644 --- a/ql/src/test/queries/clientpositive/pointlookup.q +++ b/ql/src/test/queries/clientpositive/pointlookup.q @@ -18,8 +18,7 @@ WHERE set hive.optimize.point.lookup.min=3; -set hive.optimize.point.lookup.extract=false; - +set hive.optimize.partition.columns.separate=false; explain SELECT key FROM src @@ -38,8 +37,7 @@ WHERE AND value = '3')) ; -set hive.optimize.point.lookup.extract=true; - +set hive.optimize.partition.columns.separate=true; explain SELECT key FROM src http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/queries/clientpositive/pointlookup2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q index 31bebbb..94e99fb 100644 --- a/ql/src/test/queries/clientpositive/pointlookup2.q +++ b/ql/src/test/queries/clientpositive/pointlookup2.q @@ -14,7 +14,7 @@ from pcr_t1 insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2; set hive.optimize.point.lookup.min=2; -set hive.optimize.point.lookup.extract=true; +set hive.optimize.partition.columns.separate=true; explain extended select key, value, ds 
http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/queries/clientpositive/pointlookup3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/pointlookup3.q b/ql/src/test/queries/clientpositive/pointlookup3.q index 3daa94b..79e7348 100644 --- a/ql/src/test/queries/clientpositive/pointlookup3.q +++ b/ql/src/test/queries/clientpositive/pointlookup3.q @@ -6,7 +6,7 @@ insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') sel insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key; set hive.optimize.point.lookup.min=2; -set hive.optimize.point.lookup.extract=true; +set hive.optimize.partition.columns.separate=true; explain extended select key, value, ds1, ds2 http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index eca29df..ddb05e2 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -153,7 +153,7 @@ STAGE PLANS: TableScan alias: acid Filter Operator - predicate: (key = 'foo') (type: boolean) + predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) Select Operator expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string) outputColumnNames: _col0, _col3 @@ -390,7 +390,7 @@ STAGE PLANS: TableScan alias: acid Filter Operator - predicate: (key = 'foo') (type: boolean) + predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) Select Operator expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string) outputColumnNames: _col0, _col3 http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 4320f01..7b428bc 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -1275,21 +1275,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: llap @@ -4076,21 +4061,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: 
Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: uber @@ -5229,21 +5199,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 - Select Operator - expressions: UDFToDouble(hr) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart_orc - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: uber http://git-wip-us.apache.org/repos/asf/hive/blob/c9246f44/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index e30465d..e9192a3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -1275,21 +1275,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) 
(type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: llap @@ -4076,21 +4061,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: vectorized, uber @@ -5229,21 +5199,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 - Select Operator - expressions: UDFToDouble(hr) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart_orc - 
Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: uber