HIVE-11461 : Transform flat AND/OR into IN struct clause (Jesus Camacho Rodriguez, Ashutosh Chauhan via Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/139101d6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/139101d6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/139101d6

Branch: refs/heads/hbase-metastore
Commit: 139101d6cf3be23d9ec8a88a9a75b5969434607b
Parents: 5b67f35
Author: Ashutosh Chauhan <hashut...@apache.org>
Authored: Thu Aug 13 09:16:28 2015 -0700
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Thu Aug 13 09:16:28 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 .../hadoop/hive/ql/lib/PreOrderOnceWalker.java  |  44 +++
 .../hadoop/hive/ql/optimizer/Optimizer.java     |   6 +
 .../hive/ql/optimizer/PointLookupOptimizer.java | 280 +++++++++++++++++++
 .../ql/optimizer/pcr/PcrExprProcFactory.java    |   3 +-
 .../hive/ql/optimizer/ppr/OpProcFactory.java    |   3 +-
 .../apache/hadoop/hive/ql/plan/FilterDesc.java  |  14 +-
 .../annotate_stats_deep_filters.q               |   3 +-
 .../alter_partition_coltype.q.out               |  12 +-
 .../clientpositive/annotate_stats_filter.q.out  |   8 +-
 .../results/clientpositive/flatten_and_or.q.out |   8 +-
 ql/src/test/results/clientpositive/pcr.q.out    |  12 +-
 .../results/clientpositive/ppd_transform.q.out  |  12 +-
 .../test/results/clientpositive/spark/pcr.q.out |  12 +-
 .../clientpositive/spark/ppd_transform.q.out    |  12 +-
 .../clientpositive/spark/vectorized_case.q.out  |   2 +-
 .../clientpositive/tez/explainuser_1.q.out      |   2 +-
 .../clientpositive/tez/vectorized_case.q.out    |   2 +-
 .../clientpositive/vectorized_case.q.out        |   9 +-
 19 files changed, 397 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
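
In short: the new PointLookupOptimizer rewrites a flat disjunction of constant
equality predicates into a single IN clause, using a struct over the columns
when more than one column is involved. For example (taken from the pcr.q.out
update below):

  (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2)))

becomes

  (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))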


http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d1cb5fb..11b9f78 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1186,6 +1186,8 @@ public class HiveConf extends Configuration {
        "Whether to transitively replicate predicate filters over equijoin conditions."),
    HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true,
        "Whether to push predicates down into storage handlers.  Ignored when hive.optimize.ppd is false."),
+    HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
+         "Whether to transform OR clauses in Filter operators into IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),

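The flag defaults to true; a session can fall back to the old flat OR plans
with a plain set command, exactly as the updated annotate_stats_deep_filters.q
test below does:

  set hive.optimize.point.lookup=false;
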
http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java
new file mode 100644
index 0000000..d891fc2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/PreOrderOnceWalker.java
@@ -0,0 +1,44 @@
+package org.apache.hadoop.hive.ql.lib;
+
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Graph walker that takes a list of starting nodes and walks them in pre-order.
+ * If a rule fires on a given node, we do not try to apply the rule
+ * on its children.
+ */
+public class PreOrderOnceWalker extends PreOrderWalker {
+
+  public PreOrderOnceWalker(Dispatcher disp) {
+    super(disp);
+  }
+
+  /**
+   * Walk the current operator and its descendants.
+   *
+   * @param nd
+   *          current operator in the graph
+   * @throws SemanticException
+   */
+  @Override
+  public void walk(Node nd) throws SemanticException {
+    opStack.push(nd);
+    dispatch(nd, opStack);
+
+    // The rule has been applied to this node; do not descend into its children
+    if (retMap.get(nd) != null) {
+      opStack.pop();
+      return;
+    }
+
+    // No rule fired here: recursively walk the children
+    if (nd.getChildren() != null) {
+      for (Node n : nd.getChildren()) {
+        walk(n);
+      }
+    }
+
+    opStack.pop();
+  }
+
+}
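
For reference, the walker is wired up like any other Hive graph walker; a minimal
sketch of the usage pattern, mirroring PointLookupOptimizer.generateInClause below
(all names come from this patch):

  Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
  exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new OrExprProcessor());

  // Dispatch the processor over the expression tree; retMap entries recorded by
  // PreOrderOnceWalker stop the recursion below nodes where the rule fired.
  Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null);
  GraphWalker walker = new PreOrderOnceWalker(disp);

  List<Node> startNodes = new ArrayList<Node>();
  startNodes.add(predicate);  // root ExprNodeDesc of the Filter predicate

  HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
  walker.startWalking(startNodes, outputMap);
  ExprNodeDesc rewritten = (ExprNodeDesc) outputMap.get(predicate);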

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index c4e11b9..14f362f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -81,6 +81,12 @@ public class Optimizer {
       // are combined and may become eligible for reduction (like is not null filter).
         transformations.add(new ConstantPropagate());
     }
+
+    // Try to transform OR predicates in Filter into IN clauses.
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+      transformations.add(new PointLookupOptimizer());
+    }
+
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PartitionPruner());
       transformations.add(new PartitionConditionRemover());

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
new file mode 100644
index 0000000..6a8acec
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -0,0 +1,280 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.calcite.util.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.TypeRule;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+
+/**
+ * This optimization will take a Filter expression, and if its predicate contains
+ * an OR operator whose children are constant equality expressions, it will try
+ * to generate an IN clause (which is more efficient). If the OR operator contains
+ * AND operator children, the optimization might generate an IN clause that uses
+ * structs.
+ */
+public class PointLookupOptimizer implements Transform {
+
+  private static final Log LOG = LogFactory.getLog(PointLookupOptimizer.class);
+  private static final String IN_UDF =
+          GenericUDFIn.class.getAnnotation(Description.class).name();
+  private static final String STRUCT_UDF =
+          GenericUDFStruct.class.getAnnotation(Description.class).name();
+
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    // 1. Trigger transformation
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), new FilterTransformer());
+
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+    GraphWalker ogw = new ForwardWalker(disp);
+
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    return pctx;
+  }
+
+  private class FilterTransformer implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator filterOp = (FilterOperator) nd;
+      ExprNodeDesc predicate = filterOp.getConf().getPredicate();
+
+      // Try to generate an IN clause from the OR predicate
+      ExprNodeDesc newPredicate = generateInClause(predicate);
+      if (newPredicate != null) {
+        // Replace filter in current FIL with new FIL
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Generated new predicate with IN clause: " + newPredicate);
+        }
+        filterOp.getConf().setOrigPredicate(predicate);
+        filterOp.getConf().setPredicate(newPredicate);
+      }
+
+      return null;
+    }
+
+    private ExprNodeDesc generateInClause(ExprNodeDesc predicate) throws SemanticException {
+      Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+      exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new OrExprProcessor());
+
+      // The dispatcher fires the processor corresponding to the closest matching
+      // rule and passes the context along
+      Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null);
+      GraphWalker egw = new PreOrderOnceWalker(disp);
+
+      List<Node> startNodes = new ArrayList<Node>();
+      startNodes.add(predicate);
+
+      HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+      egw.startWalking(startNodes, outputMap);
+      return (ExprNodeDesc) outputMap.get(predicate);
+    }
+  }
+
+  private class OrExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+
+      // 1. If it is not an OR operator, we bail out.
+      if (!FunctionRegistry.isOpOr(fd)) {
+        return null;
+      }
+
+      // 2. It is an OR operator
+      List<ExprNodeDesc> children = fd.getChildren();
+      ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
+              ArrayListMultimap.create();
+      boolean modeAnd = false;
+      for (int i = 0; i < children.size(); i++) {
+        ExprNodeDesc child = children.get(i);
+
+        // - If the child is an AND operator, extract its children
+        // - Otherwise, take the child itself
+        final List<ExprNodeDesc> conjunctions;
+        if (FunctionRegistry.isOpAnd(child)) {
+          // If it is the first child, we set the mode variable value
+          // Otherwise, if the mode we are working on is different, we
+          // bail out
+          if (i == 0) {
+            modeAnd = true;
+          } else {
+            if (!modeAnd) {
+              return null;
+            }
+          }
+
+          // Multiple children
+          conjunctions = child.getChildren();
+        } else {
+          // If it is the first child, we set the mode variable value
+          // Otherwise, if the mode we are working on is different, we
+          // bail out
+          if (i == 0) {
+            modeAnd = false;
+          } else {
+            if (modeAnd) {
+              return null;
+            }
+          }
+
+          // One child
+          conjunctions = new ArrayList<ExprNodeDesc>(1);
+          conjunctions.add(child);
+        }
+
+        // 3. We will extract the literals to introduce in the IN clause.
+        //    If the patterns OR-AND-EqOp or OR-EqOp are not matched, we bail out
+        for (ExprNodeDesc conjunction: conjunctions) {
+          if (!(conjunction instanceof ExprNodeGenericFuncDesc)) {
+            return null;
+          }
+
+          ExprNodeGenericFuncDesc conjCall = (ExprNodeGenericFuncDesc) conjunction;
+          Class<? extends GenericUDF> genericUdfClass = conjCall.getGenericUDF().getClass();
+          if(GenericUDFOPEqual.class == genericUdfClass) {
+            if (conjCall.getChildren().get(0) instanceof ExprNodeColumnDesc &&
+                    conjCall.getChildren().get(1) instanceof ExprNodeConstantDesc) {
+              ExprNodeColumnDesc ref = (ExprNodeColumnDesc) conjCall.getChildren().get(0);
+              String refString = ref.toString();
+              columnConstantsMap.put(refString,
+                      new Pair<ExprNodeColumnDesc,ExprNodeConstantDesc>(
+                              ref, (ExprNodeConstantDesc) conjCall.getChildren().get(1)));
+              if (columnConstantsMap.get(refString).size() != i+1) {
+                // The column must appear exactly once in this and every previous disjunct; otherwise we bail out
+                return null;
+              }
+            } else if (conjCall.getChildren().get(1) instanceof ExprNodeColumnDesc &&
+                    conjCall.getChildren().get(0) instanceof ExprNodeConstantDesc) {
+              ExprNodeColumnDesc ref = (ExprNodeColumnDesc) conjCall.getChildren().get(1);
+              String refString = ref.toString();
+              columnConstantsMap.put(refString,
+                      new Pair<ExprNodeColumnDesc,ExprNodeConstantDesc>(
+                              ref, (ExprNodeConstantDesc) conjCall.getChildren().get(0)));
+              if (columnConstantsMap.get(refString).size() != i+1) {
+                // The column must appear exactly once in this and every previous disjunct; otherwise we bail out
+                return null;
+              }
+            } else {
+              // We bail out
+              return null;
+            }
+          } else {
+            // We bail out
+            return null;
+          }
+        }
+      }
+
+      // 4. We build the new predicate and return it
+      ExprNodeDesc newPredicate = null;
+      List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>(children.size());
+      // 4.1 Create structs
+      List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
+      List<String> names = new ArrayList<String>();
+      List<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
+      for (int i = 0; i < children.size(); i++) {
+        List<ExprNodeDesc> constantFields = new ArrayList<ExprNodeDesc>(children.size());
+
+        for (String keyString : columnConstantsMap.keySet()) {
+          Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant =
+                  columnConstantsMap.get(keyString).get(i);
+          if (i == 0) {
+            columns.add(columnConstant.left);
+            names.add(columnConstant.left.getColumn());
+            typeInfos.add(columnConstant.left.getTypeInfo());
+          }
+          constantFields.add(columnConstant.right);
+        }
+
+        if (i == 0) {
+          ExprNodeDesc columnsRefs;
+          if (columns.size() == 1) {
+            columnsRefs = columns.get(0);
+          } else {
+            columnsRefs = new ExprNodeGenericFuncDesc(
+                    TypeInfoFactory.getStructTypeInfo(names, typeInfos),
+                    FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+                    columns);
+          }
+          newChildren.add(columnsRefs);
+        }
+        ExprNodeDesc values;
+        if (constantFields.size() == 1) {
+          values = constantFields.get(0);
+        } else {
+          values = new ExprNodeGenericFuncDesc(
+                  TypeInfoFactory.getStructTypeInfo(names, typeInfos),
+                  FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+                  constantFields);
+        }
+        newChildren.add(values);
+      }
+      newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+              FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
+
+      return newPredicate;
+    }
+
+  }
+
+}
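
To make the matched shapes concrete: the rule only fires on a flat disjunction
whose disjuncts are (column = constant) equalities, or conjunctions of them over
the same set of columns. Illustrative predicates (the first two come from the
q.out updates below; the last is a hypothetical shape that is left alone, since
the disjuncts do not agree):

  ((key = '0') and (value = '8')) or ((key = '1') and (value = '5'))
    => (struct(key,value)) IN (const struct('0','8'), const struct('1','5'))

  (state = 'OH') or (state = 'CA')
    => (state) IN ('OH', 'CA')

  ((key = '0') and (value = '8')) or (key = '1')
    => unchanged; mixing AND and plain disjuncts trips the modeAnd check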

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index 71a6c73..825938a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /**
@@ -379,7 +380,7 @@ public final class PcrExprProcFactory {
           }
         }
 
-        if (has_part_col) {
+        if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
           //  we need to evaluate result for every pruned partition
           if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
             // if the return type of the GenericUDF is boolean and all partitions agree on

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
index fd51628..7262164 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
@@ -55,7 +55,8 @@ public final class OpProcFactory extends PrunerOperatorFactory {
         TableScanOperator top) throws SemanticException, UDFArgumentException {
       OpWalkerCtx owc = (OpWalkerCtx) procCtx;
       // Otherwise this is not a sampling predicate and we need to
-      ExprNodeDesc predicate = fop.getConf().getPredicate();
+      ExprNodeDesc predicate = fop.getConf().getOrigPredicate();
+      predicate = predicate == null ? fop.getConf().getPredicate() : predicate;
       String alias = top.getConf().getAlias();
 
       // Generate the partition pruning predicate

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
index 5408dc8..6a31689 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
@@ -70,14 +70,16 @@ public class FilterDesc extends AbstractOperatorDesc {
      SampleDesc desc = new SampleDesc(numerator, denominator, null, inputPruning);
       return desc;
     }
-    
+
+    @Override
     public String toString() {
-      return inputPruning ? "BUCKET " + numerator + " OUT OF " + denominator: null;  
+      return inputPruning ? "BUCKET " + numerator + " OUT OF " + denominator: null;
     }
   }
 
   private static final long serialVersionUID = 1L;
   private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate;
+  private transient ExprNodeDesc origPredicate;
   private boolean isSamplingPred;
   private transient SampleDesc sampleDescr;
   //Is this a filter that should perform a comparison for sorted searches
@@ -149,6 +151,14 @@ public class FilterDesc extends AbstractOperatorDesc {
     this.isSortedFilter = isSortedFilter;
   }
 
+  public void setOrigPredicate(ExprNodeDesc origPredicate) {
+    this.origPredicate = origPredicate;
+  }
+
+  public ExprNodeDesc getOrigPredicate() {
+    return origPredicate;
+  }
+
   /**
    * Some filters are generated or implied, which means it is not in the query.
    * It is added by the analyzer. For example, when we do an inner join, we add

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
index c027532..e01a654 100644
--- a/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
+++ b/ql/src/test/queries/clientpositive/annotate_stats_deep_filters.q
@@ -20,6 +20,7 @@ analyze table over1k compute statistics;
 analyze table over1k compute statistics for columns;
 
 set hive.stats.fetch.column.stats=true;
+set hive.optimize.point.lookup=false;
 explain select count(*) from over1k where (
 (t=1 and si=2)
 or (t=2 and si=3)
@@ -63,4 +64,4 @@ or (t=17 and si=18)
 or (t=27 and si=28)
 or (t=37 and si=38)
 or (t=47 and si=48)
-or (t=52 and si=53));
\ No newline at end of file
+or (t=52 and si=53));

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 9fc3c8d..06515da 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -1134,11 +1134,15 @@ STAGE PLANS:
           alias: alterdynamic_part_table
          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
          GatherStats: false
-          Select Operator
-            expressions: intcol (type: string)
-            outputColumnNames: _col0
+          Filter Operator
+            isSamplingPred: false
+            predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-            ListSink
+            Select Operator
+              expressions: intcol (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              ListSink

PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 492e302..af1e1c3 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -678,15 +678,15 @@ STAGE PLANS:
             alias: loc_orc
            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
            Filter Operator
-              predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
-              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+              predicate: (state) IN ('OH', 'CA') (type: boolean)
+              Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/flatten_and_or.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
index 9c51ff3..5f25daa 100644
--- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
+++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -44,15 +44,15 @@ STAGE PLANS:
             alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: string)
                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index d7c40a3..4c9ea77 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -2475,16 +2475,16 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
-              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: key (type: int), value (type: string), ds (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                  sort order: +++
-                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                   tag: -1
                   auto parallelism: false
       Path -> Alias:
@@ -2588,13 +2588,13 @@ STAGE PLANS:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
 #### A masked pattern was here ####
            NumFilesPerFileSink: 1
-            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
index 17248e4..f536767 100644
--- a/ql/src/test/results/clientpositive/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
@@ -390,21 +390,21 @@ STAGE PLANS:
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
-                  predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  predicate: (_col0) IN ('a', 'b') (type: boolean)
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                Filter Operator
-                  predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  predicate: (_col0) IN ('c', 'd') (type: boolean)
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/spark/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index fb08f10..5aa0df8 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -2534,16 +2534,16 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
-                    Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string), ds (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                        sort order: +++
-                        Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         auto parallelism: false
             Path -> Alias:
@@ -2648,13 +2648,13 @@ STAGE PLANS:
               Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
 #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
index 52a847a..a6e6e38 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
@@ -405,21 +405,21 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
-                        predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        predicate: (_col0) IN ('a', 'b') (type: boolean)
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
-                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Filter Operator
-                        predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        predicate: (_col0) IN ('c', 'd') (type: boolean)
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
-                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index c2250e6..54003c3 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
            Select Operator
              expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 9756b0c..e8a9786 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2909,7 +2909,7 @@ Stage-0
       Select Operator [SEL_2]
          outputColumnNames:["_col0"]
          Filter Operator [FIL_4]
-            predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
+            predicate:(c_int) IN (-6, 6) (type: boolean)
             TableScan [TS_0]
                alias:cbo_t1
 

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index c2250e6..54003c3 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
         TableScan
           alias: alltypesorc
           Filter Operator
-            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+            predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
            Select Operator
              expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
               outputColumnNames: _col0, _col1, _col2

http://git-wip-us.apache.org/repos/asf/hive/blob/139101d6/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 73bf12d..9e47014 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -46,20 +46,19 @@ STAGE PLANS:
             alias: alltypesorc
            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-              predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-      Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
