HIVE-20296: Improve HivePointLookupOptimizerRule to be able to extract from more sophisticated contexts (Zoltan Haindrich reviewed by Ashutosh Chauhan)
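Summary of the change (editorial note; the behaviour described here is taken from the patch and its new unit tests, and the example predicates below are illustrative rewrites, not text from the commit): the OR-to-IN extraction in HivePointLookupOptimizerRule no longer requires every OR operand to constrain exactly the same columns. Each operand is parsed into a ConstraintGroup of column=literal equalities, the groups are indexed by the set of columns they constrain (Multimaps.index), and every group with at least minNumORClauses (and at least two) members is rewritten into an IN clause — using a ROW/struct key when more than one column is involved — while unparseable operands and undersized groups are kept as they were. Previously a single disjunct that did not fit the pattern caused the whole rewrite to be abandoned.

Illustrative rewrites (the first two use the f1/f2 fields of the new test's MyRecord; the third is the date_dim predicate visible in the updated query47/query57 plans):

    WHERE (f1 = 1 AND f2 = 1) OR (f1 = 2 AND f2 = 2)
      =>  WHERE (f1, f2) IN ((1, 1), (2, 2))

    WHERE f2 = 99 OR f1 = 1 OR f1 = 2
      =>  WHERE f1 IN (1, 2) OR f2 = 99

    WHERE (d_year = 1999 AND d_moy = 12) OR (d_year = 2001 AND d_moy = 1) OR d_year = 2000
      =>  WHERE struct(d_year, d_moy) IN (struct(1999, 12), struct(2001, 1)) OR d_year = 2000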
Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/93de9504 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/93de9504 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/93de9504 Branch: refs/heads/master Commit: 93de95041231aa82f7837f254f80a231f5c8d36e Parents: 86685c6 Author: Zoltan Haindrich <k...@rxd.hu> Authored: Tue Sep 11 13:06:53 2018 +0200 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Tue Sep 11 13:06:53 2018 +0200 ---------------------------------------------------------------------- .../rules/HivePointLookupOptimizerRule.java | 309 ++++++++++++------- .../rules/TestHivePointLookupOptimizerRule.java | 174 +++++++++++ .../druid/druidmini_test_ts.q.out | 2 +- .../clientpositive/llap/bucketpruning1.q.out | 7 +- .../clientpositive/perf/spark/query15.q.out | 2 +- .../clientpositive/perf/spark/query47.q.out | 30 +- .../clientpositive/perf/spark/query57.q.out | 30 +- .../clientpositive/perf/tez/query15.q.out | 2 +- .../clientpositive/perf/tez/query47.q.out | 8 +- .../clientpositive/perf/tez/query57.q.out | 8 +- 10 files changed, 412 insertions(+), 160 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java index eff9a31..4d42ab4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -44,22 +46,19 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ArrayListMultimap; +import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.LinkedHashMultimap; -import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; import com.google.common.collect.Sets; - public abstract class HivePointLookupOptimizerRule extends RelOptRule { /** @@ -74,6 +73,7 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { super(operand(Filter.class, any()), minNumORClauses); } + @Override public void onMatch(RelOptRuleCall call) { final Filter filter = call.rel(0); final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); @@ 
-93,12 +93,13 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { * to generate an IN clause (which is more efficient). If the OR operator contains * AND operator children, the optimization might generate an IN clause that uses * structs. - */ + */ public static class JoinCondition extends HivePointLookupOptimizerRule { public JoinCondition (int minNumORClauses) { super(operand(Join.class, any()), minNumORClauses); } - + + @Override public void onMatch(RelOptRuleCall call) { final Join join = call.rel(0); final RexBuilder rexBuilder = join.getCluster().getRexBuilder(); @@ -132,7 +133,7 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { public void analyzeCondition(RelOptRuleCall call, RexBuilder rexBuilder, - AbstractRelNode node, + AbstractRelNode node, RexNode condition) { // 1. We try to transform possible candidates @@ -173,29 +174,31 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { @Override public RexNode visitCall(RexCall call) { RexNode node; switch (call.getKind()) { - case AND: - ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) call).getOperands()); - List<RexNode> newOperands = new ArrayList<RexNode>(); - for (RexNode operand: operands) { - RexNode newOperand; - if (operand.getKind() == SqlKind.OR) { - try { - newOperand = transformIntoInClauseCondition(rexBuilder, - nodeOp.getRowType(), operand, minNumORClauses); - if (newOperand == null) { - newOperand = operand; - } - } catch (SemanticException e) { - LOG.error("Exception in HivePointLookupOptimizerRule", e); - return call; + // FIXME: I don't think there is a need for this right now...calcite have already done the flattening/etc + // removing this case clause will not miss the OR below AND + case AND: + ImmutableList<RexNode> operands = RexUtil.flattenAnd(call.getOperands()); + List<RexNode> newOperands = new ArrayList<RexNode>(); + for (RexNode operand : operands) { + RexNode newOperand; + if (operand.getKind() == SqlKind.OR) { + try { + newOperand = transformIntoInClauseCondition(rexBuilder, + nodeOp.getRowType(), operand, minNumORClauses); + if (newOperand == null) { + newOperand = operand; } - } else { - newOperand = operand; + } catch (SemanticException e) { + LOG.error("Exception in HivePointLookupOptimizerRule", e); + return call; } - newOperands.add(newOperand); + } else { + newOperand = operand; } - node = RexUtil.composeConjunction(rexBuilder, newOperands, false); - break; + newOperands.add(newOperand); + } + node = RexUtil.composeConjunction(rexBuilder, newOperands, false); + break; case OR: try { node = transformIntoInClauseCondition(rexBuilder, @@ -214,106 +217,184 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { return node; } - private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema, + /** + * Represents a simple contraint. + * + * Example: a=1 + */ + static class Constraint { + + private RexLiteral literal; + private RexInputRef inputRef; + + public Constraint(RexInputRef inputRef, RexLiteral literal) { + this.literal = literal; + this.inputRef = inputRef; + } + + /** + * Interprets argument as a constraint; if not possible returns null. 
+ */ + public static Constraint of(RexNode n) { + if (!(n instanceof RexCall)) { + return null; + } + RexCall call = (RexCall) n; + if (call.getOperator().getKind() != SqlKind.EQUALS) { + return null; + } + RexNode opA = call.operands.get(0); + RexNode opB = call.operands.get(1); + if (opA instanceof RexLiteral && opB instanceof RexInputRef) { + RexLiteral rexLiteral = (RexLiteral) opA; + RexInputRef rexInputRef = (RexInputRef) opB; + return new Constraint(rexInputRef, rexLiteral); + } + if (opA instanceof RexInputRef && opB instanceof RexLiteral) { + RexLiteral rexLiteral = (RexLiteral) opB; + RexInputRef rexInputRef = (RexInputRef) opA; + return new Constraint(rexInputRef, rexLiteral); + } + return null; + } + + public RexInputRef getKey() { + return inputRef; + } + + } + + /** + * A group of Constraints. + * + * Examples: + * (a=1 && b=1) + * (a=1) + * + * Note: any rexNode is accepted as constraint; but it might be keyed with the empty key; + * which means it can't be parsed as a constraint for some reason; but for completeness... + * + */ + static class ConstraintGroup { + + public static final Function<ConstraintGroup, Set<RexInputRef>> KEY_FUNCTION = new Function<ConstraintGroup, Set<RexInputRef>>() { + + @Override + public Set<RexInputRef> apply(ConstraintGroup a) { + return a.key; + } + }; + private Map<RexInputRef, Constraint> constraints = new HashMap<>(); + private RexNode originalRexNode; + private final Set<RexInputRef> key; + + public ConstraintGroup(RexNode rexNode) { + originalRexNode = rexNode; + + final List<RexNode> conjunctions = RelOptUtil.conjunctions(rexNode); + + for (RexNode n : conjunctions) { + + Constraint c = Constraint.of(n); + if (c == null) { + // interpretation failed; make this node opaque + key = Collections.emptySet(); + return; + } + constraints.put(c.getKey(), c); + } + if (constraints.size() != conjunctions.size()) { + LOG.debug("unexpected situation; giving up on this branch"); + key = Collections.emptySet(); + return; + } + key = constraints.keySet(); + } + + public List<RexNode> getValuesInOrder(List<RexInputRef> columns) throws SemanticException { + List<RexNode> ret = new ArrayList<>(); + for (RexInputRef rexInputRef : columns) { + Constraint constraint = constraints.get(rexInputRef); + if (constraint == null) { + throw new SemanticException("Unable to find constraint which was earlier added."); + } + ret.add(constraint.literal); + } + return ret; + } + } + + private RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema, RexNode condition, int minNumORClauses) throws SemanticException { assert condition.getKind() == SqlKind.OR; - // 1. We extract the information necessary to create the predicate for the new - // filter - ListMultimap<RexInputRef,RexLiteral> columnConstantsMap = ArrayListMultimap.create(); ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); if (operands.size() < minNumORClauses) { // We bail out return null; } + List<ConstraintGroup> allNodes = new ArrayList<>(); + List<ConstraintGroup> processedNodes = new ArrayList<>(); for (int i = 0; i < operands.size(); i++) { - final List<RexNode> conjunctions = RelOptUtil.conjunctions(operands.get(i)); - for (RexNode conjunction: conjunctions) { - // 1.1. If it is not a RexCall, we bail out - if (!(conjunction instanceof RexCall)) { - return null; - } - // 1.2. 
We extract the information that we need - RexCall conjCall = (RexCall) conjunction; - if(conjCall.getOperator().getKind() == SqlKind.EQUALS) { - if (conjCall.operands.get(0) instanceof RexInputRef && - conjCall.operands.get(1) instanceof RexLiteral) { - RexInputRef ref = (RexInputRef) conjCall.operands.get(0); - RexLiteral literal = (RexLiteral) conjCall.operands.get(1); - columnConstantsMap.put(ref, literal); - if (columnConstantsMap.get(ref).size() != i+1) { - // If we have not added to this column before, we bail out - return null; - } - } else if (conjCall.operands.get(1) instanceof RexInputRef && - conjCall.operands.get(0) instanceof RexLiteral) { - RexInputRef ref = (RexInputRef) conjCall.operands.get(1); - RexLiteral literal = (RexLiteral) conjCall.operands.get(0); - columnConstantsMap.put(ref, literal); - if (columnConstantsMap.get(ref).size() != i+1) { - // If we have not added to this column before, we bail out - return null; - } - } else { - // Bail out - return null; - } - } else { - return null; - } - } + ConstraintGroup m = new ConstraintGroup(operands.get(i)); + allNodes.add(m); } - // 3. We build the new predicate and return it - List<RexNode> newOperands = new ArrayList<RexNode>(operands.size()); - // 3.1 Create structs - List<RexInputRef> columns = new ArrayList<RexInputRef>(); - List<String> names = new ArrayList<String>(); - ImmutableList.Builder<RelDataType> paramsTypes = ImmutableList.builder(); - List<TypeInfo> structReturnType = new ArrayList<TypeInfo>(); - ImmutableList.Builder<RelDataType> newOperandsTypes = ImmutableList.builder(); - for (int i = 0; i < operands.size(); i++) { - List<RexLiteral> constantFields = new ArrayList<RexLiteral>(operands.size()); + Multimap<Set<RexInputRef>, ConstraintGroup> assignmentGroups = + Multimaps.index(allNodes, ConstraintGroup.KEY_FUNCTION); - for (RexInputRef ref : columnConstantsMap.keySet()) { - // If any of the elements was not referenced by every operand, we bail out - if (columnConstantsMap.get(ref).size() <= i) { - return null; - } - RexLiteral columnConstant = columnConstantsMap.get(ref).get(i); - if (i == 0) { - columns.add(ref); - names.add(inputSchema.getFieldNames().get(ref.getIndex())); - paramsTypes.add(ref.getType()); - structReturnType.add(TypeConverter.convert(ref.getType())); - } - constantFields.add(columnConstant); + for (Entry<Set<RexInputRef>, Collection<ConstraintGroup>> sa : assignmentGroups.asMap().entrySet()) { + // skip opaque + if (sa.getKey().size() == 0) { + continue; } - - if (i == 0) { - RexNode columnsRefs; - if (columns.size() == 1) { - columnsRefs = columns.get(0); - } else { - // Create STRUCT clause - columnsRefs = rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns); - } - newOperands.add(columnsRefs); - newOperandsTypes.add(columnsRefs.getType()); + // not enough equalities should not be handled + if (sa.getValue().size() < 2 || sa.getValue().size() < minNumORClauses) { + continue; } - RexNode values; - if (constantFields.size() == 1) { - values = constantFields.get(0); - } else { - // Create STRUCT clause - values = rexBuilder.makeCall(SqlStdOperatorTable.ROW, constantFields); - } - newOperands.add(values); - newOperandsTypes.add(values.getType()); + + allNodes.add(new ConstraintGroup(buildInFor(sa.getKey(), sa.getValue()))); + processedNodes.addAll(sa.getValue()); + } + + if (processedNodes.isEmpty()) { + return null; } + allNodes.removeAll(processedNodes); + List<RexNode> ops = new ArrayList<>(); + for (ConstraintGroup mx : allNodes) { + ops.add(mx.originalRexNode); + } + if 
(ops.size() == 1) { + return ops.get(0); + } else { + return rexBuilder.makeCall(SqlStdOperatorTable.OR, ops); + } + + } - // 4. Create and return IN clause - return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands); + private RexNode buildInFor(Set<RexInputRef> set, Collection<ConstraintGroup> value) throws SemanticException { + + List<RexInputRef> columns = new ArrayList<RexInputRef>(); + columns.addAll(set); + List<RexNode >operands = new ArrayList<>(); + + operands.add(useStructIfNeeded(columns)); + for (ConstraintGroup node : value) { + List<RexNode> values = node.getValuesInOrder(columns); + operands.add(useStructIfNeeded(values)); + } + + return rexBuilder.makeCall(HiveIn.INSTANCE, operands); + } + + private RexNode useStructIfNeeded(List<? extends RexNode> columns) { + // Create STRUCT clause + if (columns.size() == 1) { + return columns.get(0); + } else { + return rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns); + } } } @@ -337,7 +418,7 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { switch (call.getKind()) { case AND: // IN clauses need to be combined by keeping only common elements - operands = Lists.newArrayList(RexUtil.flattenAnd(((RexCall) call).getOperands())); + operands = Lists.newArrayList(RexUtil.flattenAnd(call.getOperands())); for (int i = 0; i < operands.size(); i++) { RexNode operand = operands.get(i); if (operand.getKind() == SqlKind.IN) { @@ -374,7 +455,7 @@ public abstract class HivePointLookupOptimizerRule extends RelOptRule { break; case OR: // IN clauses need to be combined by keeping all elements - operands = Lists.newArrayList(RexUtil.flattenOr(((RexCall) call).getOperands())); + operands = Lists.newArrayList(RexUtil.flattenOr(call.getOperands())); for (int i = 0; i < operands.size(); i++) { RexNode operand = operands.get(i); if (operand.getKind() == SqlKind.IN) { http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHivePointLookupOptimizerRule.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHivePointLookupOptimizerRule.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHivePointLookupOptimizerRule.java new file mode 100644 index 0000000..a593201 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHivePointLookupOptimizerRule.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import static org.junit.Assert.assertEquals; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Matchers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.runners.MockitoJUnitRunner; + +@RunWith(MockitoJUnitRunner.class) +public class TestHivePointLookupOptimizerRule { + + @Mock + private RelOptSchema schemaMock; + @Mock + RelOptHiveTable tableMock; + @Mock + Table hiveTableMDMock; + + private HepPlanner planner; + private RelBuilder builder; + + @SuppressWarnings("unused") + private static class MyRecord { + public int f1; + public int f2; + } + + @Before + public void before() { + HepProgramBuilder programBuilder = new HepProgramBuilder(); + programBuilder.addRuleInstance(new HivePointLookupOptimizerRule.FilterCondition(2)); + + planner = new HepPlanner(programBuilder.build()); + + JavaTypeFactoryImpl typeFactory = new JavaTypeFactoryImpl(); + RexBuilder rexBuilder = new RexBuilder(typeFactory); + final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder); + RelDataType rowTypeMock = typeFactory.createStructType(MyRecord.class); + Mockito.doReturn(rowTypeMock).when(tableMock).getRowType(); + Mockito.doReturn(tableMock).when(schemaMock).getTableForMember(Matchers.any()); + Mockito.doReturn(hiveTableMDMock).when(tableMock).getHiveTableMD(); + + builder = HiveRelFactories.HIVE_BUILDER.create(optCluster, schemaMock); + + } + + public RexNode or(RexNode... args) { + return builder.call(SqlStdOperatorTable.OR, args); + } + + public RexNode and(RexNode... 
args) { + return builder.call(SqlStdOperatorTable.AND, args); + } + + public RexNode eq(String field, int value) { + return builder.call(SqlStdOperatorTable.EQUALS, + builder.field(field), builder.literal(value)); + } + + @Test + public void testSimpleCase() { + + // @formatter:off + final RelNode basePlan = builder + .scan("t") + .filter( + and( + or( + eq("f1",1), + eq("f1",2) + ), + or( + eq("f2",3), + eq("f2",4) + ) + ) + ) + .build(); + // @formatter:on + + planner.setRoot(basePlan); + RelNode optimizedRelNode = planner.findBestExp(); + + HiveFilter filter = (HiveFilter) optimizedRelNode; + RexNode condition = filter.getCondition(); + assertEquals("AND(IN($0, 1, 2), IN($1, 3, 4))", condition.toString()); + } + + @Test + public void testSimpleStructCase() { + + // @formatter:off + final RelNode basePlan = builder + .scan("t") + .filter( + or( + and( eq("f1",1),eq("f2",1)), + and( eq("f1",2),eq("f2",2)) + ) + ) + .build(); + // @formatter:on + + planner.setRoot(basePlan); + RelNode optimizedRelNode = planner.findBestExp(); + + HiveFilter filter = (HiveFilter) optimizedRelNode; + RexNode condition = filter.getCondition(); + assertEquals("IN(ROW($0, $1), ROW(1, 1), ROW(2, 2))", condition.toString()); + } + + /** Despite the fact that f2=99 is there...the extraction should happen */ + @Test + public void testObscuredSimple() { + + // @formatter:off + final RelNode basePlan = builder + .scan("t") + .filter( + or( + eq("f2",99), + eq("f1",1), + eq("f1",2) + ) + ) + .build(); + // @formatter:on + + planner.setRoot(basePlan); + RelNode optimizedRelNode = planner.findBestExp(); + + HiveFilter filter = (HiveFilter) optimizedRelNode; + RexNode condition = filter.getCondition(); + System.out.println(condition); + assertEquals("OR(IN($0, 1, 2), =($1, 99))", condition.toString()); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out index 9c624c2..f077fb2 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out @@ -507,7 +507,7 @@ STAGE PLANS: properties: druid.fieldNames vc,cstring2 druid.fieldTypes timestamp with local time zone,string - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"selector","dimension":"cstring2","value":"user1"},{"type":"selector","dimension":"__time","value":"2010-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"__time","value":"2011-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","cstring2"],"resultFormat":"compactedList"} + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"in","dimension":"__time","values":["2010-01-01T08:00:00.000Z","2011-01-01T08:00:00.000Z"],"extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"cstring2","value":"user1"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","cstring2"],"resultFormat":"compactedList"} druid.query.type scan Select Operator expressions: vc (type: timestamp with local time zone), cstring2 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out index 260ba1c..55442ad 100644 --- a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out @@ -1542,9 +1542,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from srcbucket_pruned where key = 1 or value = "One" or key = 2 POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value`, `ds` -FROM `default`.`srcbucket_pruned` -WHERE `key` = 1 OR `value` = 'One' OR `key` = 2 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1558,12 +1555,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: ((key = 1) or (value = 'One') or (key = 2)) (type: boolean) + filterExpr: ((key) IN (1, 2) or (value = 'One')) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key = 1) or (key = 2) or (value = 'One')) (type: boolean) + predicate: ((key) IN (1, 2) or (value = 'One')) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string), ds (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/perf/spark/query15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out index 67684f6..3d6fbda 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out @@ -157,7 +157,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col7 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) + predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col4 (type: string), _col7 (type: decimal(7,2)) 
http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/perf/spark/query47.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index a9b5092..4a66d0b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -245,20 +245,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) + filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 18 @@ -305,20 +305,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) + filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: 
int) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 26 @@ -345,20 +345,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) + filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 9 http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/perf/spark/query57.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 6785ee9..502d5f7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -259,20 +259,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) + filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num 
rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 19 @@ -319,20 +319,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) + filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 27 @@ -359,20 +359,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) + filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 
(type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Reducer 12 http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/perf/tez/query15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/query15.q.out index e1eca99..3c7ae66 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -71,7 +71,7 @@ Stage-0 Select Operator [SEL_23] (rows=348467716 width=135) Output:["_col4","_col7"] Filter Operator [FIL_22] (rows=348467716 width=135) - predicate:((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) + predicate:((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) Merge Join Operator [MERGEJOIN_77] (rows=348467716 width=135) Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col7"] <-Reducer 2 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/perf/tez/query47.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/query47.q.out index bd17808..f931483 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query47.q.out @@ -199,10 +199,10 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_282] PartitionCols:_col0 - Select Operator [SEL_281] (rows=73048 width=1119) + Select Operator [SEL_281] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=73048 width=1119) - predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) + Filter Operator [FIL_280] (rows=73049 width=1119) + predicate:(((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) TableScan [TS_73] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized @@ -222,7 +222,7 @@ Stage-0 SHUFFLE [RS_285] Group By Operator [GBY_284] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=73048 width=1119) + Select Operator [SEL_283] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_281] <-Reducer 15 [BROADCAST_EDGE] vectorized http://git-wip-us.apache.org/repos/asf/hive/blob/93de9504/ql/src/test/results/clientpositive/perf/tez/query57.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/query57.q.out index 1d3c17d..fed340a 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query57.q.out @@ -193,10 +193,10 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_282] PartitionCols:_col0 - Select Operator [SEL_281] (rows=73048 width=1119) + Select Operator [SEL_281] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=73048 width=1119) - predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) + Filter Operator [FIL_280] (rows=73049 width=1119) + predicate:(((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) TableScan [TS_73] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized @@ -216,7 +216,7 @@ Stage-0 SHUFFLE [RS_285] Group By Operator [GBY_284] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=73048 width=1119) + Select Operator [SEL_283] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_281] <-Reducer 15 [BROADCAST_EDGE] vectorized