[ 
https://issues.apache.org/jira/browse/HIVE-24154?focusedWorklogId=485341&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-485341
 ]

ASF GitHub Bot logged work on HIVE-24154:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 16/Sep/20 19:56
            Start Date: 16/Sep/20 19:56
    Worklog Time Spent: 10m 
      Work Description: jcamachor commented on a change in pull request #1492:
URL: https://github.com/apache/hive/pull/1492#discussion_r489718951



##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
##########
@@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends 
RexNode> columns) {
     }
 
     @Override public RexNode visitCall(RexCall call) {
-      final RexNode node;
-      final List<RexNode> operands;
-      final List<RexNode> newOperands;
-      final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = 
LinkedHashMultimap.create();
       switch (call.getKind()) {
         case AND:
-          // IN clauses need to be combined by keeping only common elements
-          operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
-          for (int i = 0; i < operands.size(); i++) {
-            RexNode operand = operands.get(i);
-            if (operand.getKind() == SqlKind.IN) {
-              RexCall inCall = (RexCall) operand;
-              if 
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
-                continue;
-              }
-              RexNode ref = inCall.getOperands().get(0);
-              if (inLHSExprToRHSExprs.containsKey(ref)) {
-                Set<RexNode> expressions = Sets.newHashSet();
-                for (int j = 1; j < inCall.getOperands().size(); j++) {
-                  expressions.add(inCall.getOperands().get(j));
-                }
-                inLHSExprToRHSExprs.get(ref).retainAll(expressions);
-                if (!inLHSExprToRHSExprs.containsKey(ref)) {
-                  // Note that Multimap does not keep a key if all its values 
are removed.
-                  // Hence, since there are no common expressions and it is 
within an AND,
-                  // we should return false
-                  return rexBuilder.makeLiteral(false);
-                }
-              } else {
-                for (int j = 1; j < inCall.getOperands().size(); j++) {
-                  inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
-                }
-              }
-              operands.remove(i);
-              --i;
-            } else if (operand.getKind() == SqlKind.EQUALS) {
-              Constraint c = Constraint.of(operand);
-              if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
-                continue;
+          return handleAND(rexBuilder, call);
+        case OR:
+          return handleOR(rexBuilder, call);
+        default:
+          return super.visitCall(call);
+      }
+    }
+
+    private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) {
+      // IN clauses need to be combined by keeping only common elements
+      final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = 
LinkedHashMultimap.create();
+      // We will use this set to keep those expressions that may evaluate
+      // into a null value.
+      final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs = 
LinkedHashMultimap.create();
+      final List<RexNode> operands = new 
ArrayList<>(RexUtil.flattenAnd(call.getOperands()));
+      for (int i = 0; i < operands.size(); i++) {
+        RexNode operand = operands.get(i);
+        if (operand.getKind() == SqlKind.IN) {
+          RexCall inCall = (RexCall) operand;
+          if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+            continue;
+          }
+          RexNode ref = inCall.getOperands().get(0);
+          if (ref.getType().isNullable()) {
+            inLHSExprToRHSNullableExprs.put(ref, ref);
+          }
+          if (inLHSExprToRHSExprs.containsKey(ref)) {
+            Set<RexNode> expressions = Sets.newHashSet();
+            for (int j = 1; j < inCall.getOperands().size(); j++) {
+              RexNode constNode = inCall.getOperands().get(j);
+              expressions.add(constNode);
+              if (constNode.getType().isNullable()) {
+                inLHSExprToRHSNullableExprs.put(ref, constNode);
               }
-              RexNode ref = c.exprNode;
-              if (inLHSExprToRHSExprs.containsKey(ref)) {
-                
inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode));
-                if (!inLHSExprToRHSExprs.containsKey(ref)) {
-                  // Note that Multimap does not keep a key if all its values 
are removed.
-                  // Hence, since there are no common expressions and it is 
within an AND,
-                  // we should return false
-                  return rexBuilder.makeLiteral(false);
-                }
-              } else {
-                inLHSExprToRHSExprs.put(ref, c.constNode);
+            }
+            inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+            if (!inLHSExprToRHSExprs.containsKey(ref)) {
+              // Note that Multimap does not keep a key if all its values are 
removed.
+              return createResultFromEmptySet(rexBuilder, ref, 
inLHSExprToRHSNullableExprs);
+            }
+          } else {
+            for (int j = 1; j < inCall.getOperands().size(); j++) {
+              RexNode constNode = inCall.getOperands().get(j);
+              inLHSExprToRHSExprs.put(ref, constNode);
+              if (constNode.getType().isNullable()) {
+                inLHSExprToRHSNullableExprs.put(ref, constNode);
               }
-              operands.remove(i);
-              --i;
             }
           }
-          // Create IN clauses
-          newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs);
-          newOperands.addAll(operands);
-          // Return node
-          node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
-          break;
-        case OR:
-          // IN clauses need to be combined by keeping all elements
-          operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands()));
-          for (int i = 0; i < operands.size(); i++) {
-            RexNode operand = operands.get(i);
-            if (operand.getKind() == SqlKind.IN) {
-              RexCall inCall = (RexCall) operand;
-              if 
(!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
-                continue;
-              }
-              RexNode ref = inCall.getOperands().get(0);
-              for (int j = 1; j < inCall.getOperands().size(); j++) {
-                inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
-              }
-              operands.remove(i);
-              --i;
+          operands.remove(i);
+          --i;
+        } else if (operand.getKind() == SqlKind.EQUALS) {
+          Constraint c = Constraint.of(operand);
+          if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) {
+            continue;
+          }
+          if (c.exprNode.getType().isNullable()) {
+            inLHSExprToRHSNullableExprs.put(c.exprNode, c.exprNode);
+          }
+          if (c.constNode.getType().isNullable()) {
+            inLHSExprToRHSNullableExprs.put(c.exprNode, c.constNode);
+          }
+          if (inLHSExprToRHSExprs.containsKey(c.exprNode)) {
+            
inLHSExprToRHSExprs.get(c.exprNode).retainAll(Collections.singleton(c.constNode));
+            if (!inLHSExprToRHSExprs.containsKey(c.exprNode)) {
+              // Note that Multimap does not keep a key if all its values are 
removed.
+              return createResultFromEmptySet(rexBuilder, c.exprNode, 
inLHSExprToRHSNullableExprs);
             }
+          } else {
+            inLHSExprToRHSExprs.put(c.exprNode, c.constNode);
           }
-          // Create IN clauses
-          newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs);
-          newOperands.addAll(operands);
-          // Return node
-          node = RexUtil.composeDisjunction(rexBuilder, newOperands, false);
-          break;
-        default:
-          return super.visitCall(call);
+          operands.remove(i);
+          --i;
+        }
       }
-      return node;
+      // Create IN clauses
+      final List<RexNode> newOperands = createInClauses(rexBuilder, 
inLHSExprToRHSExprs);
+      newOperands.addAll(operands);
+      // Return node
+      return RexUtil.composeConjunction(rexBuilder, newOperands, false);
+    }
+
+    private static RexNode handleOR(RexBuilder rexBuilder, RexCall call) {
+      // IN clauses need to be combined by keeping all elements
+      final List<RexNode> operands = new 
ArrayList<>(RexUtil.flattenOr(call.getOperands()));
+      final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = 
LinkedHashMultimap.create();
+      for (int i = 0; i < operands.size(); i++) {
+        RexNode operand = operands.get(i);
+        if (operand.getKind() == SqlKind.IN) {
+          RexCall inCall = (RexCall) operand;
+          if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+            continue;
+          }
+          RexNode ref = inCall.getOperands().get(0);
+          for (int j = 1; j < inCall.getOperands().size(); j++) {
+            inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j));
+          }
+          operands.remove(i);
+          --i;
+        }
+      }
+      // Create IN clauses
+      final List<RexNode> newOperands = createInClauses(rexBuilder, 
inLHSExprToRHSExprs);
+      newOperands.addAll(operands);
+      // Return node
+      return RexUtil.composeDisjunction(rexBuilder, newOperands, false);
+    }
+
+    private static RexNode createResultFromEmptySet(RexBuilder rexBuilder,
+        RexNode ref, Multimap<RexNode, RexNode> inLHSExprToRHSNullableExprs) {
+      if (inLHSExprToRHSNullableExprs.containsKey(ref)) {

Review comment:
       @kgyrtkirk I pushed a new commit and added a few more test cases.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 485341)
    Time Spent: 2h 20m  (was: 2h 10m)

> Missing simplification opportunity with IN and EQUALS clauses
> -------------------------------------------------------------
>
>                 Key: HIVE-24154
>                 URL: https://issues.apache.org/jira/browse/HIVE-24154
>             Project: Hive
>          Issue Type: Improvement
>          Components: CBO
>            Reporter: Jesus Camacho Rodriguez
>            Assignee: Jesus Camacho Rodriguez
>            Priority: Minor
>              Labels: pull-request-available
>          Time Spent: 2h 20m
>  Remaining Estimate: 0h
>
> For instance, in perf driver CBO query 74, there are several filters that 
> could be simplified further:
> {code}
> HiveFilter(condition=[AND(=($1, 1999), IN($1, 1998, 1999))])
> {code}
> This may lead to incorrect estimates and leads to unnecessary execution time.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to