[ https://issues.apache.org/jira/browse/HIVE-24154?focusedWorklogId=485132&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-485132 ]
ASF GitHub Bot logged work on HIVE-24154: ----------------------------------------- Author: ASF GitHub Bot Created on: 16/Sep/20 13:05 Start Date: 16/Sep/20 13:05 Worklog Time Spent: 10m Work Description: kgyrtkirk commented on a change in pull request #1492: URL: https://github.com/apache/hive/pull/1492#discussion_r489412449 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ########## @@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends RexNode> columns) { } @Override public RexNode visitCall(RexCall call) { - final RexNode node; - final List<RexNode> operands; - final List<RexNode> newOperands; - final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); switch (call.getKind()) { case AND: - // IN clauses need to be combined by keeping only common elements - operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands())); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - if (operand.getKind() == SqlKind.IN) { - RexCall inCall = (RexCall) operand; - if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { - continue; - } - RexNode ref = inCall.getOperands().get(0); - if (inLHSExprToRHSExprs.containsKey(ref)) { - Set<RexNode> expressions = Sets.newHashSet(); - for (int j = 1; j < inCall.getOperands().size(); j++) { - expressions.add(inCall.getOperands().get(j)); - } - inLHSExprToRHSExprs.get(ref).retainAll(expressions); - if (!inLHSExprToRHSExprs.containsKey(ref)) { - // Note that Multimap does not keep a key if all its values are removed. - // Hence, since there are no common expressions and it is within an AND, - // we should return false - return rexBuilder.makeLiteral(false); - } - } else { - for (int j = 1; j < inCall.getOperands().size(); j++) { - inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); - } - } - operands.remove(i); - --i; - } else if (operand.getKind() == SqlKind.EQUALS) { - Constraint c = Constraint.of(operand); - if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) { - continue; + return handleAND(rexBuilder, call); + case OR: + return handleOR(rexBuilder, call); + default: + return super.visitCall(call); + } + } + + private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) { + // IN clauses need to be combined by keeping only common elements + final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); + // We will use this set to keep those expressions that may evaluate + // into a null value. + final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs = LinkedHashMultimap.create(); + final List<RexNode> operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands())); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + RexNode ref = inCall.getOperands().get(0); + if (ref.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, ref); + } + if (inLHSExprToRHSExprs.containsKey(ref)) { + Set<RexNode> expressions = Sets.newHashSet(); + for (int j = 1; j < inCall.getOperands().size(); j++) { + RexNode constNode = inCall.getOperands().get(j); + expressions.add(constNode); + if (constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, constNode); } - RexNode ref = c.exprNode; - if (inLHSExprToRHSExprs.containsKey(ref)) { - inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode)); - if (!inLHSExprToRHSExprs.containsKey(ref)) { - // Note that Multimap does not keep a key if all its values are removed. - // Hence, since there are no common expressions and it is within an AND, - // we should return false - return rexBuilder.makeLiteral(false); - } - } else { - inLHSExprToRHSExprs.put(ref, c.constNode); + } + inLHSExprToRHSExprs.get(ref).retainAll(expressions); + if (!inLHSExprToRHSExprs.containsKey(ref)) { + // Note that Multimap does not keep a key if all its values are removed. + return createResultFromEmptySet(rexBuilder, ref, inLHSExprToRHSNullableExprs); + } + } else { + for (int j = 1; j < inCall.getOperands().size(); j++) { + RexNode constNode = inCall.getOperands().get(j); + inLHSExprToRHSExprs.put(ref, constNode); + if (constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, constNode); } - operands.remove(i); - --i; } } - // Create IN clauses - newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); - newOperands.addAll(operands); - // Return node - node = RexUtil.composeConjunction(rexBuilder, newOperands, false); - break; - case OR: - // IN clauses need to be combined by keeping all elements - operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands())); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - if (operand.getKind() == SqlKind.IN) { - RexCall inCall = (RexCall) operand; - if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { - continue; - } - RexNode ref = inCall.getOperands().get(0); - for (int j = 1; j < inCall.getOperands().size(); j++) { - inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); - } - operands.remove(i); - --i; + operands.remove(i); + --i; + } else if (operand.getKind() == SqlKind.EQUALS) { + Constraint c = Constraint.of(operand); + if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) { + continue; + } + if (c.exprNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(c.exprNode, c.exprNode); + } + if (c.constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(c.exprNode, c.constNode); + } + if (inLHSExprToRHSExprs.containsKey(c.exprNode)) { + inLHSExprToRHSExprs.get(c.exprNode).retainAll(Collections.singleton(c.constNode)); + if (!inLHSExprToRHSExprs.containsKey(c.exprNode)) { + // Note that Multimap does not keep a key if all its values are removed. + return createResultFromEmptySet(rexBuilder, c.exprNode, inLHSExprToRHSNullableExprs); } + } else { + inLHSExprToRHSExprs.put(c.exprNode, c.constNode); } - // Create IN clauses - newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); - newOperands.addAll(operands); - // Return node - node = RexUtil.composeDisjunction(rexBuilder, newOperands, false); - break; - default: - return super.visitCall(call); + operands.remove(i); + --i; + } } - return node; + // Create IN clauses + final List<RexNode> newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); + newOperands.addAll(operands); + // Return node + return RexUtil.composeConjunction(rexBuilder, newOperands, false); + } + + private static RexNode handleOR(RexBuilder rexBuilder, RexCall call) { + // IN clauses need to be combined by keeping all elements + final List<RexNode> operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands())); + final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + RexNode ref = inCall.getOperands().get(0); + for (int j = 1; j < inCall.getOperands().size(); j++) { + inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); + } + operands.remove(i); + --i; + } + } + // Create IN clauses + final List<RexNode> newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); + newOperands.addAll(operands); + // Return node + return RexUtil.composeDisjunction(rexBuilder, newOperands, false); + } + + private static RexNode createResultFromEmptySet(RexBuilder rexBuilder, + RexNode ref, Multimap<RexNode, RexNode> inLHSExprToRHSNullableExprs) { + if (inLHSExprToRHSNullableExprs.containsKey(ref)) { + // We handle possible null values in the expressions. + List<RexNode> nullableExprs = + inLHSExprToRHSNullableExprs.get(ref) + .stream() + .map(n -> rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ImmutableList.of(n))) + .collect(Collectors.toList()); + return rexBuilder.makeCall(SqlStdOperatorTable.CASE, Review comment: I believe this would not need to be a `CASE` it could be an `AND` as well ``` (x IS NULL OR y IS NULL) AND null ``` but I know - it won't make much difference - the `CASE` might be more readbale :) ########## File path: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ########## @@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends RexNode> columns) { } @Override public RexNode visitCall(RexCall call) { - final RexNode node; - final List<RexNode> operands; - final List<RexNode> newOperands; - final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); switch (call.getKind()) { case AND: - // IN clauses need to be combined by keeping only common elements - operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands())); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - if (operand.getKind() == SqlKind.IN) { - RexCall inCall = (RexCall) operand; - if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { - continue; - } - RexNode ref = inCall.getOperands().get(0); - if (inLHSExprToRHSExprs.containsKey(ref)) { - Set<RexNode> expressions = Sets.newHashSet(); - for (int j = 1; j < inCall.getOperands().size(); j++) { - expressions.add(inCall.getOperands().get(j)); - } - inLHSExprToRHSExprs.get(ref).retainAll(expressions); - if (!inLHSExprToRHSExprs.containsKey(ref)) { - // Note that Multimap does not keep a key if all its values are removed. - // Hence, since there are no common expressions and it is within an AND, - // we should return false - return rexBuilder.makeLiteral(false); - } - } else { - for (int j = 1; j < inCall.getOperands().size(); j++) { - inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); - } - } - operands.remove(i); - --i; - } else if (operand.getKind() == SqlKind.EQUALS) { - Constraint c = Constraint.of(operand); - if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) { - continue; + return handleAND(rexBuilder, call); + case OR: + return handleOR(rexBuilder, call); + default: + return super.visitCall(call); + } + } + + private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) { + // IN clauses need to be combined by keeping only common elements + final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); + // We will use this set to keep those expressions that may evaluate + // into a null value. + final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs = LinkedHashMultimap.create(); + final List<RexNode> operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands())); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + RexNode ref = inCall.getOperands().get(0); + if (ref.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, ref); + } + if (inLHSExprToRHSExprs.containsKey(ref)) { + Set<RexNode> expressions = Sets.newHashSet(); + for (int j = 1; j < inCall.getOperands().size(); j++) { + RexNode constNode = inCall.getOperands().get(j); + expressions.add(constNode); + if (constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, constNode); } - RexNode ref = c.exprNode; - if (inLHSExprToRHSExprs.containsKey(ref)) { - inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode)); - if (!inLHSExprToRHSExprs.containsKey(ref)) { - // Note that Multimap does not keep a key if all its values are removed. - // Hence, since there are no common expressions and it is within an AND, - // we should return false - return rexBuilder.makeLiteral(false); - } - } else { - inLHSExprToRHSExprs.put(ref, c.constNode); + } + inLHSExprToRHSExprs.get(ref).retainAll(expressions); + if (!inLHSExprToRHSExprs.containsKey(ref)) { + // Note that Multimap does not keep a key if all its values are removed. + return createResultFromEmptySet(rexBuilder, ref, inLHSExprToRHSNullableExprs); Review comment: at this point - aren't we in the middle of processing all the operands of the AND? ``` x IN (1,2) AND x IN (3,4) AND y IN (1,2) ``` I suspect for the above expression we don't know anything about `y` (yet) - isn't that a problem? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ########## @@ -678,100 +679,135 @@ private RexNode useStructIfNeeded(List<? extends RexNode> columns) { } @Override public RexNode visitCall(RexCall call) { - final RexNode node; - final List<RexNode> operands; - final List<RexNode> newOperands; - final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); switch (call.getKind()) { case AND: - // IN clauses need to be combined by keeping only common elements - operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands())); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - if (operand.getKind() == SqlKind.IN) { - RexCall inCall = (RexCall) operand; - if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { - continue; - } - RexNode ref = inCall.getOperands().get(0); - if (inLHSExprToRHSExprs.containsKey(ref)) { - Set<RexNode> expressions = Sets.newHashSet(); - for (int j = 1; j < inCall.getOperands().size(); j++) { - expressions.add(inCall.getOperands().get(j)); - } - inLHSExprToRHSExprs.get(ref).retainAll(expressions); - if (!inLHSExprToRHSExprs.containsKey(ref)) { - // Note that Multimap does not keep a key if all its values are removed. - // Hence, since there are no common expressions and it is within an AND, - // we should return false - return rexBuilder.makeLiteral(false); - } - } else { - for (int j = 1; j < inCall.getOperands().size(); j++) { - inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); - } - } - operands.remove(i); - --i; - } else if (operand.getKind() == SqlKind.EQUALS) { - Constraint c = Constraint.of(operand); - if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) { - continue; + return handleAND(rexBuilder, call); + case OR: + return handleOR(rexBuilder, call); + default: + return super.visitCall(call); + } + } + + private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) { + // IN clauses need to be combined by keeping only common elements + final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); + // We will use this set to keep those expressions that may evaluate + // into a null value. + final Multimap<RexNode,RexNode> inLHSExprToRHSNullableExprs = LinkedHashMultimap.create(); + final List<RexNode> operands = new ArrayList<>(RexUtil.flattenAnd(call.getOperands())); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + RexNode ref = inCall.getOperands().get(0); + if (ref.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, ref); + } + if (inLHSExprToRHSExprs.containsKey(ref)) { + Set<RexNode> expressions = Sets.newHashSet(); + for (int j = 1; j < inCall.getOperands().size(); j++) { + RexNode constNode = inCall.getOperands().get(j); + expressions.add(constNode); + if (constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, constNode); } - RexNode ref = c.exprNode; - if (inLHSExprToRHSExprs.containsKey(ref)) { - inLHSExprToRHSExprs.get(ref).retainAll(Collections.singleton(c.constNode)); - if (!inLHSExprToRHSExprs.containsKey(ref)) { - // Note that Multimap does not keep a key if all its values are removed. - // Hence, since there are no common expressions and it is within an AND, - // we should return false - return rexBuilder.makeLiteral(false); - } - } else { - inLHSExprToRHSExprs.put(ref, c.constNode); + } + inLHSExprToRHSExprs.get(ref).retainAll(expressions); + if (!inLHSExprToRHSExprs.containsKey(ref)) { + // Note that Multimap does not keep a key if all its values are removed. + return createResultFromEmptySet(rexBuilder, ref, inLHSExprToRHSNullableExprs); + } + } else { + for (int j = 1; j < inCall.getOperands().size(); j++) { + RexNode constNode = inCall.getOperands().get(j); + inLHSExprToRHSExprs.put(ref, constNode); + if (constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(ref, constNode); } - operands.remove(i); - --i; } } - // Create IN clauses - newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); - newOperands.addAll(operands); - // Return node - node = RexUtil.composeConjunction(rexBuilder, newOperands, false); - break; - case OR: - // IN clauses need to be combined by keeping all elements - operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands())); - for (int i = 0; i < operands.size(); i++) { - RexNode operand = operands.get(i); - if (operand.getKind() == SqlKind.IN) { - RexCall inCall = (RexCall) operand; - if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { - continue; - } - RexNode ref = inCall.getOperands().get(0); - for (int j = 1; j < inCall.getOperands().size(); j++) { - inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); - } - operands.remove(i); - --i; + operands.remove(i); + --i; + } else if (operand.getKind() == SqlKind.EQUALS) { + Constraint c = Constraint.of(operand); + if (c == null || !HiveCalciteUtil.isDeterministic(c.exprNode)) { + continue; + } + if (c.exprNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(c.exprNode, c.exprNode); + } + if (c.constNode.getType().isNullable()) { + inLHSExprToRHSNullableExprs.put(c.exprNode, c.constNode); + } + if (inLHSExprToRHSExprs.containsKey(c.exprNode)) { + inLHSExprToRHSExprs.get(c.exprNode).retainAll(Collections.singleton(c.constNode)); + if (!inLHSExprToRHSExprs.containsKey(c.exprNode)) { + // Note that Multimap does not keep a key if all its values are removed. + return createResultFromEmptySet(rexBuilder, c.exprNode, inLHSExprToRHSNullableExprs); } + } else { + inLHSExprToRHSExprs.put(c.exprNode, c.constNode); } - // Create IN clauses - newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); - newOperands.addAll(operands); - // Return node - node = RexUtil.composeDisjunction(rexBuilder, newOperands, false); - break; - default: - return super.visitCall(call); + operands.remove(i); + --i; + } } - return node; + // Create IN clauses + final List<RexNode> newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); + newOperands.addAll(operands); + // Return node + return RexUtil.composeConjunction(rexBuilder, newOperands, false); + } + + private static RexNode handleOR(RexBuilder rexBuilder, RexCall call) { + // IN clauses need to be combined by keeping all elements + final List<RexNode> operands = new ArrayList<>(RexUtil.flattenOr(call.getOperands())); + final Multimap<RexNode,RexNode> inLHSExprToRHSExprs = LinkedHashMultimap.create(); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (operand.getKind() == SqlKind.IN) { + RexCall inCall = (RexCall) operand; + if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) { + continue; + } + RexNode ref = inCall.getOperands().get(0); + for (int j = 1; j < inCall.getOperands().size(); j++) { + inLHSExprToRHSExprs.put(ref, inCall.getOperands().get(j)); + } + operands.remove(i); + --i; + } + } + // Create IN clauses + final List<RexNode> newOperands = createInClauses(rexBuilder, inLHSExprToRHSExprs); + newOperands.addAll(operands); + // Return node + return RexUtil.composeDisjunction(rexBuilder, newOperands, false); + } + + private static RexNode createResultFromEmptySet(RexBuilder rexBuilder, + RexNode ref, Multimap<RexNode, RexNode> inLHSExprToRHSNullableExprs) { + if (inLHSExprToRHSNullableExprs.containsKey(ref)) { Review comment: right now I don't fully agree with this logic - if we have an expression like: ``` x IN (1,2) AND x in (3,4) and y IN (5,6) ``` it's clear that we may only return `false` or `null`; but I think we may still need to choose `false` based on the other expressions; for the above: * when `x is null, y=5` then the result will be `null` * but when `x is null, y=1` then the result will be `false` in light of this reasoning it seem to me that this starts looking more and more sophisticated.... I think it might worth a try to see if these "early-return-type" simplifications kick-in a lot or not - and as an alternative approach consider removing them instead of saving them. I think at some point we should run the simplification earlier when the `OR`s are still open - so that it could analyze-it properly ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 485132) Time Spent: 1h 40m (was: 1.5h) > Missing simplification opportunity with IN and EQUALS clauses > ------------------------------------------------------------- > > Key: HIVE-24154 > URL: https://issues.apache.org/jira/browse/HIVE-24154 > Project: Hive > Issue Type: Improvement > Components: CBO > Reporter: Jesus Camacho Rodriguez > Assignee: Jesus Camacho Rodriguez > Priority: Minor > Labels: pull-request-available > Time Spent: 1h 40m > Remaining Estimate: 0h > > For instance, in perf driver CBO query 74, there are several filters that > could be simplified further: > {code} > HiveFilter(condition=[AND(=($1, 1999), IN($1, 1998, 1999))]) > {code} > This may lead to incorrect estimates and leads to unnecessary execution time. -- This message was sent by Atlassian Jira (v8.3.4#803005)