[ https://issues.apache.org/jira/browse/HIVE-27264?focusedWorklogId=857674&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-857674 ]
ASF GitHub Bot logged work on HIVE-27264: ----------------------------------------- Author: ASF GitHub Bot Created on: 18/Apr/23 12:37 Start Date: 18/Apr/23 12:37 Worklog Time Spent: 10m Work Description: zabetak commented on code in PR #4237: URL: https://github.com/apache/hive/pull/4237#discussion_r1169885465 ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java: ########## @@ -669,6 +670,22 @@ private static RexNode handleAND(RexBuilder rexBuilder, RexCall call) { return RexUtil.composeConjunction(rexBuilder, newOperands, false); } + private static void retainAll(Collection<RexNode> elementsToRetain, Collection<RexNode> collection) { + collection.removeIf(rexNode -> elementsToRetain.stream().noneMatch( + rexNodeToRetain -> equalsWithSimilarType(rexNode, rexNodeToRetain))); + } + + private static boolean equalsWithSimilarType(RexNode rexNode1, RexNode rexNode2) { + if (!(rexNode1 instanceof RexLiteral) || !(rexNode2 instanceof RexLiteral)) { + return rexNode1.equals(rexNode2); + } + + RexLiteral rexLiteral1 = (RexLiteral) rexNode1; + RexLiteral rexLiteral2 = (RexLiteral) rexNode2; + return rexLiteral1.getValue().compareTo(rexLiteral2.getValue()) == 0 && Review Comment: Can we arrive here with the NULL literal? Will getValue return something or fail with NPE? 
########## ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHivePointLookupOptimizerRule.java: ########## @@ -348,4 +356,100 @@ public void testRecursionIsNotObstructed() { condition.toString()); } + @Test + public void testSameVarcharLiteralDifferentPrecision() { + + final RexBuilder rexBuilder = relBuilder.getRexBuilder(); + RelDataType stringType30 = rexBuilder.getTypeFactory().createTypeWithCharsetAndCollation( + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR, 30), + Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT); + RexNode lita30 = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString("AAA111"), stringType30, true); + RexNode litb30 = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString("BBB222"), stringType30, true); + + RelDataType stringType14 = rexBuilder.getTypeFactory().createTypeWithCharsetAndCollation( + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR, 14), + Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME), SqlCollation.IMPLICIT); + RexNode lita14 = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString("AAA111"), stringType14, true); + RexNode litb14 = rexBuilder.makeLiteral(RexNodeExprFactory.makeHiveUnicodeString("BBB222"), stringType14, true); + + final RelNode basePlan = relBuilder + .scan("t") + .filter(and(relBuilder, + relBuilder.call(SqlStdOperatorTable.IN, relBuilder.field("f2"), lita30, litb30), + relBuilder.call(SqlStdOperatorTable.IN, relBuilder.field("f2"), lita14, litb14))) + .build(); + + planner.setRoot(basePlan); + RelNode optimizedRelNode = planner.findBestExp(); + + HiveFilter filter = (HiveFilter) optimizedRelNode; + RexNode condition = filter.getCondition(); + System.out.println(condition); + assertEquals("IN($1, " + + "_UTF-16LE'AAA111':VARCHAR(30) CHARACTER SET \"UTF-16LE\", " + + "_UTF-16LE'BBB222':VARCHAR(30) CHARACTER SET \"UTF-16LE\")", Review Comment: Did you check if the results are in line with 
`RexSimplify` and `ReduceExpressionsRules`? ########## ql/src/test/queries/clientpositive/pointlookup6.q: ########## @@ -0,0 +1,19 @@ +set hive.optimize.point.lookup.min=2; + +create table r_table ( + string_col varchar(30) +); + +create table l_table ( + string_col varchar(14) +); + +insert into r_table VALUES ('AAA111'); +insert into l_table VALUES ('AAA111'); + +explain cbo +SELECT l_table.string_col from l_table, r_table +WHERE r_table.string_col = l_table.string_col AND l_table.string_col IN ('AAA111', 'BBB222') AND r_table.string_col IN ('AAA111', 'BBB222'); Review Comment: If instead of `IN` we have an equivalent query with `OR` do we still hit the problem? Do we get the expected plan when the condition is expressed with `OR`? Issue Time Tracking ------------------- Worklog Id: (was: 857674) Time Spent: 1h 50m (was: 1h 40m) > Literals in conjunction of two in expression are considered not equals if > type precision is different > ----------------------------------------------------------------------------------------------------- > > Key: HIVE-27264 > URL: https://issues.apache.org/jira/browse/HIVE-27264 > Project: Hive > Issue Type: Bug > Components: CBO > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 1h 50m > Remaining Estimate: 0h > > {code} > create table r_table ( > string_col varchar(30) > ); > create table l_table ( > string_col varchar(14) > ); > insert into r_table VALUES ('AAA111'); > insert into l_table VALUES ('AAA111'); > SELECT l_table.string_col from l_table, r_table > WHERE r_table.string_col = l_table.string_col AND l_table.string_col IN > ('AAA111', 'BBB222') AND r_table.string_col IN ('AAA111', 'BBB222'); > {code} > Should give one row > {code} > AAA111 > {code} > but it returns empty rs > Workaround > {code} > set hive.optimize.point.lookup=false; > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)