kasakrisz commented on code in PR #5196: URL: https://github.com/apache/hive/pull/5196#discussion_r1635892818
########## iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_mixed.q.out: ########## @@ -84,17 +84,17 @@ Stage-4 <-Reducer 2 [CONTAINS] File Output Operator [FS_46] table:{"name:":"default.ice01"} - Select Operator [SEL_44] (rows=3 width=295) + Select Operator [SEL_44] (rows=3 width=266) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_43] (rows=3 width=295) + Merge Join Operator [MERGEJOIN_43] (rows=3 width=266) Conds:RS_57._col4=RS_63._col0(Left Semi),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_57] PartitionCols:_col4 - Select Operator [SEL_55] (rows=5 width=295) + Select Operator [SEL_55] (rows=6 width=280) Review Comment: What cause the change in the number of rows and width ? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java: ########## @@ -72,6 +78,22 @@ public HiveFilter(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexN super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition); this.correlationInfos = new CorrelationInfoSupplier(getCondition()); } + + @Override + public RelWriter explainTerms(RelWriter pw) { + // Remove this method after upgrading Calcite to 1.35+ Review Comment: This comment can be easily forgotten. How about adding a check here? Similar to https://github.com/apache/hive/blob/33cadc5b498b57f779cddc9bf4e3f8aef2d9f6dc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java#L104-L107 ``` if (Bug.CALCITE_VERSION > 1.35) { throw ... } ``` ########## packaging/pom.xml: ########## @@ -173,6 +173,10 @@ \Qhttps://www.eclipse.org/org/documents/epl-v10.php\E \Qhttp://www.eclipse.org/org/documents/epl-v10.php\E </EPL-1.0> + <EPL-2.0> + \Qhttps://raw.githubusercontent.com/locationtech/jts/master/LICENSE_EPLv2.txt\E + \Qhttps://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt\E + </EPL-2.0> Review Comment: why is this change necessary? ########## ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java: ########## @@ -157,7 +157,7 @@ public void testCombine() throws Exception { drv.run(loadFileCommand); - String cmd = "select key*1 from " + tblName; + String cmd = "select key*2 from " + tblName; Review Comment: What is the reason of this change? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java: ########## @@ -171,7 +176,36 @@ public Double visitCall(RexCall call) { break; } - default: + case SEARCH: + RexLiteral literal = (RexLiteral) call.operands.get(1); + Sarg<?> sarg = Objects.requireNonNull(literal.getValueAs(Sarg.class), "Sarg"); + RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl(new HiveTypeSystemImpl())); Review Comment: How about `childRel.getCluster().getRexBuilder()` ? ########## ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java: ########## @@ -322,18 +324,24 @@ public RexNode getExpression(String functionText, FunctionInfo fi, // unix_timestamp(args) -> to_unix_timestamp(args) calciteOp = HiveToUnixTimestampSqlOperator.INSTANCE; } - expr = rexBuilder.makeCall(returnType, calciteOp, inputs); + expr = getExpression(returnType, calciteOp, inputs); } if (expr instanceof RexCall && !expr.isA(SqlKind.CAST)) { RexCall call = (RexCall) expr; expr = rexBuilder.makeCall(returnType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator())); } - return expr; } + private RexNode getExpression(RelDataType returnType, SqlOperator calciteOp, List<RexNode> inputs) { + if (Objects.requireNonNull(calciteOp.getKind()) == SqlKind.IN) { Review Comment: Please add a message about what can not be null. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java: ########## @@ -656,7 +658,9 @@ public static ImmutableList<RexNode> getPredsNotPushedAlready(Set<String> predic } // Build map to not convert multiple times, further remove already included predicates Map<String,RexNode> stringToRexNode = Maps.newLinkedHashMap(); + RexSimplify simplify = new RexSimplify(inp.getCluster().getRexBuilder(), RelOptPredicateList.EMPTY, RexUtil.EXECUTOR); for (RexNode r : predsToPushDown) { + r = simplify.simplify(r); Review Comment: Why `RexSimplify` is needed here? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/HiveJdbcImplementor.java: ########## @@ -128,6 +129,11 @@ public HiveJdbcImplementor(SqlDialect dialect, JavaTypeFactory typeFactory) { return result(join, leftResult, rightResult); } +public Result visit(JdbcTableScan scan) { + return result(scan.jdbcTable.tableName(), + ImmutableList.of(Clause.FROM), scan, null); +} + Review Comment: Why does this method needed? Is it override something ? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java: ########## @@ -198,6 +203,16 @@ public Double visitCall(RexCall call) { return null; } + if (call.getKind() == SqlKind.SEARCH) { + RexCall expandedCall = (RexCall) RexUtil.expandSearch( + new RexBuilder(new JavaTypeFactoryImpl(new HiveTypeSystemImpl())), Review Comment: Can `RexBuilder` instance be passed through `RexSortPredicatesShuttle` from `rewriteFilter` ? ``` final RexSortPredicatesShuttle sortPredicatesShuttle = new RexSortPredicatesShuttle( input, filter.getCluster().getMetadataQuery(), filter.getCluster().getRexBuilder()); ``` ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java: ########## @@ -138,6 +141,17 @@ public boolean isSynthetic() { @Override public RelWriter explainTerms(RelWriter pw) { + // Remove this if block after upgrading Calcite to 1.35+ Review Comment: How about adding a check here and throw exception instead of the comment? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java: ########## @@ -179,7 +178,7 @@ protected HivePointLookupOptimizerRule( this.minNumORClauses = minNumORClauses; } - public RexNode analyzeRexNode(RexBuilder rexBuilder, RexNode condition) { + public static RexNode analyzeRexNode(RexBuilder rexBuilder, RexNode condition, int minNumORClauses) { Review Comment: Why does this became static? ########## ql/src/test/queries/clientpositive/cardinality_preserving_join_opt2.q: ########## @@ -2,6 +2,7 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.stats.fetch.column.stats=true; +set hive.cbo.rule.exclusion.regex=HivePreFilteringRule; Review Comment: Why does this rule excluded? Should it be excluded in production too? ########## ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java: ########## @@ -508,8 +492,19 @@ public void testComputeRangePredicateSelectivityBetweenWithNULLS() { public void testComputeRangePredicateSelectivityNotBetweenWithNULLS() { doReturn((double) 20).when(tableMock).getRowCount(); doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0)); - RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.BETWEEN, boolTrue, inputRef0, int1, int3); + RexNode filter = makeNotBetween(inputRef0, int1, int3); FilterSelectivityEstimator estimator = new FilterSelectivityEstimator(scan, mq); Assert.assertEquals(0.55, estimator.estimateSelectivity(filter), DELTA); } + + private RexNode makeNotBetween(RexNode inputRef, RexNode left, RexNode right) { + RexNode withOr = REX_BUILDER.makeCall( + SqlStdOperatorTable.OR, + REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef, left), + REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef, right) + ); + RexSimplify simplify = new RexSimplify(REX_BUILDER, RelOptPredicateList.EMPTY, RexUtil.EXECUTOR); + + return simplify.simplify(withOr); Review Comment: how about ``` return REX_BUILDER.makeCall(SqlStdOperatorTable.NOT, REX_BUILDER.makeBetween(inputRef0, int1, int3)); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org