This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 7583142cbff HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis) 7583142cbff is described below commit 7583142cbffcb3958a546a9aaa15700bbc243df9 Author: Alessandro Solimando <alessandro.solima...@gmail.com> AuthorDate: Mon Jan 24 13:08:56 2022 +0100 HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis) Closes #2966 --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../hive/ql/optimizer/calcite/HiveCalciteUtil.java | 52 ++++++++++++++ .../HiveJoinPushTransitivePredicatesRule.java | 82 +++++++++------------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 7 +- .../cbo_join_transitive_pred_loop_1.q | 17 +++++ .../cbo_join_transitive_pred_loop_2.q | 24 +++++++ .../cbo_join_transitive_pred_loop_3.q | 23 ++++++ .../cbo_join_transitive_pred_loop_4.q | 23 ++++++ .../llap/cbo_join_transitive_pred_loop_1.q.out | 75 ++++++++++++++++++++ .../llap/cbo_join_transitive_pred_loop_2.q.out | 74 +++++++++++++++++++ .../llap/cbo_join_transitive_pred_loop_3.q.out | 67 ++++++++++++++++++ .../llap/cbo_join_transitive_pred_loop_4.q.out | 73 +++++++++++++++++++ 12 files changed, 470 insertions(+), 51 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 99964fc7732..caf223dd91b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2530,6 +2530,10 @@ public class HiveConf extends Configuration { "If this config is true only pushed down filters remain in the operator tree, \n" + "and the original filter is removed. If this config is false, the original filter \n" + "is also left in the operator tree at the original place."), + HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN("hive.optimize.join.disjunctive.transitive.predicates.pushdown", + true, "Whether to transitively infer disjunctive predicates across joins. \n" + + "Disjunctive predicates are hard to simplify and pushing them down might lead to infinite rule matching " + + "causing stackoverflow and OOM errors"), HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true, "Whether to transform OR clauses in Filter operators into IN clauses"), HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 2, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 160bfb86f6c..264756f0413 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -1214,6 +1214,58 @@ public class HiveCalciteUtil { } } + private static class DisjunctivePredicatesFinder extends RexVisitorImpl<Void> { + // accounting for DeMorgan's law + boolean inNegation = false; + boolean hasDisjunction = false; + + public DisjunctivePredicatesFinder() { + super(true); + } + + @Override + public Void visitCall(RexCall call) { + switch (call.getKind()) { + case OR: + if (inNegation) { + return super.visitCall(call); + } else { + this.hasDisjunction = true; + return null; + } + case AND: + if (inNegation) { + this.hasDisjunction = true; + return null; + } else { + return super.visitCall(call); + } + case NOT: + inNegation = !inNegation; + return super.visitCall(call); + default: + return super.visitCall(call); + } + } + } + + /** + * Returns whether the expression has disjunctions (OR) at any level of nesting. + * <ul> + * <li> Example 1: OR(=($0, $1), IS NOT NULL($2))):INTEGER (OR in the top-level expression) </li> + * <li> Example 2: NOT(AND(=($0, $1), IS NOT NULL($2)) </li> + * this is equivalent to OR((<>($0, $1), IS NULL($2)) + * <li> Example 3: AND(OR(=($0, $1), IS NOT NULL($2)))) (OR in inner expression) </li> + * </ul> + * @param node the expression where to look for disjunctions. + * @return true if the given expressions contains a disjunction, false otherwise. + */ + public static boolean hasDisjuction(RexNode node) { + DisjunctivePredicatesFinder finder = new DisjunctivePredicatesFinder(); + node.accept(finder); + return finder.hasDisjunction; + } + /** * Checks if any of the expression given as list expressions are from right side of the join. * This is used during anti join conversion. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 3a2bf82f2ff..da68a7474cf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -20,14 +20,13 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; -import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -42,9 +41,6 @@ import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hive.common.util.AnnotationUtils; @@ -65,21 +61,11 @@ import com.google.common.collect.Sets; */ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { - public static final HiveJoinPushTransitivePredicatesRule INSTANCE_JOIN = - new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY); + private final boolean allowDisjunctivePredicates; - public static final HiveJoinPushTransitivePredicatesRule INSTANCE_SEMIJOIN = - new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY); - - public static final HiveJoinPushTransitivePredicatesRule INSTANCE_ANTIJOIN = - new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY); - - private final FilterFactory filterFactory; - - public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz, - FilterFactory filterFactory) { + public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz, boolean allowDisjunctivePredicates) { super(operand(clazz, any())); - this.filterFactory = filterFactory; + this.allowDisjunctivePredicates = allowDisjunctivePredicates; } @Override @@ -95,11 +81,11 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { RelNode rChild = join.getRight(); Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0)); - List<RexNode> leftPreds = getValidPreds(join.getCluster(), lChild, - leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType()); + List<RexNode> leftPreds = + getValidPreds(lChild, leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType()); Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1)); - List<RexNode> rightPreds = getValidPreds(join.getCluster(), rChild, - rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType()); + List<RexNode> rightPreds = + getValidPreds(rChild, rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType()); RexNode newLeftPredicate = RexUtil.composeConjunction(rB, leftPreds, false); RexNode newRightPredicate = RexUtil.composeConjunction(rB, rightPreds, false); @@ -109,13 +95,15 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { if (!newLeftPredicate.isAlwaysTrue()) { RelNode curr = lChild; - lChild = filterFactory.createFilter(lChild, newLeftPredicate.accept(new RexReplacer(lChild)), ImmutableSet.of()); + lChild = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter( + lChild, newLeftPredicate.accept(new RexReplacer(lChild)), ImmutableSet.of()); call.getPlanner().onCopy(curr, lChild); } if (!newRightPredicate.isAlwaysTrue()) { RelNode curr = rChild; - rChild = filterFactory.createFilter(rChild, newRightPredicate.accept(new RexReplacer(rChild)), ImmutableSet.of()); + rChild = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter( + rChild, newRightPredicate.accept(new RexReplacer(rChild)), ImmutableSet.of()); call.getPlanner().onCopy(curr, rChild); } @@ -130,10 +118,10 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { call.transformTo(newRel); } - private ImmutableList<RexNode> getValidPreds(RelOptCluster cluster, RelNode child, - Set<String> predicatesToExclude, List<RexNode> rexs, RelDataType rType) { + private ImmutableList<RexNode> getValidPreds(RelNode child, Set<String> predicatesToExclude, + List<RexNode> rexs, RelDataType rType) { InputRefValidator validator = new InputRefValidator(rType.getFieldList()); - List<RexNode> valids = new ArrayList<RexNode>(rexs.size()); + List<RexNode> valids = new ArrayList<>(rexs.size()); for (RexNode rex : rexs) { try { rex.accept(validator); @@ -143,30 +131,27 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { } } - // We need to filter i) those that have been pushed already as stored in the join, - // and ii) those that were already in the subtree rooted at child - ImmutableList<RexNode> toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude, - child, valids); - return toPush; - } - - private RexNode getTypeSafePred(RelOptCluster cluster, RexNode rex, RelDataType rType) { - RexNode typeSafeRex = rex; - if ((typeSafeRex instanceof RexCall) && HiveCalciteUtil.isComparisonOp((RexCall) typeSafeRex)) { - RexBuilder rb = cluster.getRexBuilder(); - List<RexNode> fixedPredElems = new ArrayList<RexNode>(); - RelDataType commonType = cluster.getTypeFactory().leastRestrictive( - RexUtil.types(((RexCall) rex).getOperands())); - for (RexNode rn : ((RexCall) rex).getOperands()) { - fixedPredElems.add(rb.ensureType(commonType, rn, true)); - } - - typeSafeRex = rb.makeCall(((RexCall) typeSafeRex).getOperator(), fixedPredElems); + // We need to filter: + // i) those that have been pushed already as stored in the join, + // ii) those that were already in the subtree rooted at child. + List<RexNode> toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude, child, valids); + + // Disjunctive predicates, when merged with other existing predicates, might become redundant but RexSimplify still + // cannot simplify them. This situation generally leads to OOM, since these new predicates keep getting inferred + // between the LHS and the RHS recursively, they grow by getting merged with existing predicates, but they can + // never be simplified by RexSimplify, in this way the fix-point is never reached. + // This restriction can be lifted if RexSimplify gets more powerful, and it can handle such cases. + if (!allowDisjunctivePredicates) { + toPush = toPush.stream() + .filter(e -> !HiveCalciteUtil.hasDisjuction(e)) + .collect(Collectors.toList()); } - return typeSafeRex; + return ImmutableList.copyOf(toPush); } + //~ Inner Classes ---------------------------------------------------------- + private static class InputRefValidator extends RexVisitorImpl<Void> { private final List<RelDataTypeField> types; @@ -178,7 +163,8 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { @Override public Void visitCall(RexCall call) { - if(AnnotationUtils.getAnnotation(GenericUDFOPNotNull.class, Description.class).name().equals(call.getOperator().getName())) { + if(AnnotationUtils.getAnnotation( + GenericUDFOPNotNull.class, Description.class).name().equals(call.getOperator().getName())) { if(call.getOperands().get(0) instanceof RexInputRef && !types.get(((RexInputRef)call.getOperands().get(0)).getIndex()).getType().isNullable()) { // No need to add not null filter for a constant. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 6ba9a9075f4..cb3fddb60b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1782,6 +1782,7 @@ public class CalcitePlanner extends SemanticAnalyzer { final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT); final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); + final boolean allowDisjunctivePredicates = conf.getBoolVar(ConfVars.HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN); final HepProgramBuilder program = new HepProgramBuilder(); @@ -1879,9 +1880,9 @@ public class CalcitePlanner extends SemanticAnalyzer { rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN); rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN); rules.add(HiveJoinAddNotNullRule.INSTANCE_ANTIJOIN); - rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN); - rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN); - rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_ANTIJOIN); + rules.add(new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, allowDisjunctivePredicates)); + rules.add(new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, allowDisjunctivePredicates)); + rules.add(new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, allowDisjunctivePredicates)); rules.add(HiveSortMergeRule.INSTANCE); rules.add(HiveSortPullUpConstantsRule.SORT_LIMIT_INSTANCE); rules.add(HiveSortPullUpConstantsRule.SORT_EXCHANGE_INSTANCE); diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q new file mode 100644 index 00000000000..d9986318f11 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q @@ -0,0 +1,17 @@ +set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false; + +CREATE TABLE test1 (act_nbr string); +CREATE TABLE test2 (month int); +CREATE TABLE test3 (mth int, con_usd double); + +EXPLAIN CBO +SELECT c.month, + d.con_usd +FROM + (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month + FROM test1 + UNION ALL + SELECT month + FROM test2 + WHERE month = 202110) c +JOIN test3 d ON c.month = d.mth; diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q new file mode 100644 index 00000000000..44abbbe69c8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q @@ -0,0 +1,24 @@ +set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false; + +CREATE EXTERNAL TABLE table2 ( + tenant_id int +) PARTITIONED BY (date_key int) +STORED AS PARQUET; + +CREATE EXTERNAL TABLE tenant_1 ( + tenant_id int, + tenant_key bigint +) STORED AS PARQUET; + +EXPLAIN CBO +SELECT * FROM ( + SELECT date_key, tenant_id + FROM table2 + WHERE tenant_id = 0 + UNION ALL + SELECT date_key, tenant_id + FROM table2 + WHERE tenant_id <> 0 +) a +JOIN tenant_1 dt + ON a.tenant_id = dt.tenant_id; diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q new file mode 100644 index 00000000000..96fd7fa1795 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q @@ -0,0 +1,23 @@ +set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false; + +CREATE TABLE tableA ( + bd_id bigint, + quota_type string +); + +EXPLAIN CBO +SELECT a.bd_id +FROM ( + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) + ) a JOIN ( + SELECT t.bd_id + FROM tableA t + WHERE t.bd_id = 9 AND t.quota_type IN ('A','B') + UNION ALL + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) +) b ON a.bd_id = b.bd_id +WHERE a.bd_id = 8 OR a.bd_id <> 8; diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q new file mode 100644 index 00000000000..e7d31f46ad3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q @@ -0,0 +1,23 @@ +set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false; + +CREATE TABLE tableA ( + bd_id bigint, + quota_type string +); + +EXPLAIN CBO +SELECT a.bd_id +FROM ( + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) + ) a JOIN ( + SELECT t.bd_id + FROM tableA t + WHERE t.bd_id = 9 AND t.quota_type IN ('A','B') + INTERSECT + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) +) b ON a.bd_id = b.bd_id +WHERE a.bd_id = 8 OR a.bd_id <> 8; diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out new file mode 100644 index 00000000000..17c1bcc9d41 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out @@ -0,0 +1,75 @@ +PREHOOK: query: CREATE TABLE test1 (act_nbr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1 (act_nbr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: CREATE TABLE test2 (month int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2 (month int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: CREATE TABLE test3 (mth int, con_usd double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test3 +POSTHOOK: query: CREATE TABLE test3 (mth int, con_usd double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test3 +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. +PREHOOK: query: EXPLAIN CBO +SELECT c.month, + d.con_usd +FROM + (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month + FROM test1 + UNION ALL + SELECT month + FROM test2 + WHERE month = 202110) c +JOIN test3 d ON c.month = d.mth +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +PREHOOK: Input: default@test3 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT c.month, + d.con_usd +FROM + (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month + FROM test1 + UNION ALL + SELECT month + FROM test2 + WHERE month = 202110) c +JOIN test3 d ON c.month = d.mth +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +POSTHOOK: Input: default@test3 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(month=[$0], con_usd=[$2]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(month=[$0]) + HiveUnion(all=[true]) + HiveProject(month=[CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER]) + HiveFilter(condition=[IS NOT NULL(CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER)]) + HiveProject(DUMMY=[0]) + HiveTableScan(table=[[default, test1]], table:alias=[test1]) + HiveProject($f0=[CAST(202110):INTEGER]) + HiveFilter(condition=[=($0, 202110)]) + HiveTableScan(table=[[default, test2]], table:alias=[test2]) + HiveProject(mth=[$0], con_usd=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, test3]], table:alias=[d]) + diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out new file mode 100644 index 00000000000..270de1776ed --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out @@ -0,0 +1,74 @@ +PREHOOK: query: CREATE EXTERNAL TABLE table2 ( + tenant_id int +) PARTITIONED BY (date_key int) +STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: CREATE EXTERNAL TABLE table2 ( + tenant_id int +) PARTITIONED BY (date_key int) +STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: CREATE EXTERNAL TABLE tenant_1 ( + tenant_id int, + tenant_key bigint +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tenant_1 +POSTHOOK: query: CREATE EXTERNAL TABLE tenant_1 ( + tenant_id int, + tenant_key bigint +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tenant_1 +PREHOOK: query: EXPLAIN CBO +SELECT * FROM ( + SELECT date_key, tenant_id + FROM table2 + WHERE tenant_id = 0 + UNION ALL + SELECT date_key, tenant_id + FROM table2 + WHERE tenant_id <> 0 +) a +JOIN tenant_1 dt + ON a.tenant_id = dt.tenant_id +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +PREHOOK: Input: default@tenant_1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT * FROM ( + SELECT date_key, tenant_id + FROM table2 + WHERE tenant_id = 0 + UNION ALL + SELECT date_key, tenant_id + FROM table2 + WHERE tenant_id <> 0 +) a +JOIN tenant_1 dt + ON a.tenant_id = dt.tenant_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +POSTHOOK: Input: default@tenant_1 +#### A masked pattern was here #### +CBO PLAN: +HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(date_key=[$0], tenant_id=[$1]) + HiveUnion(all=[true]) + HiveProject(date_key=[$1], tenant_id=[CAST(0):INTEGER]) + HiveFilter(condition=[=($0, 0)]) + HiveTableScan(table=[[default, table2]], table:alias=[table2]) + HiveProject(date_key=[$1], tenant_id=[$0]) + HiveFilter(condition=[<>($0, 0)]) + HiveTableScan(table=[[default, table2]], table:alias=[table2]) + HiveProject(tenant_id=[$0], tenant_key=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, tenant_1]], table:alias=[dt]) + diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out new file mode 100644 index 00000000000..365cb86b2d9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: CREATE TABLE tableA ( + bd_id bigint, + quota_type string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tableA +POSTHOOK: query: CREATE TABLE tableA ( + bd_id bigint, + quota_type string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tableA +PREHOOK: query: EXPLAIN CBO +SELECT a.bd_id +FROM ( + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) + ) a JOIN ( + SELECT t.bd_id + FROM tableA t + WHERE t.bd_id = 9 AND t.quota_type IN ('A','B') + UNION ALL + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) +) b ON a.bd_id = b.bd_id +WHERE a.bd_id = 8 OR a.bd_id <> 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@tablea +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT a.bd_id +FROM ( + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) + ) a JOIN ( + SELECT t.bd_id + FROM tableA t + WHERE t.bd_id = 9 AND t.quota_type IN ('A','B') + UNION ALL + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) +) b ON a.bd_id = b.bd_id +WHERE a.bd_id = 8 OR a.bd_id <> 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tablea +#### A masked pattern was here #### +CBO PLAN: +HiveProject(bd_id=[$0]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(bd_id=[$0]) + HiveFilter(condition=[AND(OR(=($0, 8), <>($0, 8)), OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, tablea]], table:alias=[t]) + HiveProject($f0=[$0]) + HiveUnion(all=[true]) + HiveProject($f0=[CAST(9:BIGINT):BIGINT]) + HiveFilter(condition=[AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) + HiveTableScan(table=[[default, tablea]], table:alias=[t]) + HiveProject(bd_id=[$0]) + HiveFilter(condition=[AND(OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, tablea]], table:alias=[t]) + diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out new file mode 100644 index 00000000000..da16c285203 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out @@ -0,0 +1,73 @@ +PREHOOK: query: CREATE TABLE tableA ( + bd_id bigint, + quota_type string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tableA +POSTHOOK: query: CREATE TABLE tableA ( + bd_id bigint, + quota_type string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tableA +PREHOOK: query: EXPLAIN CBO +SELECT a.bd_id +FROM ( + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) + ) a JOIN ( + SELECT t.bd_id + FROM tableA t + WHERE t.bd_id = 9 AND t.quota_type IN ('A','B') + INTERSECT + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) +) b ON a.bd_id = b.bd_id +WHERE a.bd_id = 8 OR a.bd_id <> 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@tablea +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT a.bd_id +FROM ( + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) + ) a JOIN ( + SELECT t.bd_id + FROM tableA t + WHERE t.bd_id = 9 AND t.quota_type IN ('A','B') + INTERSECT + SELECT t.bd_id + FROM tableA t + WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B')) +) b ON a.bd_id = b.bd_id +WHERE a.bd_id = 8 OR a.bd_id <> 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tablea +#### A masked pattern was here #### +CBO PLAN: +HiveProject(bd_id=[$0]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(bd_id=[$0]) + HiveFilter(condition=[AND(OR(=($0, 8), <>($0, 8)), OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, tablea]], table:alias=[t]) + HiveProject($f0=[$0]) + HiveFilter(condition=[=($1, 2)]) + HiveAggregate(group=[{0}], agg#0=[count($1)]) + HiveProject($f0=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject($f0=[CAST(9:BIGINT):BIGINT]) + HiveFilter(condition=[AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) + HiveTableScan(table=[[default, tablea]], table:alias=[t]) + HiveProject(bd_id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveFilter(condition=[AND(OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, tablea]], table:alias=[t]) +