This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 841be7f43e7 HIVE-27801: Exists subquery rewrite results in a wrong plan (Denys Kuzmenko, reviewed by Attila Turoczy, Ayush Saxena) 841be7f43e7 is described below commit 841be7f43e73387c1024c26c26566f336f1972d3 Author: Denys Kuzmenko <denisk...@gmail.com> AuthorDate: Wed Dec 13 11:15:18 2023 +0200 HIVE-27801: Exists subquery rewrite results in a wrong plan (Denys Kuzmenko, reviewed by Attila Turoczy, Ayush Saxena) Closes #4922 --- .../calcite/rules/HiveRelDecorrelator.java | 16 ++---- .../subquery_complex_correlation_predicates.q | 15 +++++ .../subquery_complex_correlation_predicates.q.out | 66 ++++++++++++++++++++++ 3 files changed, 85 insertions(+), 12 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index fd54c0fd689..f10a19e41ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -3014,18 +3014,10 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor { } /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */ private static class CorelMapBuilder extends HiveRelShuttleImpl { - private final SortedMap<CorrelationId, RelNode> mapCorToCorRel = - new TreeMap<>(); - - private final SortedSetMultimap<RelNode, CorRef> mapRefRelToCorRef = - Multimaps.newSortedSetMultimap( - new HashMap<RelNode, Collection<CorRef>>(), - new Supplier<TreeSet<CorRef>>() { - @Override - public TreeSet<CorRef> get() { - return Sets.newTreeSet(); - } - }); + private final SortedMap<CorrelationId, RelNode> mapCorToCorRel = new TreeMap<>(); + + private final Multimap<RelNode, CorRef> mapRefRelToCorRef = + Multimaps.newListMultimap(new HashMap<>(), Lists::newArrayList); private final Map<RexFieldAccess, CorRef> mapFieldAccessToCorVar = new HashMap<>(); diff --git a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q index ccfed8dcb23..1d6dbbee8c0 100644 --- a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q +++ b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q @@ -78,3 +78,18 @@ where not exists (select a_authorkey from author a where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400)); + +-- HIVE-27801: Exists subquery rewrite results in a wrong plan +drop table if exists store_sales; +create table store_sales (promo_sk int, sales_price int, list_price int); + +insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15); + +explain cbo +select * from store_sales A where exists( +select 1 from store_sales B + where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and A.sales_price < B.sales_price); + +select * from store_sales A where exists( +select 1 from store_sales B + where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and A.sales_price < B.sales_price); diff --git a/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out b/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out index fa9768cc5de..95055833efd 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out @@ -290,3 +290,69 @@ POSTHOOK: Input: default@author POSTHOOK: Input: default@book #### A masked pattern was here #### Men Without Women +PREHOOK: query: drop table if exists store_sales +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists store_sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: create table store_sales (promo_sk int, sales_price int, list_price int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales (promo_sk int, sales_price int, list_price int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@store_sales +POSTHOOK: query: insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@store_sales +POSTHOOK: Lineage: store_sales.list_price SCRIPT [] +POSTHOOK: Lineage: store_sales.promo_sk SCRIPT [] +POSTHOOK: Lineage: store_sales.sales_price SCRIPT [] +PREHOOK: query: explain cbo +select * from store_sales A where exists( +select 1 from store_sales B + where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and A.sales_price < B.sales_price) +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from store_sales A where exists( +select 1 from store_sales B + where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and A.sales_price < B.sales_price) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSemiJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[semi]) + HiveProject(promo_sk=[$0], sales_price=[$1], list_price=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[a]) + HiveProject(promo_sk0=[$3], sales_price0=[$4]) + HiveJoin(condition=[AND(=($3, $0), >($4, $2), <($4, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(promo_sk=[$0], sales_price=[$1], list_price=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[b]) + HiveProject(promo_sk=[$0], sales_price=[$1]) + HiveAggregate(group=[{0, 1}]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[a]) + +PREHOOK: query: select * from store_sales A where exists( +select 1 from store_sales B + where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and A.sales_price < B.sales_price) +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: select * from store_sales A where exists( +select 1 from store_sales B + where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and A.sales_price < B.sales_price) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +#### A masked pattern was here ####