This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 841be7f43e7 HIVE-27801: Exists subquery rewrite results in a wrong 
plan (Denys Kuzmenko, reviewed by Attila Turoczy, Ayush Saxena)
841be7f43e7 is described below

commit 841be7f43e73387c1024c26c26566f336f1972d3
Author: Denys Kuzmenko <denisk...@gmail.com>
AuthorDate: Wed Dec 13 11:15:18 2023 +0200

    HIVE-27801: Exists subquery rewrite results in a wrong plan (Denys 
Kuzmenko, reviewed by Attila Turoczy, Ayush Saxena)
    
    Closes #4922
---
 .../calcite/rules/HiveRelDecorrelator.java         | 16 ++----
 .../subquery_complex_correlation_predicates.q      | 15 +++++
 .../subquery_complex_correlation_predicates.q.out  | 66 ++++++++++++++++++++++
 3 files changed, 85 insertions(+), 12 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index fd54c0fd689..f10a19e41ad 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -3014,18 +3014,10 @@ public final class HiveRelDecorrelator implements 
ReflectiveVisitor {
   }
   /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */
   private static class CorelMapBuilder extends HiveRelShuttleImpl {
-    private final SortedMap<CorrelationId, RelNode> mapCorToCorRel =
-        new TreeMap<>();
-
-    private final SortedSetMultimap<RelNode, CorRef> mapRefRelToCorRef =
-        Multimaps.newSortedSetMultimap(
-            new HashMap<RelNode, Collection<CorRef>>(),
-            new Supplier<TreeSet<CorRef>>() {
-              @Override
-              public TreeSet<CorRef> get() {
-                return Sets.newTreeSet();
-              }
-            });
+    private final SortedMap<CorrelationId, RelNode> mapCorToCorRel = new 
TreeMap<>();
+
+    private final Multimap<RelNode, CorRef> mapRefRelToCorRef =
+        Multimaps.newListMultimap(new HashMap<>(), Lists::newArrayList);
 
     private final Map<RexFieldAccess, CorRef> mapFieldAccessToCorVar = new 
HashMap<>();
 
diff --git 
a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q 
b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
index ccfed8dcb23..1d6dbbee8c0 100644
--- 
a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
+++ 
b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
@@ -78,3 +78,18 @@ where not exists
           (select a_authorkey
            from author a
            where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400));
+    
+-- HIVE-27801: Exists subquery rewrite results in a wrong plan              
+drop table if exists store_sales;       
+create table store_sales (promo_sk int, sales_price int, list_price int);
+                   
+insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);
+
+explain cbo
+select * from store_sales A where exists( 
+select 1 from store_sales B 
+    where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and 
A.sales_price < B.sales_price);  
+            
+select * from store_sales A where exists( 
+select 1 from store_sales B 
+    where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and 
A.sales_price < B.sales_price);            
diff --git 
a/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
 
b/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
index fa9768cc5de..95055833efd 100644
--- 
a/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
@@ -290,3 +290,69 @@ POSTHOOK: Input: default@author
 POSTHOOK: Input: default@book
 #### A masked pattern was here ####
 Men Without Women
+PREHOOK: query: drop table if exists store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table store_sales (promo_sk int, sales_price int, 
list_price int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: create table store_sales (promo_sk int, sales_price int, 
list_price int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 
10, 15)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 
10, 15)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store_sales.list_price SCRIPT []
+POSTHOOK: Lineage: store_sales.promo_sk SCRIPT []
+POSTHOOK: Lineage: store_sales.sales_price SCRIPT []
+PREHOOK: query: explain cbo
+select * from store_sales A where exists( 
+select 1 from store_sales B 
+    where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and 
A.sales_price < B.sales_price)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select * from store_sales A where exists( 
+select 1 from store_sales B 
+    where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and 
A.sales_price < B.sales_price)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSemiJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[semi])
+  HiveProject(promo_sk=[$0], sales_price=[$1], list_price=[$2])
+    HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+      HiveTableScan(table=[[default, store_sales]], table:alias=[a])
+  HiveProject(promo_sk0=[$3], sales_price0=[$4])
+    HiveJoin(condition=[AND(=($3, $0), >($4, $2), <($4, $1))], 
joinType=[inner], algorithm=[none], cost=[not available])
+      HiveProject(promo_sk=[$0], sales_price=[$1], list_price=[$2])
+        HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT 
NULL($1))])
+          HiveTableScan(table=[[default, store_sales]], table:alias=[b])
+      HiveProject(promo_sk=[$0], sales_price=[$1])
+        HiveAggregate(group=[{0, 1}])
+          HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+            HiveTableScan(table=[[default, store_sales]], table:alias=[a])
+
+PREHOOK: query: select * from store_sales A where exists( 
+select 1 from store_sales B 
+    where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and 
A.sales_price < B.sales_price)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select * from store_sales A where exists( 
+select 1 from store_sales B 
+    where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and 
A.sales_price < B.sales_price)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####

Reply via email to