[ https://issues.apache.org/jira/browse/HIVE-27801?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Denys Kuzmenko updated HIVE-27801: ---------------------------------- Description: reproduce (no rows should be returned): {code} set hive.explain.user=false; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.stats.autogather=false; set hive.exec.dynamic.partition.mode=nonstrict; drop table if exists store_sales; create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price int) stored as orc tblproperties('transactional'='true'); insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15); explain cbo select * from store_sales A where exists ( select 1 from store_sales B where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price); select * from store_sales A where exists( select 1 from store_sales B where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price); explain cbo select * from store_sales A LEFT SEMI JOIN store_sales B ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price; select * from store_sales A LEFT SEMI JOIN store_sales B ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price; {code} plan diff: [^Screenshot 2023-10-10 at 20.14.03.png] expectation: query should return 0 rows was: reproduce (no rows should be returned): {code} set hive.explain.user=false; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.stats.autogather=false; set hive.exec.dynamic.partition.mode=nonstrict; drop table if exists store_sales; create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price int) stored as orc tblproperties('transactional'='true'); insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15); explain cbo select * from store_sales A where exists ( select 1 from store_sales B where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price); select * from store_sales A where exists( select 1 from store_sales B where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price); explain cbo select * from store_sales A LEFT SEMI JOIN store_sales B ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price; select * from store_sales A LEFT SEMI JOIN store_sales B ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and A.ss_sales_price<B.ss_sales_price; {code} plan diff: [^Screenshot 2023-10-10 at 20.14.03.png] > Exists subquery rewrite into LEFT SEMI JOIN produce incorrect plan > ------------------------------------------------------------------ > > Key: HIVE-27801 > URL: https://issues.apache.org/jira/browse/HIVE-27801 > Project: Hive > Issue Type: Bug > Affects Versions: 4.0.0 > Reporter: Denys Kuzmenko > Priority: Critical > Labels: hive-4.0.0-must > Attachments: Screenshot 2023-10-10 at 20.14.03.png > > > reproduce (no rows should be returned): > {code} > set hive.explain.user=false; > set hive.support.concurrency=true; > set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; > set hive.stats.autogather=false; > set hive.exec.dynamic.partition.mode=nonstrict; > drop table if exists store_sales; > create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price > int) stored as orc tblproperties('transactional'='true'); > insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15); > explain cbo > select * from store_sales A where exists ( > select 1 from store_sales B > where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price > and A.ss_sales_price<B.ss_sales_price); > > select * from store_sales A where exists( > select 1 from store_sales B > where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price > and A.ss_sales_price<B.ss_sales_price); > > explain cbo > select * from store_sales A > LEFT SEMI JOIN store_sales B > ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and > A.ss_sales_price<B.ss_sales_price; > select * from store_sales A > LEFT SEMI JOIN store_sales B > ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and > A.ss_sales_price<B.ss_sales_price; > {code} > plan diff: > [^Screenshot 2023-10-10 at 20.14.03.png] > expectation: query should return 0 rows -- This message was sent by Atlassian Jira (v8.20.10#820010)