[ 
https://issues.apache.org/jira/browse/HIVE-27801?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Denys Kuzmenko updated HIVE-27801:
----------------------------------
    Description: 
reproduce:
{code}
set hive.explain.user=false;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.stats.autogather=false;
set hive.exec.dynamic.partition.mode=nonstrict;

drop table if exists store_sales;

create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price 
int) stored as orc tblproperties('transactional'='true');
insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);

explain cbo 
select * from store_sales A where exists ( 
select 1 from store_sales B 
    where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price);
    
select * from store_sales A where exists( 
select 1 from store_sales B 
    where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price);  
    
explain cbo    
select * from store_sales A 
LEFT SEMI JOIN store_sales B 
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price;     

select * from store_sales A 
LEFT SEMI JOIN store_sales B 
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price;    
{code}
[^]

  was:
reproduce:
{code}
set hive.explain.user=false;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.stats.autogather=false;
set hive.exec.dynamic.partition.mode=nonstrict;

drop table if exists store_sales;

create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price 
int) stored as orc tblproperties('transactional'='true');
insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);

explain cbo 
select * from store_sales A where exists ( 
select 1 from store_sales B 
    where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price);
    
select * from store_sales A where exists( 
select 1 from store_sales B 
    where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price);  
    
explain cbo    
select * from store_sales A 
LEFT SEMI JOIN store_sales B 
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price;     

select * from store_sales A 
LEFT SEMI JOIN store_sales B 
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price;    
{code}


> Exists subquery rewrite into LEFT SEMI JOIN produce incorrect plan
> ------------------------------------------------------------------
>
>                 Key: HIVE-27801
>                 URL: https://issues.apache.org/jira/browse/HIVE-27801
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Denys Kuzmenko
>            Priority: Major
>         Attachments: Screenshot 2023-10-10 at 20.14.03.png
>
>
> reproduce:
> {code}
> set hive.explain.user=false;
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> set hive.stats.autogather=false;
> set hive.exec.dynamic.partition.mode=nonstrict;
> drop table if exists store_sales;
> create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price 
> int) stored as orc tblproperties('transactional'='true');
> insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);
> explain cbo 
> select * from store_sales A where exists ( 
> select 1 from store_sales B 
>     where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price 
> and A.ss_sales_price<B.ss_sales_price);
>     
> select * from store_sales A where exists( 
> select 1 from store_sales B 
>     where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price 
> and A.ss_sales_price<B.ss_sales_price);  
>     
> explain cbo    
> select * from store_sales A 
> LEFT SEMI JOIN store_sales B 
> ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
> A.ss_sales_price<B.ss_sales_price;     
> select * from store_sales A 
> LEFT SEMI JOIN store_sales B 
> ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
> A.ss_sales_price<B.ss_sales_price;    
> {code}
> [^]



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to