Dongkyu Hwangbo created TAJO-1774:
-------------------------------------
Summary: Cannot run INTERSECT/EXCEPT clause with inter-subquery
that has same column name
Key: TAJO-1774
URL: https://issues.apache.org/jira/browse/TAJO-1774
Project: Tajo
Issue Type: Improvement
Affects Versions: 0.11.0
Reporter: Dongkyu Hwangbo
Priority: Minor
This is TPC-DS q38.
{code:SQL}
select count(*) from (
select distinct c_last_name, c_first_name, d_date
from store_sales, date_dim, customer
where store_sales.ss_sold_date_sk = date_dim.d_date_sk
and store_sales.ss_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200 + 11
intersect
select distinct c_last_name, c_first_name, d_date
from catalog_sales, date_dim, customer
where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200 + 11
intersect
select distinct c_last_name, c_first_name, d_date
from web_sales, date_dim, customer
where web_sales.ws_sold_date_sk = date_dim.d_date_sk
and web_sales.ws_bill_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200 + 11
) hot_cust
limit 100;
{code}
when i enter this query, tsql gives me this error message.
{noformat}
ERROR: column name 'tpcds.customer.c_last_name (TEXT),
tpcds.customer.c_last_name (TEXT)' is ambiguous
{noformat}
so, to run this query, I rewrite query like below.
{code:SQL}
drop table temp1;
drop table temp2;
drop table temp3;
create table temp1(c_last_name text, c_first_name text, d_date date);
create table temp2(c_last_name text, c_first_name text, d_date date);
create table temp3(c_last_name text, c_first_name text, d_date date);
insert overwrite into temp1
select distinct c_last_name, c_first_name, d_date
from store_sales, date_dim, customer
where store_sales.ss_sold_date_sk = date_dim.d_date_sk
and store_sales.ss_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200 + 11;
insert overwrite into temp2
select distinct c_last_name, c_first_name, d_date
from catalog_sales, date_dim, customer
where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200 + 11;
insert overwrite into temp3
select distinct c_last_name, c_first_name, d_date
from web_sales, date_dim, customer
where web_sales.ws_sold_date_sk = date_dim.d_date_sk
and web_sales.ws_bill_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200 + 11;
select count(*)
from temp1, temp2, temp3
where temp1.c_last_name = temp2.c_last_name
and temp1.c_first_name = temp2.c_last_name
and temp1.d_date = temp2.d_date
and temp2.c_last_name = temp3.c_last_name
and temp2.c_first_name = temp3.c_last_name
and temp2.d_date = temp3.d_date
and temp3.c_last_name = temp1.c_last_name
and temp3.c_first_name = temp1.c_last_name
and temp3.d_date = temp1.d_date;
{code}
then tajo can run with this query.
I think tajo cannot recognize multiple subquery with same column name.
same problem is occured with TPC-DS q87. Here is query.
{code:SQL}
select count(*)
from ((select distinct c_last_name, c_first_name, d_date
from store_sales, date_dim, customer
where store_sales.ss_sold_date_sk = date_dim.d_date_sk
and store_sales.ss_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200+11)
except
(select distinct c_last_name, c_first_name, d_date
from catalog_sales, date_dim, customer
where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200+11)
except
(select distinct c_last_name, c_first_name, d_date
from web_sales, date_dim, customer
where web_sales.ws_sold_date_sk = date_dim.d_date_sk
and web_sales.ws_bill_customer_sk = customer.c_customer_sk
and d_month_seq between 1200 and 1200+11)
) cool_cust
;
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)