Dongkyu Hwangbo created TAJO-1774:
-------------------------------------

             Summary: Cannot run INTERSECT/EXCEPT clause with inter-subquery 
that has same column name
                 Key: TAJO-1774
                 URL: https://issues.apache.org/jira/browse/TAJO-1774
             Project: Tajo
          Issue Type: Improvement
    Affects Versions: 0.11.0
            Reporter: Dongkyu Hwangbo
            Priority: Minor


This is TPC-DS q38.
{code:SQL}
-- TPC-DS q38 (reproduction case): count the (c_last_name, c_first_name, d_date)
-- triples that appear in store, catalog AND web sales for d_month_seq values
-- 1200 through 1211 (BETWEEN is inclusive on both ends).
-- All three INTERSECT branches project identically named columns; this is the
-- statement that fails with the "column name ... is ambiguous" error.
select count(*) from (
    -- Branch 1: customers/dates with store sales.
    select distinct c_last_name, c_first_name, d_date
    from store_sales, date_dim, customer
          where store_sales.ss_sold_date_sk = date_dim.d_date_sk
      and store_sales.ss_customer_sk = customer.c_customer_sk
      and d_month_seq between 1200 and 1200 + 11
  intersect
    -- Branch 2: customers/dates with catalog sales (joined on the bill-to customer).
    select distinct c_last_name, c_first_name, d_date
    from catalog_sales, date_dim, customer
          where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
      and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
      and d_month_seq between 1200 and 1200 + 11
  intersect
    -- Branch 3: customers/dates with web sales (joined on the bill-to customer).
    select distinct c_last_name, c_first_name, d_date
    from web_sales, date_dim, customer
          where web_sales.ws_sold_date_sk = date_dim.d_date_sk
      and web_sales.ws_bill_customer_sk = customer.c_customer_sk
      and d_month_seq between 1200 and 1200 + 11
) hot_cust
limit 100;
{code}
When I run this query, tsql returns the following error message.
{noformat}
ERROR: column name 'tpcds.customer.c_last_name (TEXT), 
tpcds.customer.c_last_name (TEXT)' is ambiguous
{noformat}
To work around this, I rewrote the query as shown below.
{code:SQL}
-- Workaround setup: materialize each branch of the q38 INTERSECT into its own
-- table so the final query can refer to the columns with table qualifiers.
-- IF EXISTS keeps the script re-runnable: a bare DROP TABLE raises an error
-- on the first run, when the temp tables have not been created yet.
drop table if exists temp1;
drop table if exists temp2;
drop table if exists temp3;

create table temp1(c_last_name text, c_first_name text, d_date date);
create table temp2(c_last_name text, c_first_name text, d_date date);
create table temp3(c_last_name text, c_first_name text, d_date date);

-- temp1: distinct (last name, first name, date) triples with store sales.
insert overwrite into temp1
select distinct c_last_name, c_first_name, d_date
    from store_sales, date_dim, customer
          where store_sales.ss_sold_date_sk = date_dim.d_date_sk
      and store_sales.ss_customer_sk = customer.c_customer_sk
      and d_month_seq between 1200 and 1200 + 11;

-- temp2: distinct triples with catalog sales (bill-to customer).
insert overwrite into temp2
select distinct c_last_name, c_first_name, d_date
    from catalog_sales, date_dim, customer
          where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
      and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
      and d_month_seq between 1200 and 1200 + 11;

-- temp3: distinct triples with web sales (bill-to customer).
insert overwrite into temp3
select distinct c_last_name, c_first_name, d_date
    from web_sales, date_dim, customer
          where web_sales.ws_sold_date_sk = date_dim.d_date_sk
      and web_sales.ws_bill_customer_sk = customer.c_customer_sk
      and d_month_seq between 1200 and 1200 + 11;

-- Emulate the three-way INTERSECT of q38 by joining the temp tables on all
-- three columns. Each temp table is filled via SELECT DISTINCT, so a matching
-- triple contributes exactly one joined row.
-- Fix: c_first_name is now compared with c_first_name; the original compared
-- it with c_last_name in all three pairings, which only matched rows whose
-- first and last names were equal.
-- NOTE: unlike INTERSECT, "=" never matches NULL, so triples containing a NULL
-- name or date are not counted here.
select count(*)
from temp1, temp2, temp3
where temp1.c_last_name = temp2.c_last_name
and temp1.c_first_name = temp2.c_first_name
and temp1.d_date = temp2.d_date
and temp2.c_last_name = temp3.c_last_name
and temp2.c_first_name = temp3.c_first_name
and temp2.d_date = temp3.d_date
-- The temp3 = temp1 predicates are implied by the two pairings above
-- (equality is transitive); kept to mirror the original workaround.
and temp3.c_last_name = temp1.c_last_name
and temp3.c_first_name = temp1.c_first_name
and temp3.d_date = temp1.d_date;
{code}
With this rewrite, Tajo can run the query.
I think Tajo cannot resolve identically named columns that come from multiple subqueries combined with INTERSECT/EXCEPT.

The same problem occurs with TPC-DS q87. Here is the query.
{code:SQL}
-- TPC-DS q87 (second reproduction case): count the
-- (c_last_name, c_first_name, d_date) triples that have store sales but are
-- then removed by EXCEPT against the catalog-sales and web-sales triples, for
-- d_month_seq values 1200 through 1211 (BETWEEN is inclusive on both ends).
-- As in q38, every branch projects identically named columns.
select count(*) 
from ((select distinct c_last_name, c_first_name, d_date
       from store_sales, date_dim, customer
       where store_sales.ss_sold_date_sk = date_dim.d_date_sk
         and store_sales.ss_customer_sk = customer.c_customer_sk
         and d_month_seq between 1200 and 1200+11)
       except
      -- Remove triples that also have catalog sales (bill-to customer).
      (select distinct c_last_name, c_first_name, d_date
       from catalog_sales, date_dim, customer
       where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
         and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
         and d_month_seq between 1200 and 1200+11)
       except
      -- Remove triples that also have web sales (bill-to customer).
      (select distinct c_last_name, c_first_name, d_date
       from web_sales, date_dim, customer
       where web_sales.ws_sold_date_sk = date_dim.d_date_sk
         and web_sales.ws_bill_customer_sk = customer.c_customer_sk
         and d_month_seq between 1200 and 1200+11)
) cool_cust
;
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to