[
https://issues.apache.org/jira/browse/TAJO-748?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jaehwa Jung updated TAJO-748:
-----------------------------
Description:
I found that inline view doesn't run expected at multiple join as follows:
*Environment*
* DataSet: TPC-DS
* tajo.dist-query.join.broadcast.auto : false
*Case: 1*
{code:xml}
SELECT COUNT(*)
FROM (
SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27'
) cs1
JOIN item i ON (i.i_item_sk = cs1.cs_item_sk);
{code}
- expected result: 4163848
- activated result: 4163848
*Case: 2*
{code:xml}
select count(*)
from item i
JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
1.3 * avg(cs_ext_discount_amt) as
avg_cs_ext_discount_amt
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as
cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27')
cs2
GROUP BY cs2.cs_item_sk) tmp1
ON (i.i_item_sk = tmp1.cs_item_sk);
{code}
- expected result: 102000
- activated result: 102000
*Case: 3*
{code:xml}
SELECT COUNT(*)
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27') cs1
JOIN item i ON (i.i_item_sk = cs1.cs_item_sk)
JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
1.3 * avg(cs_ext_discount_amt) as
avg_cs_ext_discount_amt
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as
cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27')
cs2
GROUP BY cs2.cs_item_sk) tmp1
ON (i.i_item_sk = tmp1.cs_item_sk)
WHERE i.i_manufact_id = 436;
{code}
- expected result: 80
- activated result: 4586
*Case: 4*
{code:xml}
SELECT COUNT(*)
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27') cs1
JOIN item i ON (i.i_item_sk = cs1.cs_item_sk)
JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
1.3 * avg(cs_ext_discount_amt) as
avg_cs_ext_discount_amt
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as
cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27')
cs2
GROUP BY cs2.cs_item_sk) tmp1
ON (i.i_item_sk = tmp1.cs_item_sk)
{code}
- expected result: 71147
- activated result: 4163848
For reference, I made activated result using hive.
was:
I found that inline view doesn't run expected at multiple join as follows:
*Environment*
* DataSet: TPC-DS
*Case: 1*
{code:xml}
SELECT COUNT(*)
FROM (
SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27'
) cs1
JOIN item i ON (i.i_item_sk = cs1.cs_item_sk);
{code}
- expected result: 4163848
- activated result: 4163848
*Case: 2*
{code:xml}
select count(*)
from item i
JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
1.3 * avg(cs_ext_discount_amt) as
avg_cs_ext_discount_amt
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as
cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27')
cs2
GROUP BY cs2.cs_item_sk) tmp1
ON (i.i_item_sk = tmp1.cs_item_sk);
{code}
- expected result: 102000
- activated result: 102000
*Case: 3*
{code:xml}
SELECT COUNT(*)
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27') cs1
JOIN item i ON (i.i_item_sk = cs1.cs_item_sk)
JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
1.3 * avg(cs_ext_discount_amt) as
avg_cs_ext_discount_amt
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as
cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27')
cs2
GROUP BY cs2.cs_item_sk) tmp1
ON (i.i_item_sk = tmp1.cs_item_sk)
WHERE i.i_manufact_id = 436;
{code}
- expected result: 80
- activated result: 4586
*Case: 4*
{code:xml}
SELECT COUNT(*)
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27') cs1
JOIN item i ON (i.i_item_sk = cs1.cs_item_sk)
JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
1.3 * avg(cs_ext_discount_amt) as
avg_cs_ext_discount_amt
FROM (SELECT cs.cs_item_sk as cs_item_sk,
cs.cs_ext_discount_amt as
cs_ext_discount_amt
FROM catalog_sales cs
JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
WHERE d.d_date between '2000-01-27' and '2000-04-27')
cs2
GROUP BY cs2.cs_item_sk) tmp1
ON (i.i_item_sk = tmp1.cs_item_sk)
{code}
- expected result: 71147
- activated result: 4163848
> Inline view doesn't run as expected at multiple join.
> -----------------------------------------------------
>
> Key: TAJO-748
> URL: https://issues.apache.org/jira/browse/TAJO-748
> Project: Tajo
> Issue Type: Sub-task
> Reporter: Jaehwa Jung
>
> I found that inline view doesn't run expected at multiple join as follows:
> *Environment*
> * DataSet: TPC-DS
> * tajo.dist-query.join.broadcast.auto : false
> *Case: 1*
> {code:xml}
> SELECT COUNT(*)
> FROM (
> SELECT cs.cs_item_sk as cs_item_sk,
> cs.cs_ext_discount_amt as cs_ext_discount_amt
> FROM catalog_sales cs
> JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
> WHERE d.d_date between '2000-01-27' and '2000-04-27'
> ) cs1
> JOIN item i ON (i.i_item_sk = cs1.cs_item_sk);
> {code}
> - expected result: 4163848
> - activated result: 4163848
> *Case: 2*
> {code:xml}
> select count(*)
> from item i
> JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
> 1.3 * avg(cs_ext_discount_amt) as
> avg_cs_ext_discount_amt
> FROM (SELECT cs.cs_item_sk as cs_item_sk,
> cs.cs_ext_discount_amt as
> cs_ext_discount_amt
> FROM catalog_sales cs
> JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
> WHERE d.d_date between '2000-01-27' and '2000-04-27')
> cs2
> GROUP BY cs2.cs_item_sk) tmp1
> ON (i.i_item_sk = tmp1.cs_item_sk);
> {code}
> - expected result: 102000
> - activated result: 102000
> *Case: 3*
> {code:xml}
> SELECT COUNT(*)
> FROM (SELECT cs.cs_item_sk as cs_item_sk,
> cs.cs_ext_discount_amt as cs_ext_discount_amt
> FROM catalog_sales cs
> JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
> WHERE d.d_date between '2000-01-27' and '2000-04-27') cs1
> JOIN item i ON (i.i_item_sk = cs1.cs_item_sk)
> JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
> 1.3 * avg(cs_ext_discount_amt) as
> avg_cs_ext_discount_amt
> FROM (SELECT cs.cs_item_sk as cs_item_sk,
> cs.cs_ext_discount_amt as
> cs_ext_discount_amt
> FROM catalog_sales cs
> JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
> WHERE d.d_date between '2000-01-27' and '2000-04-27')
> cs2
> GROUP BY cs2.cs_item_sk) tmp1
> ON (i.i_item_sk = tmp1.cs_item_sk)
> WHERE i.i_manufact_id = 436;
> {code}
> - expected result: 80
> - activated result: 4586
> *Case: 4*
> {code:xml}
> SELECT COUNT(*)
> FROM (SELECT cs.cs_item_sk as cs_item_sk,
> cs.cs_ext_discount_amt as cs_ext_discount_amt
> FROM catalog_sales cs
> JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
> WHERE d.d_date between '2000-01-27' and '2000-04-27') cs1
> JOIN item i ON (i.i_item_sk = cs1.cs_item_sk)
> JOIN (SELECT cs2.cs_item_sk as cs_item_sk,
> 1.3 * avg(cs_ext_discount_amt) as
> avg_cs_ext_discount_amt
> FROM (SELECT cs.cs_item_sk as cs_item_sk,
> cs.cs_ext_discount_amt as
> cs_ext_discount_amt
> FROM catalog_sales cs
> JOIN date_dim d ON (d.d_date_sk = cs.cs_sold_date_sk)
> WHERE d.d_date between '2000-01-27' and '2000-04-27')
> cs2
> GROUP BY cs2.cs_item_sk) tmp1
> ON (i.i_item_sk = tmp1.cs_item_sk)
> {code}
> - expected result: 71147
> - activated result: 4163848
> For reference, I made activated result using hive.
--
This message was sent by Atlassian JIRA
(v6.2#6252)