[ https://issues.apache.org/jira/browse/DRILL-6861?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Boaz Ben-Zvi updated DRILL-6861: -------------------------------- Description: Following DRILL-6755 (_Avoid building a hash table when the probe side is empty_) - The special case of an empty spilled probe-partition was not handled. When such a case happens, the Hash-Join terminates early (returns NONE) and the remaining partitions are not processed/returned (which may lead to incorrect results). A test case - force tpcds/query95 to spill (sf1) : {code:java} 0: jdbc:drill:zk=local> alter system set `exec.hashjoin.max_batches_in_memory` = 40; +-------+-----------------------------------------------+ | ok | summary | +-------+-----------------------------------------------+ | true | exec.hashjoin.max_batches_in_memory updated. | +-------+-----------------------------------------------+ 1 row selected (1.325 seconds) 0: jdbc:drill:zk=local> WITH ws_wh AS . . . . . . . . . . . > ( . . . . . . . . . . . > SELECT ws1.ws_order_number, . . . . . . . . . . . > ws1.ws_warehouse_sk wh1, . . . . . . . . . . . > ws2.ws_warehouse_sk wh2 . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_sales` ws1, . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/web_sales` ws2 . . . . . . . . . . . > WHERE ws1.ws_order_number = ws2.ws_order_number . . . . . . . . . . . > AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) . . . . . . . . . . . > SELECT . . . . . . . . . . . > Count(DISTINCT ws1.ws_order_number) AS `order count` , . . . . . . . . . . . > Sum(ws1.ws_ext_ship_cost) AS `total shipping cost` , . . . . . . . . . . . > Sum(ws1.ws_net_profit) AS `total net profit` . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_sales` ws1 , . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/date_dim` dd, . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/customer_address` ca, . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/web_site` wbst . . . . . . . . . . . > WHERE dd.d_date BETWEEN '2000-04-01' AND ( . . . . . . . . . . . > Cast('2000-04-01' AS DATE) + INTERVAL '60' day) . . . . . . . . . . . > AND ws1.ws_ship_date_sk = dd.d_date_sk . . . . . . . . . . . > AND ws1.ws_ship_addr_sk = ca.ca_address_sk . . . . . . . . . . . > AND ca.ca_state = 'IN' . . . . . . . . . . . > AND ws1.ws_web_site_sk = wbst.web_site_sk . . . . . . . . . . . > AND wbst.web_company_name = 'pri' . . . . . . . . . . . > AND ws1.ws_order_number IN . . . . . . . . . . . > ( . . . . . . . . . . . > SELECT ws_wh.ws_order_number . . . . . . . . . . . > FROM ws_wh) . . . . . . . . . . . > AND ws1.ws_order_number IN . . . . . . . . . . . > ( . . . . . . . . . . . > SELECT wr.wr_order_number . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_returns` wr, . . . . . . . . . . . > ws_wh . . . . . . . . . . . > WHERE wr.wr_order_number = ws_wh.ws_order_number) . . . . . . . . . . . > ORDER BY count(DISTINCT ws1.ws_order_number) . . . . . . . . . . . > LIMIT 100; +--------------+----------------------+---------------------+ | order count | total shipping cost | total net profit | +--------------+----------------------+---------------------+ | 17 | 38508.130000000005 | 20822.3 | +--------------+----------------------+---------------------+ 1 row selected (105.621 seconds) {code} The correct results should be: {code:java} +--------------+----------------------+---------------------+ | order count | total shipping cost | total net profit | +--------------+----------------------+---------------------+ | 34 | 63754.72 | 15919.009999999998 | +--------------+----------------------+---------------------+ {code} was: Following DRILL-6755 (_Avoid building a hash table when the probe side is empty_) - The special case of an empty spilled probe-partition was not handled. When such a case happens, the Hash-Join terminates early (returns NONE) and the remaining partitions are not processed/returned (which may lead to incorrect results). A test case - force tpcds/query95 to spill : {code:java} 0: jdbc:drill:zk=local> alter system set `exec.hashjoin.max_batches_in_memory` = 40; +-------+-----------------------------------------------+ | ok | summary | +-------+-----------------------------------------------+ | true | exec.hashjoin.max_batches_in_memory updated. | +-------+-----------------------------------------------+ 1 row selected (1.325 seconds) 0: jdbc:drill:zk=local> WITH ws_wh AS . . . . . . . . . . . > ( . . . . . . . . . . . > SELECT ws1.ws_order_number, . . . . . . . . . . . > ws1.ws_warehouse_sk wh1, . . . . . . . . . . . > ws2.ws_warehouse_sk wh2 . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_sales` ws1, . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/web_sales` ws2 . . . . . . . . . . . > WHERE ws1.ws_order_number = ws2.ws_order_number . . . . . . . . . . . > AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) . . . . . . . . . . . > SELECT . . . . . . . . . . . > Count(DISTINCT ws1.ws_order_number) AS `order count` , . . . . . . . . . . . > Sum(ws1.ws_ext_ship_cost) AS `total shipping cost` , . . . . . . . . . . . > Sum(ws1.ws_net_profit) AS `total net profit` . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_sales` ws1 , . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/date_dim` dd, . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/customer_address` ca, . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/web_site` wbst . . . . . . . . . . . > WHERE dd.d_date BETWEEN '2000-04-01' AND ( . . . . . . . . . . . > Cast('2000-04-01' AS DATE) + INTERVAL '60' day) . . . . . . . . . . . > AND ws1.ws_ship_date_sk = dd.d_date_sk . . . . . . . . . . . > AND ws1.ws_ship_addr_sk = ca.ca_address_sk . . . . . . . . . . . > AND ca.ca_state = 'IN' . . . . . . . . . . . > AND ws1.ws_web_site_sk = wbst.web_site_sk . . . . . . . . . . . > AND wbst.web_company_name = 'pri' . . . . . . . . . . . > AND ws1.ws_order_number IN . . . . . . . . . . . > ( . . . . . . . . . . . > SELECT ws_wh.ws_order_number . . . . . . . . . . . > FROM ws_wh) . . . . . . . . . . . > AND ws1.ws_order_number IN . . . . . . . . . . . > ( . . . . . . . . . . . > SELECT wr.wr_order_number . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_returns` wr, . . . . . . . . . . . > ws_wh . . . . . . . . . . . > WHERE wr.wr_order_number = ws_wh.ws_order_number) . . . . . . . . . . . > ORDER BY count(DISTINCT ws1.ws_order_number) . . . . . . . . . . . > LIMIT 100; +--------------+----------------------+---------------------+ | order count | total shipping cost | total net profit | +--------------+----------------------+---------------------+ | 17 | 38508.130000000005 | 20822.3 | +--------------+----------------------+---------------------+ 1 row selected (105.621 seconds) {code} The correct results should be: {code:java} +--------------+----------------------+---------------------+ | order count | total shipping cost | total net profit | +--------------+----------------------+---------------------+ | 34 | 63754.72 | 15919.009999999998 | +--------------+----------------------+---------------------+ {code} > Hash-Join: Spilled partitions are skipped following an empty probe side > ----------------------------------------------------------------------- > > Key: DRILL-6861 > URL: https://issues.apache.org/jira/browse/DRILL-6861 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Relational Operators > Affects Versions: 1.14.0 > Reporter: Boaz Ben-Zvi > Assignee: Boaz Ben-Zvi > Priority: Blocker > Fix For: 1.15.0 > > Original Estimate: 96h > Remaining Estimate: 96h > > Following DRILL-6755 (_Avoid building a hash table when the probe side > is empty_) - The special case of an empty spilled probe-partition was not > handled. When such a case happens, the Hash-Join terminates early (returns > NONE) and the remaining partitions are not processed/returned (which may lead > to incorrect results). > A test case - force tpcds/query95 to spill (sf1) : > {code:java} > 0: jdbc:drill:zk=local> alter system set > `exec.hashjoin.max_batches_in_memory` = 40; > +-------+-----------------------------------------------+ > | ok | summary | > +-------+-----------------------------------------------+ > | true | exec.hashjoin.max_batches_in_memory updated. | > +-------+-----------------------------------------------+ > 1 row selected (1.325 seconds) > 0: jdbc:drill:zk=local> WITH ws_wh AS > . . . . . . . . . . . > ( > . . . . . . . . . . . > SELECT ws1.ws_order_number, > . . . . . . . . . . . > ws1.ws_warehouse_sk wh1, > . . . . . . . . . . . > ws2.ws_warehouse_sk wh2 > . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_sales` > ws1, > . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/web_sales` > ws2 > . . . . . . . . . . . > WHERE ws1.ws_order_number = > ws2.ws_order_number > . . . . . . . . . . . > AND ws1.ws_warehouse_sk <> > ws2.ws_warehouse_sk) > . . . . . . . . . . . > SELECT > . . . . . . . . . . . > Count(DISTINCT ws1.ws_order_number) AS > `order count` , > . . . . . . . . . . . > Sum(ws1.ws_ext_ship_cost) AS > `total shipping cost` , > . . . . . . . . . . . > Sum(ws1.ws_net_profit) AS > `total net profit` > . . . . . . . . . . . > FROM dfs.`/data/tpcds/sf1/parquet/web_sales` ws1 , > . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/date_dim` dd, > . . . . . . . . . . . > > dfs.`/data/tpcds/sf1/parquet/customer_address` ca, > . . . . . . . . . . . > dfs.`/data/tpcds/sf1/parquet/web_site` wbst > . . . . . . . . . . . > WHERE dd.d_date BETWEEN '2000-04-01' AND ( > . . . . . . . . . . . > Cast('2000-04-01' AS DATE) + > INTERVAL '60' day) > . . . . . . . . . . . > AND ws1.ws_ship_date_sk = dd.d_date_sk > . . . . . . . . . . . > AND ws1.ws_ship_addr_sk = ca.ca_address_sk > . . . . . . . . . . . > AND ca.ca_state = 'IN' > . . . . . . . . . . . > AND ws1.ws_web_site_sk = wbst.web_site_sk > . . . . . . . . . . . > AND wbst.web_company_name = 'pri' > . . . . . . . . . . . > AND ws1.ws_order_number IN > . . . . . . . . . . . > ( > . . . . . . . . . . . > SELECT ws_wh.ws_order_number > . . . . . . . . . . . > FROM ws_wh) > . . . . . . . . . . . > AND ws1.ws_order_number IN > . . . . . . . . . . . > ( > . . . . . . . . . . . > SELECT wr.wr_order_number > . . . . . . . . . . . > FROM > dfs.`/data/tpcds/sf1/parquet/web_returns` wr, > . . . . . . . . . . . > ws_wh > . . . . . . . . . . . > WHERE wr.wr_order_number = > ws_wh.ws_order_number) > . . . . . . . . . . . > ORDER BY count(DISTINCT ws1.ws_order_number) > . . . . . . . . . . . > LIMIT 100; > +--------------+----------------------+---------------------+ > | order count | total shipping cost | total net profit | > +--------------+----------------------+---------------------+ > | 17 | 38508.130000000005 | 20822.3 | > +--------------+----------------------+---------------------+ > 1 row selected (105.621 seconds) > {code} > The correct results should be: > {code:java} > +--------------+----------------------+---------------------+ > | order count | total shipping cost | total net profit | > +--------------+----------------------+---------------------+ > | 34 | 63754.72 | 15919.009999999998 | > +--------------+----------------------+---------------------+ > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)