[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-14 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
       Resolution: Fixed
    Fix Version/s: 4.0.0
           Status: Resolved  (was: Patch Available)

Pushed to master.

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>             Fix For: 4.0.0
>
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch, HIVE-21021.6.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>     HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>       HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>         HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>       HiveProject($f0=[/($0, $1)])
>         HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>           HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
>             HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>               HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>                 HiveFilter(condition=[IS NOT NULL($0)])
>                   HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>               HiveProject(d_date_sk=[$0])
>                 HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
>                   HiveTableScan(table=[[sub, date_dim]], table:alias=[date_dim])
>     HiveProject(cnt=[$0])
>       HiveFilter(condition=[<=(sq_count_check($0), 1)])
>         HiveProject(cnt=[$0])
>           HiveAggregate(group=[{}], cnt=[COUNT()])
>             HiveProject
>               HiveProject($f0=[$0])
>                 HiveAggregate(group=[{}], agg#0=[count($0)])
>                   HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>                     HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>                       HiveFilter(condition=[IS NOT NULL($0)])
>                         HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>                     HiveProject(d_date_sk=[$0])
>                       HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
>                         HiveTableScan(table=[[sub, date_dim]], table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-13 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Open  (was: Patch Available)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch, HIVE-21021.6.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-13 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Patch Available  (was: Open)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch, HIVE-21021.6.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-13 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Attachment: HIVE-21021.6.patch

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch, HIVE-21021.6.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-12 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Patch Available  (was: Open)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-12 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Attachment: HIVE-21021.5.patch

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-12 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Open  (was: Patch Available)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch, HIVE-21021.5.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-11 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Open  (was: Patch Available)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-11 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Patch Available  (was: Open)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-11 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Attachment: HIVE-21021.4.patch

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
>                 Key: HIVE-21021
>                 URL: https://issues.apache.org/jira/browse/HIVE-21021
>             Project: Hive
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>         Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, HIVE-21021.3.patch, HIVE-21021.4.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>              ,ss_list_price list_price
>        from store_sales
>            ,date_dim
>        where ss_sold_date_sk = d_date_sk
>          and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-09 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Patch Available  (was: Open)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, 
> HIVE-21021.3.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-09 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Attachment: HIVE-21021.3.patch

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, 
> HIVE-21021.3.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-09 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Open  (was: Patch Available)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch, 
> HIVE-21021.3.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-08 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Patch Available  (was: Open)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-08 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Open  (was: Patch Available)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-08 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Attachment: HIVE-21021.2.patch

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch, HIVE-21021.2.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-07 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Attachment: HIVE-21021.1.patch

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (HIVE-21021) Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch

2018-12-07 Thread Vineet Garg (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-21021?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Vineet Garg updated HIVE-21021:
---
Status: Patch Available  (was: Open)

> Scalar subquery with only aggregate in subquery (no group by) has unnecessary 
> sq_count_check branch
> ---
>
> Key: HIVE-21021
> URL: https://issues.apache.org/jira/browse/HIVE-21021
> Project: Hive
>  Issue Type: Improvement
>Affects Versions: 3.0.0
>Reporter: Vineet Garg
>Assignee: Vineet Garg
>Priority: Major
> Attachments: HIVE-21021.1.patch
>
>
> {code:sql}
> CREATE TABLE `store_sales`(
>   `ss_sold_date_sk` int,
>   `ss_quantity` int,
>   `ss_list_price` decimal(7,2));
> CREATE TABLE `date_dim`(
>   `d_date_sk` int,
>   `d_year` int);
> explain cbo with avg_sales as
>  (select avg(quantity*list_price) average_sales
>   from (select ss_quantity quantity
>               ,ss_list_price list_price
>         from store_sales
>             ,date_dim
>         where ss_sold_date_sk = d_date_sk
>           and d_year between 1999 and 2001 ) x)
> select * from store_sales where ss_list_price > (select average_sales from avg_sales);
> {code}
> {noformat}
> CBO PLAN:
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
>   HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 
> rows, 0.0 cpu, 0.0 io}])
> HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], 
> cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
> HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
>   HiveProject($f0=[/($0, $1)])
> HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
>   HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
> HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
>   HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
> HiveFilter(condition=[IS NOT NULL($0)])
>   HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
>   HiveProject(d_date_sk=[$0])
> HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS 
> NOT NULL($0))])
>   HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> HiveProject(cnt=[$0])
>   HiveFilter(condition=[<=(sq_count_check($0), 1)])
> HiveProject(cnt=[$0])
>   HiveAggregate(group=[{}], cnt=[COUNT()])
> HiveProject
>   HiveProject($f0=[$0])
> HiveAggregate(group=[{}], agg#0=[count($0)])
>   HiveJoin(condition=[=($0, $3)], joinType=[inner], 
> algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
> HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], 
> ss_list_price=[$2])
>   HiveFilter(condition=[IS NOT NULL($0)])
> HiveTableScan(table=[[sub, store_sales]], 
> table:alias=[store_sales])
> HiveProject(d_date_sk=[$0])
>   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 
> 2001), IS NOT NULL($0))])
> HiveTableScan(table=[[sub, date_dim]], 
> table:alias=[date_dim])
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)