Vineet Garg created HIVE-21021:
----------------------------------

Summary: Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch
Key: HIVE-21021
URL: https://issues.apache.org/jira/browse/HIVE-21021
Project: Hive
Issue Type: Improvement
Affects Versions: 3.0.0
Reporter: Vineet Garg
Assignee: Vineet Garg

{code:sql}
CREATE TABLE `store_sales`(
  `ss_sold_date_sk` int,
  `ss_quantity` int,
  `ss_list_price` decimal(7,2));

CREATE TABLE `date_dim`(
  `d_date_sk` int,
  `d_year` int);

explain cbo
with avg_sales as
 (select avg(quantity*list_price) average_sales
  from (select ss_quantity quantity
              ,ss_list_price list_price
        from store_sales
            ,date_dim
        where ss_sold_date_sk = d_date_sk
          and d_year between 1999 and 2001 ) x)
select * from store_sales where ss_list_price > (select average_sales from avg_sales);
{code}

{noformat}
CBO PLAN:
HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
  HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
    HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
      HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
        HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
      HiveProject($f0=[/($0, $1)])
        HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
          HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
            HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
              HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
                HiveFilter(condition=[IS NOT NULL($0)])
                  HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
              HiveProject(d_date_sk=[$0])
                HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
                  HiveTableScan(table=[[sub, date_dim]], table:alias=[date_dim])
    HiveProject(cnt=[$0])
      HiveFilter(condition=[<=(sq_count_check($0), 1)])
        HiveProject(cnt=[$0])
          HiveAggregate(group=[{}], cnt=[COUNT()])
            HiveProject
              HiveProject($f0=[$0])
                HiveAggregate(group=[{}], agg#0=[count($0)])
                  HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
                    HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
                      HiveFilter(condition=[IS NOT NULL($0)])
                        HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
                    HiveProject(d_date_sk=[$0])
                      HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
                        HiveTableScan(table=[[sub, date_dim]], table:alias=[date_dim])
{noformat}

--
This message was sent by Atlassian JIRA
(v7.6.3#76005)