[ https://issues.apache.org/jira/browse/DRILL-3942?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Victoria Markman updated DRILL-3942: ------------------------------------ Description: It seems to me that we should be able to do that, x is a grouping column: {code} 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is not null; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(x=[$0], y=[$1], z=[$2]) 00-02 UnionExchange 01-01 Project(x=[$0], y=[$1], z=[$2]) 01-02 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) 01-03 SelectionVectorRemover 01-04 Filter(condition=[IS NOT NULL($0)]) 01-05 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[$SUM0($3)]) 01-06 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3]) 01-07 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) 02-01 UnorderedMuxExchange 03-01 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, hash64AsDouble($0)))]) 03-02 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) 03-03 Project(ss_sold_date_sk=[$2], ss_customer_sk=[$1], ss_quantity=[$0]) 03-04 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, `ss_quantity`]]]) {code} If I add another not null filter, it is pushed down: {code} 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is not null and y is not null; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(x=[$0], y=[$1], z=[$2]) 00-02 UnionExchange 01-01 Project(x=[$0], y=[$1], z=[$2]) 01-02 
Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) 01-03 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[$SUM0($3)]) 01-04 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3]) 01-05 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) 02-01 UnorderedMuxExchange 03-01 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, hash64AsDouble($0)))]) 03-02 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) 03-03 SelectionVectorRemover 03-04 Filter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) 03-05 Project(ss_sold_date_sk=[$2], ss_customer_sk=[$1], ss_quantity=[$0]) 03-06 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, `ss_quantity`]]]) {code} IS NULL filter is pushed down: {code} 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is null; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(x=[$0], y=[$1], z=[$2]) 00-02 Project(x=[$0], y=[$1], z=[$2]) 00-03 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) 00-04 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[$SUM0($3)]) 00-05 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) 00-06 SelectionVectorRemover 00-07 Filter(condition=[IS NULL($0)]) 00-08 Project(ss_sold_date_sk=[$2], ss_customer_sk=[$1], ss_quantity=[$0]) 00-09 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, 
usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, `ss_quantity`]]]) {code} was: It seems to me that we should be able to do that, x is a grouping column: {code} 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is not null; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(x=[$0], y=[$1], z=[$2]) 00-02 UnionExchange 01-01 Project(x=[$0], y=[$1], z=[$2]) 01-02 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) 01-03 SelectionVectorRemover 01-04 Filter(condition=[IS NOT NULL($0)]) 01-05 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[$SUM0($3)]) 01-06 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3]) 01-07 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) 02-01 UnorderedMuxExchange 03-01 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, hash64AsDouble($0)))]) 03-02 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) 03-03 Project(ss_sold_date_sk=[$2], ss_customer_sk=[$1], ss_quantity=[$0]) 03-04 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, `ss_quantity`]]]) {code} If I add another not null filter, it is pushed down: {code} 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is not null and y is not null; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(x=[$0], y=[$1], z=[$2]) 00-02 UnionExchange 01-01 Project(x=[$0], y=[$1], 
z=[$2]) 01-02 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) 01-03 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[$SUM0($3)]) 01-04 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3]) 01-05 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) 02-01 UnorderedMuxExchange 03-01 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, hash64AsDouble($0)))]) 03-02 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) 03-03 SelectionVectorRemover 03-04 Filter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) 03-05 Project(ss_sold_date_sk=[$2], ss_customer_sk=[$1], ss_quantity=[$0]) 03-06 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, `ss_quantity`]]]) {code} > IS NOT NULL filter is not pushed past aggregation > ------------------------------------------------- > > Key: DRILL-3942 > URL: https://issues.apache.org/jira/browse/DRILL-3942 > Project: Apache Drill > Issue Type: Bug > Components: Query Planning & Optimization > Affects Versions: 1.2.0 > Reporter: Victoria Markman > > It seems to me that we should be able to do that, x is a grouping column: > {code} > 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select > ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by > ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is not null; > +------+------+ > | text | json | > +------+------+ > | 00-00 Screen > 00-01 Project(x=[$0], y=[$1], z=[$2]) > 00-02 UnionExchange > 01-01 Project(x=[$0], y=[$1], z=[$2]) > 01-02 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], > EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) > 01-03 SelectionVectorRemover > 01-04 
Filter(condition=[IS NOT NULL($0)]) > 01-05 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], > agg#1=[$SUM0($3)]) > 01-06 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], > $f2=[$2], $f3=[$3]) > 01-07 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) > 02-01 UnorderedMuxExchange > 03-01 Project(ss_sold_date_sk=[$0], > ss_customer_sk=[$1], $f2=[$2], $f3=[$3], > E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, hash64AsDouble($0)))]) > 03-02 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], > agg#1=[COUNT($2)]) > 03-03 Project(ss_sold_date_sk=[$2], > ss_customer_sk=[$1], ss_quantity=[$0]) > 03-04 Scan(groupscan=[ParquetGroupScan > [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], > selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, > usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, > `ss_quantity`]]]) > {code} > If I add another not null filter, it is pushed down: > {code} > 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select > ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by > ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is not null and y is > not null; > +------+------+ > | text | json | > +------+------+ > | 00-00 Screen > 00-01 Project(x=[$0], y=[$1], z=[$2]) > 00-02 UnionExchange > 01-01 Project(x=[$0], y=[$1], z=[$2]) > 01-02 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], > EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) > 01-03 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], > agg#1=[$SUM0($3)]) > 01-04 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], > $f2=[$2], $f3=[$3]) > 01-05 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) > 02-01 UnorderedMuxExchange > 03-01 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], > $f2=[$2], $f3=[$3], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, > hash64AsDouble($0)))]) > 03-02 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], > agg#1=[COUNT($2)]) > 03-03 SelectionVectorRemover > 03-04 
Filter(condition=[AND(IS NOT NULL($0), IS > NOT NULL($1))]) > 03-05 Project(ss_sold_date_sk=[$2], > ss_customer_sk=[$1], ss_quantity=[$0]) > 03-06 Scan(groupscan=[ParquetGroupScan > [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], > selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, > usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, > `ss_quantity`]]]) > {code} > IS NULL filter is pushed down: > {code} > 0: jdbc:drill:schema=dfs> explain plan for select x, y, z from ( select > ss_sold_date_sk, ss_customer_sk, avg(ss_quantity) from store_sales group by > ss_sold_date_sk, ss_customer_sk ) as sq(x, y, z) where x is null; > +------+------+ > | text | json | > +------+------+ > | 00-00 Screen > 00-01 Project(x=[$0], y=[$1], z=[$2]) > 00-02 Project(x=[$0], y=[$1], z=[$2]) > 00-03 Project(ss_sold_date_sk=[$0], ss_customer_sk=[$1], > EXPR$2=[CAST(/(CastHigh(CASE(=($3, 0), null, $2)), $3)):ANY NOT NULL]) > 00-04 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[$SUM0($3)]) > 00-05 HashAgg(group=[{0, 1}], agg#0=[$SUM0($2)], > agg#1=[COUNT($2)]) > 00-06 SelectionVectorRemover > 00-07 Filter(condition=[IS NULL($0)]) > 00-08 Project(ss_sold_date_sk=[$2], ss_customer_sk=[$1], > ss_quantity=[$0]) > 00-09 Scan(groupscan=[ParquetGroupScan > [entries=[ReadEntryWithPath [path=maprfs:///tpcds1/parquet/store_sales]], > selectionRoot=maprfs:/tpcds1/parquet/store_sales, numFiles=1, > usedMetadataFile=false, columns=[`ss_sold_date_sk`, `ss_customer_sk`, > `ss_quantity`]]]) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)