Yingyi Bu created ASTERIXDB-2044: ------------------------------------ Summary: Listify in subqueries Key: ASTERIXDB-2044 URL: https://issues.apache.org/jira/browse/ASTERIXDB-2044 Project: Apache AsterixDB Issue Type: Bug Components: COMP - Compiler Reporter: Yingyi Bu Assignee: Yingyi Bu
The following query results in unnecessary listify aggregates in the optimized query plan: each subquery over the group variable g re-listifies the group's records (the listify calls producing $$24 and $$18 in the plan below), and the resulting list is then immediately unnested again by a scan-collection. {noformat} DROP DATAVERSE tpch IF EXISTS; CREATE dataverse tpch; USE tpch; CREATE TYPE LineItemType AS CLOSED { l_orderkey : integer, l_partkey : integer, l_suppkey : integer, l_linenumber : integer, l_quantity : double, l_extendedprice : double, l_discount : double, l_tax : double, l_returnflag : string, l_linestatus : string, l_shipdate : string, l_commitdate : string, l_receiptdate : string, l_shipinstruct : string, l_shipmode : string, l_comment : string } CREATE DATASET LineItem(LineItemType) PRIMARY KEY l_orderkey,l_linenumber; SELECT l_returnflag AS l_returnflag, l_linestatus AS l_linestatus, coll_count(cheap) AS count_cheaps, coll_count(expensive) AS count_expensives FROM LineItem AS l /* +hash */ GROUP BY l.l_returnflag AS l_returnflag,l.l_linestatus AS l_linestatus GROUP AS g LET cheap = ( SELECT ELEMENT m FROM (FROM g SELECT VALUE l) AS m WHERE m.l_discount > 0.05 ), expensive = ( SELECT ELEMENT m FROM (FROM g SELECT VALUE l) AS m WHERE m.l_discount <= 0.05 ) ORDER BY l_returnflag,l_linestatus ; {noformat} The optimized query plan is: {noformat} distribute result [$$31] -- DISTRIBUTE_RESULT |PARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |PARTITIONED| project ([$$31]) -- STREAM_PROJECT |PARTITIONED| assign [$$31] <- [{"l_returnflag": $$l_returnflag, "l_linestatus": $$l_linestatus, "count_cheaps": $$36, "count_expensives": $$37}] -- ASSIGN |PARTITIONED| exchange -- SORT_MERGE_EXCHANGE [$$l_returnflag(ASC), $$l_linestatus(ASC) ] |PARTITIONED| project ([$$l_returnflag, $$l_linestatus, $$36, $$37]) -- STREAM_PROJECT |PARTITIONED| subplan { aggregate [$$37] <- [agg-count($$m)] -- AGGREGATE |LOCAL| select (le($$39, 0.05)) -- STREAM_SELECT |LOCAL| assign [$$39] <- [$$m.getField(6)] -- ASSIGN |LOCAL| unnest $$m <- scan-collection($$24) -- UNNEST |LOCAL| subplan { aggregate [$$24] <- [listify($$23)] -- AGGREGATE |LOCAL| assign [$$23] <- [$$g.getField(0)] -- ASSIGN |LOCAL| unnest $$g <- scan-collection($$15) 
-- UNNEST |LOCAL| nested tuple source -- NESTED_TUPLE_SOURCE |LOCAL| } -- SUBPLAN |LOCAL| nested tuple source -- NESTED_TUPLE_SOURCE |LOCAL| } -- SUBPLAN |PARTITIONED| subplan { aggregate [$$36] <- [agg-count($$m)] -- AGGREGATE |LOCAL| select (gt($$38, 0.05)) -- STREAM_SELECT |LOCAL| assign [$$38] <- [$$m.getField(6)] -- ASSIGN |LOCAL| unnest $$m <- scan-collection($$18) -- UNNEST |LOCAL| subplan { aggregate [$$18] <- [listify($$17)] -- AGGREGATE |LOCAL| assign [$$17] <- [$$g.getField(0)] -- ASSIGN |LOCAL| unnest $$g <- scan-collection($$15) -- UNNEST |LOCAL| nested tuple source -- NESTED_TUPLE_SOURCE |LOCAL| } -- SUBPLAN |LOCAL| nested tuple source -- NESTED_TUPLE_SOURCE |LOCAL| } -- SUBPLAN |PARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |PARTITIONED| group by ([$$l_returnflag := $$32; $$l_linestatus := $$33]) decor ([]) { aggregate [$$15] <- [listify($$g)] -- AGGREGATE |LOCAL| nested tuple source -- NESTED_TUPLE_SOURCE |LOCAL| } -- PRE_CLUSTERED_GROUP_BY[$$32, $$33] |PARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |PARTITIONED| order (ASC, $$32) (ASC, $$33) -- STABLE_SORT [$$32(ASC), $$33(ASC)] |PARTITIONED| exchange -- HASH_PARTITION_EXCHANGE [$$32, $$33] |PARTITIONED| project ([$$32, $$33, $$g]) -- STREAM_PROJECT |PARTITIONED| assign [$$g, $$33, $$32] <- [{"l": $$l}, $$l.getField(9), $$l.getField(8)] -- ASSIGN |PARTITIONED| project ([$$l]) -- STREAM_PROJECT |PARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |PARTITIONED| data-scan []<-[$$34, $$35, $$l] <- tpch.LineItem -- DATASOURCE_SCAN |PARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |PARTITIONED| empty-tuple-source -- EMPTY_TUPLE_SOURCE |PARTITIONED| {noformat} -- This message was sent by Atlassian JIRA (v6.4.14#64029)