[ https://issues.apache.org/jira/browse/ASTERIXDB-1266?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dmitry Lychagin reassigned ASTERIXDB-1266: ------------------------------------------ Assignee: Dmitry Lychagin (was: Yingyi Bu) > Sqlpp query degrades the performance by picking NLJ instead of HHJ > ------------------------------------------------------------------ > > Key: ASTERIXDB-1266 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-1266 > Project: Apache AsterixDB > Issue Type: Bug > Components: *DB - AsterixDB, COMP - Compiler > Reporter: Pouria > Assignee: Dmitry Lychagin > Labels: soon > > Translating TPCH Q17 from AQL to Sqlpp causes the optimizer to pick NLJ > (instead of HHJ) which drops the performance of the query. > Below you can find the AQL and Sqlpp versions of the query along with their > corresponding optimized plans: > Q17-SQL-PP > {noformat} > use tpch_1g; > declare function tmp() { > ( > select element {'t_partkey':l_partkey,'t_avg_quantity':(0.2 * avg(( > select element i.l_quantity > from l as i > )))} > from LineItem as l > group by l.l_partkey as l_partkey > ) > }; > select element ( sum(( > select element l.l_extendedprice > from LineItem as l, > Part as p, > tmp() as t > where (((p.p_partkey = l.l_partkey) and (p.p_brand = 'Brand#23') and > (p.p_container = 'MED BOX')) and ((l.l_partkey = t.t_partkey) and > (l.l_quantity < t.t_avg_quantity))) > )) / 7.0); > {noformat} > Query plan: > {noformat} > distribute result [%0->$$54] > -- DISTRIBUTE_RESULT |UNPARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| > project ([$$54]) > -- STREAM_PROJECT |UNPARTITIONED| > assign [$$54] <- [function-call: asterix:numeric-divide, > Args:[%0->$$55, ADouble: {7.0}]] > -- ASSIGN |UNPARTITIONED| > aggregate [$$55] <- [function-call: asterix:agg-sum, Args:[%0->$$72]] > -- AGGREGATE |UNPARTITIONED| > exchange > -- RANDOM_MERGE_EXCHANGE |PARTITIONED| > aggregate [$$72] <- [function-call: asterix:agg-local-sum, > Args:[%0->$$51]] > -- AGGREGATE |PARTITIONED| > project ([$$51]) > -- STREAM_PROJECT |PARTITIONED| > select (function-call: algebricks:lt, Args:[%0->$$65, > %0->$$66]) > -- STREAM_SELECT |PARTITIONED| > project ([$$65, $$66, $$51]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > join (function-call: algebricks:and, > Args:[function-call: algebricks:eq, Args:[%0->$$57, %0->$$14], function-call: > algebricks:eq, Args:[%0->$$60, %0->$$57]]) > -- NESTED_LOOP |PARTITIONED| > exchange > -- BROADCAST_EXCHANGE |PARTITIONED| > project ([$$65, $$51, $$57]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$51, $$65, $$57] <- [function-call: > asterix:field-access-by-index, Args:[%0->$$11, AInt32: {5}], function-call: > asterix:field-access-by-index, Args:[%0->$$11, AInt32: {4}], function-call: > asterix:field-access-by-index, Args:[%0->$$11, AInt32: {1}]] > -- ASSIGN |PARTITIONED| > project ([$$11]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$11] <- [%0->$$15] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > replicate > -- SPLIT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$15]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > data-scan []<-[$$61, $$62, $$15] > <- tpch_1g:LineItem > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE > |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > join (TRUE) > -- NESTED_LOOP |PARTITIONED| > exchange > -- BROADCAST_EXCHANGE |PARTITIONED| > project ([$$60]) > -- STREAM_PROJECT |PARTITIONED| > select (function-call: algebricks:and, > Args:[function-call: algebricks:eq, Args:[function-call: > asterix:field-access-by-index, Args:[%0->$$16, AInt32: {6}], AString: {MED > BOX}], function-call: algebricks:eq, Args:[function-call: > asterix:field-access-by-index, Args:[%0->$$16, AInt32: {3}], AString: > {Brand#23}]]) > -- STREAM_SELECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan []<-[$$60, $$16] <- tpch_1g:Part > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$66, $$14]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$66] <- [function-call: > asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$63]] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > group by ([$$14 := %0->$$71]) decor ([]) { > aggregate [$$63] <- > [function-call: asterix:agg-global-avg, Args:[%0->$$70]] > -- AGGREGATE |LOCAL| > nested tuple source > -- NESTED_TUPLE_SOURCE > |LOCAL| > } > -- PRE_CLUSTERED_GROUP_BY[$$71] > |PARTITIONED| > exchange > -- HASH_PARTITION_MERGE_EXCHANGE > MERGE:[$$71(ASC)] HASH:[$$71] |PARTITIONED| > group by ([$$71 := %0->$$56]) decor > ([]) { > aggregate [$$70] <- > [function-call: asterix:agg-local-avg, Args:[%0->$$29]] > -- AGGREGATE |LOCAL| > nested tuple source > -- NESTED_TUPLE_SOURCE > |LOCAL| > } > -- SORT_GROUP_BY[$$56] |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > project ([$$56, $$29]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$29, $$56] <- > [function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {4}], > function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {1}]] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > replicate > -- SPLIT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > project ([$$15]) > -- STREAM_PROJECT > |PARTITIONED| > exchange > -- > ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan > []<-[$$61, $$62, $$15] <- tpch_1g:LineItem > -- DATASOURCE_SCAN > |PARTITIONED| > exchange > -- > ONE_TO_ONE_EXCHANGE |PARTITIONED| > > empty-tuple-source > -- > EMPTY_TUPLE_SOURCE |PARTITIONED| > {noformat} > Q17-AQL > {noformat} > declare function tmp(){ > for $l in dataset('LineItem') > group by $l_partkey := $l.l_partkey with $l > return { > "t_partkey": $l_partkey, > "t_avg_quantity": 0.2 * avg(for $i in $l return $i.l_quantity) > } > } > sum( > for $l in dataset('LineItem') > for $p in dataset('Part') > where $p.p_partkey = $l.l_partkey > and $p.p_brand = 'Brand#23' > and $p.p_container = 'MED BOX' > for $t in tmp() > where $l.l_partkey = $t.t_partkey > and $l.l_quantity < $t.t_avg_quantity > return $l.l_extendedprice > )/7.0 > {noformat} > {noformat} > distribute result [%0->$$7] > -- DISTRIBUTE_RESULT |UNPARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| > project ([$$7]) > -- STREAM_PROJECT |UNPARTITIONED| > assign [$$7] <- [function-call: asterix:numeric-divide, Args:[%0->$$41, > ADouble: {7.0}]] > -- ASSIGN |UNPARTITIONED| > aggregate [$$41] <- [function-call: asterix:agg-sum, Args:[%0->$$58]] > -- AGGREGATE |UNPARTITIONED| > exchange > -- RANDOM_MERGE_EXCHANGE |PARTITIONED| > aggregate [$$58] <- [function-call: asterix:agg-local-sum, > Args:[%0->$$38]] > -- AGGREGATE |PARTITIONED| > project ([$$38]) > -- STREAM_PROJECT |PARTITIONED| > select (function-call: algebricks:lt, Args:[%0->$$50, > %0->$$51]) > -- STREAM_SELECT |PARTITIONED| > project ([$$50, $$51, $$38]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > join (function-call: algebricks:eq, Args:[%0->$$43, > %0->$$9]) > -- HYBRID_HASH_JOIN [$$43][$$9] |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$50, $$38, $$43]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > join (function-call: algebricks:eq, > Args:[%0->$$46, %0->$$43]) > -- HYBRID_HASH_JOIN [$$43][$$46] |PARTITIONED| > exchange > -- HASH_PARTITION_EXCHANGE [$$43] > |PARTITIONED| > project ([$$50, $$38, $$43]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$38, $$50, $$43] <- > [function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {5}], > function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {4}], > function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {1}]] > -- ASSIGN |PARTITIONED| > project ([$$4]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$4] <- [%0->$$8] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > replicate > -- SPLIT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > project ([$$8]) > -- STREAM_PROJECT > |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > data-scan []<-[$$47, > $$48, $$8] <- tpch_1g:LineItem > -- DATASOURCE_SCAN > |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE > |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$46]) > -- STREAM_PROJECT |PARTITIONED| > select (function-call: algebricks:and, > Args:[function-call: algebricks:eq, Args:[function-call: > asterix:field-access-by-index, Args:[%0->$$5, AInt32: {3}], AString: > {Brand#23}], function-call: algebricks:eq, Args:[function-call: > asterix:field-access-by-index, Args:[%0->$$5, AInt32: {6}], AString: {MED > BOX}]]) > -- STREAM_SELECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan []<-[$$46, $$5] <- > tpch_1g:Part > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE > |PARTITIONED| > exchange > -- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED| > project ([$$51, $$9]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$51] <- [function-call: > asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$53]] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > group by ([$$9 := %0->$$57]) decor ([]) { > aggregate [$$53] <- [function-call: > asterix:agg-global-avg, Args:[%0->$$56]] > -- AGGREGATE |LOCAL| > nested tuple source > -- NESTED_TUPLE_SOURCE |LOCAL| > } > -- PRE_CLUSTERED_GROUP_BY[$$57] |PARTITIONED| > exchange > -- HASH_PARTITION_MERGE_EXCHANGE > MERGE:[$$57(ASC)] HASH:[$$57] |PARTITIONED| > group by ([$$57 := %0->$$42]) decor ([]) { > aggregate [$$56] <- > [function-call: asterix:agg-local-avg, Args:[%0->$$27]] > -- AGGREGATE |LOCAL| > nested tuple source > -- NESTED_TUPLE_SOURCE > |LOCAL| > } > -- SORT_GROUP_BY[$$42] |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$42, $$27]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$27, $$42] <- > [function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {4}], > function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {1}]] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > replicate > -- SPLIT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > project ([$$8]) > -- STREAM_PROJECT > |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE > |PARTITIONED| > data-scan []<-[$$47, > $$48, $$8] <- tpch_1g:LineItem > -- DATASOURCE_SCAN > |PARTITIONED| > exchange > -- > ONE_TO_ONE_EXCHANGE |PARTITIONED| > empty-tuple-source > -- > EMPTY_TUPLE_SOURCE |PARTITIONED| > {noformat} > -- This message was sent by Atlassian JIRA (v6.4.14#64029)