[ https://issues.apache.org/jira/browse/ASTERIXDB-2233?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Wail Alkowaileet reassigned ASTERIXDB-2233: ------------------------------------------- Assignee: Wail Alkowaileet > Common conjunctions in disjunctions > ----------------------------------- > > Key: ASTERIXDB-2233 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-2233 > Project: Apache AsterixDB > Issue Type: Improvement > Components: COMP - Compiler > Reporter: Wail Alkowaileet > Assignee: Wail Alkowaileet > Priority: Major > > (Inspired by Apache Impala) > Boolean expressions of the form: > * (p and q and r) or r -> r > * (p and q) or (p and q) -> p and q > * (p and q) or (p and r) -> p and (q or r) > This transformation allows other optimizations to kick in, and it is > compatible with the MISSING/NULL truth tables. > Before: > Query: > {noformat} > SELECT t.x > FROM Tweets as t, TweetsExt as te > WHERE (t.x = te.x and te.y > 10) or (t.x = te.x and te.z > 10) > {noformat} > Plan: > {noformat} > distribute result [$$34] > -- DISTRIBUTE_RESULT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$34]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$34] <- [{"x": $$38}] > -- ASSIGN |PARTITIONED| > project ([$$38]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > join (or(and(eq($$38, $$39), gt($$40, 10)), and(eq($$38, $$39), > gt($$42, 10)))) > -- NESTED_LOOP |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$38]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$38] <- [$$t.getField("x")] > -- ASSIGN |PARTITIONED| > project ([$$t]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan []<-[$$37, $$t] <- TwitterDataverse.Tweets > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > exchange > -- BROADCAST_EXCHANGE |PARTITIONED| > project ([$$39, $$40, $$42]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$42, $$40, $$39] <- [$$te.getField("z"), > $$te.getField("y"), $$te.getField("x")] > -- ASSIGN 
|PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan []<-[$$te] <- TwitterDataverse.TweetsExt > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > {noformat} > After: > {noformat} > SELECT t.x > FROM Tweets as t, TweetsExt as te > WHERE t.x = te.x and (te.y > 10 or te.z > 10) > {noformat} > Plan: > {noformat} > distribute result [$$30] > -- DISTRIBUTE_RESULT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$30]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$30] <- [{"x": $$31}] > -- ASSIGN |PARTITIONED| > project ([$$31]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > join (eq($$31, $$33)) > -- HYBRID_HASH_JOIN [$$31][$$33] |PARTITIONED| > exchange > -- HASH_PARTITION_EXCHANGE [$$31] |PARTITIONED| > project ([$$31]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$31] <- [$$t.getField("x")] > -- ASSIGN |PARTITIONED| > project ([$$t]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan []<-[$$32, $$t] <- TwitterDataverse.Tweets > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > exchange > -- HASH_PARTITION_EXCHANGE [$$33] |PARTITIONED| > project ([$$33]) > -- STREAM_PROJECT |PARTITIONED| > select (or(gt($$te.getField("y"), 10), > gt($$te.getField("z"), 10))) > -- STREAM_SELECT |PARTITIONED| > assign [$$33] <- [$$te.getField("x")] > -- ASSIGN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > data-scan []<-[$$te] <- TwitterDataverse.TweetsExt > -- DATASOURCE_SCAN |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)