[ https://issues.apache.org/jira/browse/ASTERIXDB-2199?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16306415#comment-16306415 ]
Steven Jacobs commented on ASTERIXDB-2199: ------------------------------------------ I'm guessing that this bug happens in Master as well? If so, I guess it would be another issue? > Nested primary key and hash repartitioning bug > ----------------------------------------------- > > Key: ASTERIXDB-2199 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-2199 > Project: Apache AsterixDB > Issue Type: Bug > Components: *DB - AsterixDB > Reporter: Shiva Jahangiri > Assignee: Steven Jacobs > > If a join is happening on primary keys of two tables, no hash partitioning > should happen. Having the following DDL(Note that primary key of Friendship2 > is string): > DROP DATAVERSE Facebook IF EXISTS; > CREATE DATAVERSE Facebook; > Use Facebook; > CREATE TYPE FriendshipType AS closed { > id:string, > friends :[string] > }; > CREATE DATASET Friendship2(FriendshipType) > PRIMARY KEY id; > insert into Friendship2([ {"id":"1","friends" : [ "2","3","4"]}, > {"id":"2","friends" : [ "4","5","6"]} > ]); > By running the following query: > Use Facebook; > select * from Friendship2 first, Friendship2 second where first.id = > second.id; > we can see that there is no hash partitioning happening in optimized logical > plan which is correct as join is happening on the primary key of both > relations and data is already partitioned on primary key: > { > "operator":"distribute-result", > "expressions":"$$9", > "operatorId" : "1.1", > "physical-operator":"DISTRIBUTE_RESULT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.2", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"project", > "variables" :["$$9"], > "operatorId" : "1.3", > "physical-operator":"STREAM_PROJECT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"assign", > "variables" :["$$9"], > "expressions":"{ first : $$first, second : $$second}", > "operatorId" : "1.4", > "physical-operator":"ASSIGN", > 
"execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"project", > "variables" :["$$first","$$second"], > "operatorId" : "1.5", > "physical-operator":"STREAM_PROJECT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.6", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"join", > "condition":"eq($$10, $$11)", > "operatorId" : "1.7", > "physical-operator":"HYBRID_HASH_JOIN > [$$10][$$11]", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.8", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"data-scan", > "variables" :["$$10","$$first"], > "data-source":"Facebook.Friendship2", > "operatorId" : "1.9", > > "physical-operator":"DATASOURCE_SCAN", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.10", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"empty-tuple-source", > "operatorId" : "1.11", > > "physical-operator":"EMPTY_TUPLE_SOURCE", > > "execution-mode":"PARTITIONED" > } > ] > } > ] > } > ] > } > , { > "operator":"exchange", > "operatorId" : "1.12", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"data-scan", > "variables" :["$$11","$$second"], > "data-source":"Facebook.Friendship2", > "operatorId" : "1.13", > > "physical-operator":"DATASOURCE_SCAN", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.14", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"empty-tuple-source", > "operatorId" : "1.15", > > "physical-operator":"EMPTY_TUPLE_SOURCE", > > "execution-mode":"PARTITIONED" > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > However, 
if in the DDL the primary key of a relation is nested (e.g. person.id in > Friendship), as follows: > DROP DATAVERSE Facebook IF EXISTS; > CREATE DATAVERSE Facebook; > Use Facebook; > CREATE TYPE PersonType AS closed { > id:string > }; > CREATE TYPE FriendshipType AS closed { > person : PersonType, > Friends :[PersonType] > }; > /* Creating Datasets */ > CREATE DATASET Person(PersonType) > PRIMARY KEY id; > CREATE DATASET Friendship(FriendshipType) > PRIMARY KEY person.id; > Use Facebook; > insert into Person ([{"id":"1"},{"id":"2"},{"id":"3"},{"id":"4"}]); > insert into Friendship ([ {"person":{"id":"1"},"Friends" : [ > {"id":"2"},{"id":"3"},{"id":"4"}]} > ]); > By running the following query, which is equivalent to the previous query: > Use Facebook; > select * from Friendship first, Friendship second where first.person.id = > second.person.id; > we can see that a HASH_PARTITION_EXCHANGE happens, which shows that if the > primary key is nested, AsterixDB does not recognize that the data is already partitioned > on it and therefore should not be repartitioned. The following is the optimized logical > plan of this query, with hash repartitioning applied to both relations > involved in the join, while there should be none in this case. 
> { > "operator":"distribute-result", > "expressions":"$$11", > "operatorId" : "1.1", > "physical-operator":"DISTRIBUTE_RESULT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.2", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"project", > "variables" :["$$11"], > "operatorId" : "1.3", > "physical-operator":"STREAM_PROJECT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"assign", > "variables" :["$$11"], > "expressions":"{ first : $$first, second : $$second}", > "operatorId" : "1.4", > "physical-operator":"ASSIGN", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"project", > "variables" :["$$first","$$second"], > "operatorId" : "1.5", > "physical-operator":"STREAM_PROJECT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.6", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"join", > "condition":"eq($$14, $$15)", > "operatorId" : "1.7", > "physical-operator":"HYBRID_HASH_JOIN > [$$14][$$15]", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.8", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"project", > "variables" :["$$first","$$14"], > "operatorId" : "1.9", > "physical-operator":"STREAM_PROJECT", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"assign", > "variables" :["$$first","$$14"], > "expressions":"$$second, $$15", > "operatorId" : "1.10", > "physical-operator":"ASSIGN", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.11", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"replicate", > "operatorId" : "1.12", > > "physical-operator":"REPLICATE", > > 
"execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"exchange", > "operatorId" : > "1.13", > > "physical-operator":"HASH_PARTITION_EXCHANGE [$$15]", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"assign", > "variables" > :["$$15"], > > "expressions":"$$second.getField(0).getField(0)", > > "operatorId" : "1.14", > > "physical-operator":"ASSIGN", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"project", > > "variables" :["$$second"], > > "operatorId" : "1.15", > > "physical-operator":"STREAM_PROJECT", > > "execution-mode":"PARTITIONED", > > "inputs":[ > { > > "operator":"exchange", > > "operatorId" : "1.16", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > > "execution-mode":"PARTITIONED", > > "inputs":[ > > { > > "operator":"data-scan", > > "variables" :["$$13","$$second"], > > "data-source":"Facebook.Friendship", > > "operatorId" : "1.17", > > "physical-operator":"DATASOURCE_SCAN", > > "execution-mode":"PARTITIONED", > > "inputs":[ > > { > > "operator":"exchange", > > "operatorId" : "1.18", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > > "execution-mode":"PARTITIONED", > > "inputs":[ > > { > > "operator":"empty-tuple-source", > > "operatorId" : "1.19", > > "physical-operator":"EMPTY_TUPLE_SOURCE", > > "execution-mode":"PARTITIONED" > > } > > ] > > } > > ] > > } > > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > , { > "operator":"exchange", > "operatorId" : "1.20", > "physical-operator":"ONE_TO_ONE_EXCHANGE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"replicate", > "operatorId" : "1.12", > "physical-operator":"REPLICATE", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"exchange", > "operatorId" : "1.13", > > "physical-operator":"HASH_PARTITION_EXCHANGE [$$15]", > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"assign", > "variables" :["$$15"], > > "expressions":"$$second.getField(0).getField(0)", > "operatorId" : "1.14", > > 
"physical-operator":"ASSIGN", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > "operator":"project", > "variables" > :["$$second"], > "operatorId" : "1.15", > > "physical-operator":"STREAM_PROJECT", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"exchange", > "operatorId" : > "1.16", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"data-scan", > "variables" > :["$$13","$$second"], > > "data-source":"Facebook.Friendship", > > "operatorId" : "1.17", > > "physical-operator":"DATASOURCE_SCAN", > > "execution-mode":"PARTITIONED", > "inputs":[ > { > > "operator":"exchange", > > "operatorId" : "1.18", > > "physical-operator":"ONE_TO_ONE_EXCHANGE", > > "execution-mode":"PARTITIONED", > > "inputs":[ > { > > "operator":"empty-tuple-source", > > "operatorId" : "1.19", > > "physical-operator":"EMPTY_TUPLE_SOURCE", > > "execution-mode":"PARTITIONED" > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } > ] > } -- This message was sent by Atlassian JIRA (v6.4.14#64029)