Yingyi Bu created ASTERIXDB-1411:
------------------------------------
Summary: Index-join is not picked for a left-outer-join (LOJ) query
Key: ASTERIXDB-1411
URL: https://issues.apache.org/jira/browse/ASTERIXDB-1411
Project: Apache AsterixDB
Issue Type: Bug
Reporter: Yingyi Bu
Assignee: Taewoo Kim
DDL:
{noformat}
// Repro schema for ASTERIXDB-1411: rebuilds the `test` dataverse from scratch.
drop dataverse test if exists;
create dataverse test;
use dataverse test;
// Closed record type; used below as the type of TweetMessageType.user.
create type TwitterUserType as closed {
screen-name: string,
lang: string,
friends-count: int64,
statuses-count: int64,
name: string,
followers-count: int64
}
// Open record type. It declares countA but does NOT declare countB.
create type TweetMessageType as open {
tweetid: int64,
user: TwitterUserType,
sender-location: point,
send-time: datetime,
referred-topics: {{ string }},
message-text: string,
countA: int64
}
// Both datasets use TweetMessageType and are keyed on tweetid.
create dataset TweetMessages(TweetMessageType)
primary key tweetid;
// TweetMessagesTmp is created here but is not referenced by the query in this report.
create dataset TweetMessagesTmp(TweetMessageType)
primary key tweetid;
// Enforced B-tree index on countB, a field that is not declared in the open
// type above; its type (int64) is supplied inline in the index definition.
create index msgCountBIx on TweetMessages(countB: int64) type btree enforced;
{noformat}
Query:
{noformat}
// Repro query for ASTERIXDB-1411: for every TweetMessages record with
// tweetid < 10, return its tweetid, its countA, and a nested list ("t2info")
// of the other TweetMessages records whose countB equals that countA.
use dataverse test;
for $t1 in dataset('TweetMessages')
where $t1.tweetid < int64("10")
order by $t1.tweetid
return {
"tweetid1": $t1.tweetid,
"count1":$t1.countA,
// Correlated subquery over the same dataset; the plan in this report shows it
// compiled into a left outer join executed as NESTED_LOOP over a full data-scan.
"t2info": for $t2 in dataset('TweetMessages')
// NOTE(review): the comment before '=' is presumably intended as the index
// nested-loop join hint. AsterixDB hints are normally written with '+' placed
// immediately after the comment opener (no space); with the spacing shown here
// it may be parsed as an ordinary comment. Confirm whether the space is in the
// original query or an artifact of the e-mail rendering.
where $t1.countA /* +indexnl */= $t2.countB and
// Excludes the self-match (same tweetid on both sides).
$t1.tweetid != $t2.tweetid
order by $t2.tweetid
return {"tweetid2": $t2.tweetid,
"count2":$t2.countB}
};
{noformat}
Query plan:
{noformat}
distribute result [%0->$$28]
-- DISTRIBUTE_RESULT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$28])
-- STREAM_PROJECT |PARTITIONED|
assign [$$28] <- [function-call: asterix:open-record-constructor,
Args:[AString: {tweetid1}, %0->$$41, AString: {count1}, %0->$$35, AString:
{t2info}, %0->$$27]]
-- ASSIGN |PARTITIONED|
exchange
-- SORT_MERGE_EXCHANGE [$$41(ASC) ] |PARTITIONED|
group by ([$$41 := %0->$$31]) decor ([%0->$$35]) {
aggregate [$$27] <- [function-call: asterix:listify,
Args:[function-call: asterix:open-record-constructor, Args:[AString:
{tweetid2}, %0->$$32, AString: {count2}, %0->$$34]]]
-- AGGREGATE |LOCAL|
select (function-call: algebricks:not,
Args:[function-call: algebricks:is-null, Args:[%0->$$40]])
-- STREAM_SELECT |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- PRE_CLUSTERED_GROUP_BY[$$31] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$31) (ASC, %0->$$32)
-- STABLE_SORT [$$31(ASC), $$32(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$31] |PARTITIONED|
left outer join (function-call: algebricks:and,
Args:[function-call: algebricks:eq, Args:[%0->$$35, %0->$$34], function-call:
algebricks:neq, Args:[%0->$$31, %0->$$32]])
-- NESTED_LOOP |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$35, $$31])
-- STREAM_PROJECT |PARTITIONED|
assign [$$35] <- [function-call:
asterix:field-access-by-index, Args:[%0->$$5, AInt32: {6}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
unnest-map [$$31, $$5] <- function-call:
asterix:index-search, Args:[AString: {TweetMessages}, AInt32: {0}, AString:
{test}, AString: {TweetMessages}, ABoolean: {false}, ABoolean: {false}, AInt32:
{0}, AInt32: {1}, %0->$$42, TRUE, FALSE, FALSE]
-- BTREE_SEARCH |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
assign [$$42] <- [AInt64: {10}]
-- ASSIGN |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
project ([$$32, $$34, $$40])
-- STREAM_PROJECT |PARTITIONED|
assign [$$40, $$34] <- [TRUE, function-call:
asterix:field-access-by-name, Args:[%0->$$6, AString: {countB}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$32, $$6] <- test:TweetMessages
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)