[ 
https://issues.apache.org/jira/browse/ASTERIXDB-1544?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15391382#comment-15391382
 ] 

Wenhai commented on ASTERIXDB-1544:
-----------------------------------

Done.

> Omit the fuzzyjoin on inverted index
> ------------------------------------
>
>                 Key: ASTERIXDB-1544
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1544
>             Project: Apache AsterixDB
>          Issue Type: Bug
>         Environment: MAC/linux
>            Reporter: Wenhai
>            Assignee: Wenhai
>            Priority: Critical
>
> In the current master, we have NO test cases covering the fuzzy join on an 
> (inverted) indexed field. Once we trigger a fuzzy join "~=" on an indexed 
> field, we will always get an error with the following log.
> Schema
> {noformat}
> drop dataverse fuzzyjointest if exists;
> create dataverse fuzzyjointest;
> use dataverse fuzzyjointest;
> create type DBLPType as open {
>   tid: uuid,
>   id: int64,
>   dblpid: string?,
>   title: string?,
>   authors: string?,
>   misc: string?
> }
> create type CSXType as closed {
>   tid: uuid,
>   id: int64,
>   csxid: string?,
>   title: string?,
>   authors: string?,
>   misc: string?
> }
> create dataset DBLP(DBLPType) primary key tid autogenerated;
> create dataset CSX(CSXType) primary key tid autogenerated;
> load dataset DBLP
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"))
>  pre-sorted;
> load dataset CSX
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
> use dataverse fuzzyjointest;
> drop index DBLP.title_index if exists;
> create index title_index on DBLP(title) type keyword;
> drop index DBLP.author_index if exists;
> create index author_index on DBLP(authors) type keyword;
> drop index CSX.csx_author_index if exists;
> create index csx_author_index on CSX(authors) type keyword;
> {noformat}
> Query
> {noformat}
> use dataverse fuzzyjointest;
> set simthreshold '.7f'
> for $o in dataset('DBLP')
> for $t in dataset('CSX')
> where word-tokens($o.authors) ~= word-tokens($t.authors)
> return {"cid": $t.id, "did": $o.id}
> {noformat}
> Plan
> {noformat}
> distribute result [%0->$$9]
> -- DISTRIBUTE_RESULT  |PARTITIONED|
>   exchange 
>   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>     project ([$$9])
>     -- STREAM_PROJECT  |PARTITIONED|
>       assign [$$9] <- [function-call: asterix:closed-record-constructor, 
> Args:[AString: {cid}, %0->$$18, AString: {did}, %0->$$19]]
>       -- ASSIGN  |PARTITIONED|
>         project ([$$18, $$19])
>         -- STREAM_PROJECT  |PARTITIONED|
>           exchange 
>           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>             join (function-call: algebricks:eq, Args:[%0->$$22, %0->$$12])
>             -- HYBRID_HASH_JOIN [$$22][$$12]  |PARTITIONED|
>               exchange 
>               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                 project ([$$19, $$22])
>                 -- STREAM_PROJECT  |PARTITIONED|
>                   assign [$$19] <- [function-call: 
> asterix:field-access-by-index, Args:[%0->$$0, AInt32: {1}]]
>                   -- ASSIGN  |PARTITIONED|
>                     exchange 
>                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                       data-scan []<-[$$22, $$0] <- fuzzyjointest:DBLP
>                       -- DATASOURCE_SCAN  |PARTITIONED|
>                         exchange 
>                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                           empty-tuple-source
>                           -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
>               exchange 
>               -- HASH_PARTITION_EXCHANGE [$$12]  |PARTITIONED|
>                 project ([$$18, $$12])
>                 -- STREAM_PROJECT  |PARTITIONED|
>                   select (function-call: asterix:get-item, 
> Args:[function-call: asterix:similarity-jaccard-check, Args:[%0->$$14, 
> function-call: asterix:word-tokens, Args:[%0->$$17], AFloat: {0.7}], AInt32: 
> {0}])
>                   -- STREAM_SELECT  |PARTITIONED|
>                     project ([$$17, $$18, $$12, $$14])
>                     -- STREAM_PROJECT  |PARTITIONED|
>                       assign [$$18, $$17] <- [function-call: 
> asterix:field-access-by-index, Args:[%0->$$1, AInt32: {1}], function-call: 
> asterix:field-access-by-index, Args:[%0->$$1, AInt32: {4}]]
>                       -- ASSIGN  |PARTITIONED|
>                         project ([$$1, $$12, $$14])
>                         -- STREAM_PROJECT  |PARTITIONED|
>                           exchange 
>                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                             unnest-map [$$13, $$1] <- function-call: 
> asterix:index-search, Args:[AString: {CSX}, AInt32: {0}, AString: 
> {fuzzyjointest}, AString: {CSX}, ABoolean: {true}, ABoolean: {false}, AInt32: 
> {1}, %0->$$26, AInt32: {1}, %0->$$26, TRUE, TRUE, TRUE]
>                             -- BTREE_SEARCH  |PARTITIONED|
>                               exchange 
>                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                 order (ASC, %0->$$26) 
>                                 -- STABLE_SORT [$$26(ASC)]  |PARTITIONED|
>                                   exchange 
>                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                     unnest-map [$$26] <- function-call: 
> asterix:index-search, Args:[AString: {csx_author_index}, AInt32: {4}, 
> AString: {fuzzyjointest}, AString: {CSX}, ABoolean: {true}, ABoolean: {true}, 
> AInt32: {1}, AFloat: {0.7}, AInt32: {25}, AInt32: {1}, %0->$$14]
>                                     -- 
> LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH  |PARTITIONED|
>                                       exchange 
>                                       -- BROADCAST_EXCHANGE  |PARTITIONED|
>                                         project ([$$12, $$14])
>                                         -- STREAM_PROJECT  |PARTITIONED|
>                                           assign [$$14] <- [function-call: 
> asterix:word-tokens, Args:[function-call: asterix:field-access-by-index, 
> Args:[%0->$$25, AInt32: {4}]]]
>                                           -- ASSIGN  |PARTITIONED|
>                                             exchange 
>                                             -- ONE_TO_ONE_EXCHANGE  
> |PARTITIONED|
>                                               data-scan []<-[$$12, $$25] <- 
> fuzzyjointest:DBLP
>                                               -- DATASOURCE_SCAN  
> |PARTITIONED|
>                                                 exchange 
>                                                 -- ONE_TO_ONE_EXCHANGE  
> |PARTITIONED|
>                                                   empty-tuple-source
>                                                   -- EMPTY_TUPLE_SOURCE  
> |PARTITIONED|
> {noformat}
> The same error occurs, similar to issue ASTERIXDB-1487.
> {noformat}
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
>       at 
> org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
>       at 
> org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: 
> null
>       at 
> org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
>       at org.apache.hyracks.control.nc.Task.run(Task.java:319)
>       ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
>       at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
>       at org.apache.hyracks.control.nc.Task.run(Task.java:297)
>       ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
>       at 
> org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
>       at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
>       ... 4 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
>       at 
> org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
>       ... 5 more
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
>       at 
> org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
>       at 
> org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: 
> null
>       at 
> org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
>       at org.apache.hyracks.control.nc.Task.run(Task.java:319)
>       ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
>       at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
>       at org.apache.hyracks.control.nc.Task.run(Task.java:297)
>       ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
>       at 
> org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
>       at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
>       ... 4 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
>       at 
> org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
>       ... 5 more
> Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
> SEVERE: Job failed on account of:
> HYR0002: null
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
>       at 
> org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
>       at 
> org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: 
> null
>       at 
> org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
>       at org.apache.hyracks.control.nc.Task.run(Task.java:319)
>       ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
>       at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
>       at org.apache.hyracks.control.nc.Task.run(Task.java:297)
>       ... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
>       at 
> org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
>       at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
>       ... 4 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
>       at 
> org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
>       ... 5 more
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to