Wenhai created ASTERIXDB-1208:
---------------------------------

             Summary: ngram tokenizer failure with negative length
                 Key: ASTERIXDB-1208
                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1208
             Project: Apache AsterixDB
          Issue Type: Bug
          Components: Hyracks Core
            Reporter: Wenhai


/*
 * Reproduction setup for ASTERIXDB-1208: recreates the `test` dataverse,
 * declares the DBLPOpen dataset and loads a single record into it.
 *
 * Every string literal is kept on one line. The mailed copy of this script
 * was soft-wrapped, which split the "misc" value across two lines and put a
 * line break inside the literal.
 */
drop dataverse test if exists;
create dataverse test;
use dataverse test;

/* Declared as an open type with four named fields. */
create type DBLPOpenType as open {
  id: int64,
  dblpid: string,
  authors: string,
  misc: string
}

create dataset DBLPOpen(DBLPOpenType) primary key id;

/* The only record in the dataset. */
insert into dataset DBLPOpen {
  "id": 93,
  "dblpid": "journals/iandc/IbarraJCR91",
  "authors": "Some Classes of Languages in NC¹",
  "misc": "2006-04-25 86-106 Inf. Comput. January 1991 90 1 db/journals/iandc/iandc90.html#IbarraJCR91"
}

/*
 * Query from the ASTERIXDB-1208 report ("ngram tokenizer failure with
 * negative length"). For each record in DBLPOpen it compares the gram tokens
 * (gram length 3) of the empty string with the gram tokens of the record's
 * `title` field using similarity-jaccard, and returns the records whose
 * score is at least 0.5.
 *
 * NOTE(review): DBLPOpenType declares no `title` field and the inserted
 * record does not set one; confirm whether `authors` was intended or whether
 * the missing field is part of the reproduction.
 */
use dataverse test;
/* Presumably required so that gram-tokens resolves - TODO confirm. */
set import-private-functions 'true'
for $d in dataset DBLPOpen
where similarity-jaccard(gram-tokens("",3,false),gram-tokens($d.title,3,false)) 
>= 0.5
return {"rec": $d}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to