Wenhai created ASTERIXDB-1208:
---------------------------------
Summary: ngram tokenizer failure with negative length
Key: ASTERIXDB-1208
URL: https://issues.apache.org/jira/browse/ASTERIXDB-1208
Project: Apache AsterixDB
Issue Type: Bug
Components: Hyracks Core
Reporter: Wenhai
/*
 * Setup for the reproduction: recreates dataverse `test` from scratch,
 * declares the open record type DBLPOpenType, creates dataset DBLPOpen
 * keyed on `id`, and inserts a single record (id 93).
 */
drop dataverse test if exists;
create dataverse test;
use dataverse test;
/* Open type declaring only id, dblpid, authors and misc; no `title` field is declared. */
create type DBLPOpenType as open {
id: int64,
dblpid: string,
authors: string,
misc: string
}
create dataset DBLPOpen(DBLPOpenType) primary key id;
/*
 * The only record loaded into the dataset; it supplies exactly the four
 * declared fields and nothing else.
 * NOTE(review): the "misc" string literal continues across a line break
 * below -- presumably e-mail line wrapping rather than part of the value;
 * confirm against the original JIRA issue.
 */
insert into dataset DBLPOpen { "id": 93, "dblpid":
"journals/iandc/IbarraJCR91", "authors": "Some Classes of Languages in NC¹",
"misc": "2006-04-25 86-106 Inf. Comput. January 1991 90 1
db/journals/iandc/iandc90.html#IbarraJCR91" }
/*
 * Query from the report (issue summary: "ngram tokenizer failure with
 * negative length").
 * For every record $d in DBLPOpen it calls gram-tokens(..., 3, false) on the
 * empty string "" and on $d.title, and returns the records for which
 * similarity-jaccard of the two results is >= 0.5.
 * DBLPOpenType does not declare a `title` field and the inserted record does
 * not contain one, so $d.title refers to a field absent from the data.
 * NOTE(review): the arguments (3, false) are presumably the gram length and a
 * pre/post padding flag -- confirm against the gram-tokens documentation.
 * NOTE(review): the report does not state which of the two gram-tokens inputs
 * triggers the failure.
 */
use dataverse test;
/* NOTE(review): presumably needed so that gram-tokens can be called from a query -- confirm. */
set import-private-functions 'true'
for $d in dataset DBLPOpen
where similarity-jaccard(gram-tokens("",3,false),gram-tokens($d.title,3,false))
>= 0.5
return {"rec": $d}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)