Wenhai Li has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1119
Change subject: ASTERIX-1487: fix the wrong plan for inverted fuzzyjoin. The following commits from your working branch will be included: ...................................................................... ASTERIX-1487: fix the wrong plan for inverted fuzzyjoin. The following commits from your working branch will be included: 1. Add the test case of ASTERIX-1487 with half records required. commit 957aaa59f279b53c7901758f88a1820dd8b689c9 Author: Michael <lwhaym...@yahoo.com> Date: Fri Aug 26 06:53:00 2016 -0700 ASTERIX-1487: fix the wrong plan for inverted index. Change-Id: I1aef69a2278853fd9f8020da6639331b367ed5ad --- A asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.1.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.2.update.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.3.query.aql A asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_4.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml 5 files changed, 108 insertions(+), 0 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/19/1119/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.1.ddl.aql new file mode 100644 index 0000000..811c2b5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.1.ddl.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse fuzzyjoin if exists; + +create dataverse fuzzyjoin; + +use dataverse fuzzyjoin; + +create type DBLPType as open { + id: int64, + dblpid: string?, + title: string?, + authors: string?, + misc: string? +} + +create type CSXType as open { + id: int64, + csxid: string?, + title: string?, + authors: string?, + misc: string? +} + +create dataset DBLP(DBLPType) primary key id; +create dataset CSX(CSXType) primary key id; + +create index author_index on DBLP(authors) type keyword; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.2.update.aql new file mode 100644 index 0000000..fc2fb4b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.2.update.aql @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +use dataverse fuzzyjoin; + +load dataset DBLP +using localfs +(("path"="asterix_nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")); + +load dataset CSX +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.3.query.aql new file mode 100644 index 0000000..e9fdc2b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_5.3.query.aql @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +use dataverse fuzzyjoin; + +set simthreshold '.7f' + +let $s := count( +for $t in dataset('CSX') +for $o in dataset('DBLP') +where contains($o.title, "System") and +word-tokens($o.authors) ~= word-tokens($t.authors) +return $t) +return $s diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_4.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_4.1.adm new file mode 100644 index 0000000..00750ed --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/dblp-csx-aqlplus_5/dblp-csx-aqlplus_4.1.adm @@ -0,0 +1 @@ +3 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml index bf1893a..a526676 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml @@ -2651,6 +2651,11 @@ </compilation-unit> </test-case> <test-case FilePath="fuzzyjoin"> + <compilation-unit name="dblp-csx-aqlplus_5"> + <output-dir compare="Text">dblp-csx-aqlplus_5</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="fuzzyjoin"> <compilation-unit name="dblp-csx-dblp-aqlplus_1"> <output-dir compare="Text">dblp-csx-dblp-aqlplus_1</output-dir> </compilation-unit> -- To view, visit https://asterix-gerrit.ics.uci.edu/1119 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1aef69a2278853fd9f8020da6639331b367ed5ad Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Wenhai Li <lwhaym...@yahoo.com>