http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-let.aql deleted file mode 100644 index 82b1cd5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-let.aql +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm"; - -for $o in dataset('DBLP') -let $jacc := similarity-jaccard-check(word-tokens($o.nested.title), word-tokens("Transactions for Cooperative Environments"), 0.5f) -where $jacc[0] -return $o -
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-multi-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-multi-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-multi-let.aql deleted file mode 100644 index 26b42ed..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-complex/word-jaccard-check-multi-let.aql +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm"; - -// This test is complex because we have three assigns to drill into. -for $paper in dataset('DBLP') -let $paper_tokens := word-tokens($paper.nested.title) -let $query_tokens := word-tokens("Transactions for Cooperative Environments") -let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f) -where $jacc[0] -return {"Paper": $paper_tokens, "Query": $query_tokens } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql deleted file mode 100644 index d24c284..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree. - * Issue : 730, 741 - * Expected Res : Success - * Date : 8th May 2014 - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as closed { - screen-name: string, - lang: string, - friends-count: int32, - statuses-count: int32, - name: string, - followers-count: int32 -} - -create type TweetMessageNestedType as open { - tweetid: int64, - user: TwitterUserType, - sender-location: point, - send-time: datetime, - referred-topics: {{ string }}, - countA: int32, - countB: int32 -} - -create type TweetMessageType as open { - nested: TweetMessageNestedType -} - -create dataset TweetMessages(TweetMessageType) -primary key nested.tweetid; - -create index msgNgramIx on TweetMessages(nested.message-text: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm"; - -for $t1 in dataset('TweetMessages') -where $t1.nested.tweetid > int64("240") -order by $t1.nested.tweetid -return { - "tweet": {"id": $t1.nested.tweetid, "topics" : $t1.nested.message-text} , - "similar-tweets": for $t2 in dataset('TweetMessages') - let $sim := edit-distance-check($t1.nested.message-text, $t2.nested.message-text, 7) - where $sim[0] and - $t2.nested.tweetid != $t1.nested.tweetid - order by $t2.nested.tweetid - return {"id": $t2.nested.tweetid, "topics" : $t2.nested.message-text} -}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_02.aql deleted file mode 100644 index 5fc32d5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_02.aql +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index on CSX(nested.authors: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm"; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where edit-distance-check($a.nested.authors, $b.nested.authors, 3)[0] and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_03.aql deleted file mode 100644 index 9758b48..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_03.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on the edit-distance-check function of their authors. 
- * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.authors: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where edit-distance-check($a.nested.authors, $b.nested.authors, 3)[0] and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_04.aql deleted file mode 100644 index 3291f52..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-check_04.aql +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index_DBLP on DBLP(nested.authors: string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(nested.authors: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance-check($a.nested.authors, $b.nested.authors, 3)[0] and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-inline.aql deleted file mode 100644 index b7ffdd4..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance-inline.aql +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. 
- * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.authors: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance-inline.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $ed := edit-distance($a.nested.authors, $b.nested.authors) -where $ed < 3 and $a.nested.id < $b.nested.id -return {"aauthors": $a.nested.authors, "bauthors": $b.nested.authors, "ed": $ed} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_02.aql deleted file mode 100644 index 4d1010a..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_02.aql +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index on CSX(nested.authors: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm"; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where edit-distance($a.nested.authors, $b.nested.authors) < 3 and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_03.aql deleted file mode 100644 index eb55bb1..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_03.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on the edit-distance function of their authors. 
- * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.authors: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where edit-distance($a.nested.authors, $b.nested.authors) < 3 and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_04.aql deleted file mode 100644 index e546740..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-edit-distance_04.aql +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index_DBLP on DBLP(nested.authors: string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(nested.authors: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance($a.nested.authors, $b.nested.authors) < 3 and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql deleted file mode 100644 index 081f022..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. 
- * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type CSXTypetmp as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index on DBLP(nested.authors: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $b in dataset('CSX') -for $a in dataset('DBLP') -where $a.nested.authors ~= $b.nested.authors and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql deleted file mode 100644 index cd29e65..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on ~= using edit distance of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.authors: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where $a.nested.authors ~= $b.nested.authors and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql deleted file mode 100644 index 2f7c560..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' 3-gram tokens. - * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index on CSX(nested.title: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_01.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where gram-tokens($a.nested.title, 3, false) ~= gram-tokens($b.nested.title, 3, false) and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql deleted file mode 100644 index 5e12569..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql +++
/dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on ~= using Jaccard of their titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.title: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_01.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where gram-tokens($a.nested.title, 3, false) ~= gram-tokens($b.nested.title, 3, false) and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_02.aql deleted file mode 100644 index 5576768..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_02.aql +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens. - * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index on CSX(nested.title: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_01.adm"; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where similarity-jaccard-check(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false), 0.5f)[0] - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_03.aql deleted file mode 100644 index 32829fb..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_03.aql +++ /dev/null @@
-1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on the similarity-jaccard-check function of their titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.title: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard-check(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false), 0.5f)[0] - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_04.aql deleted file mode 100644 index dc4c210..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-check_04.aql +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index_DBLP on DBLP(nested.title: string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(nested.title: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard-check(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false), 0.5f)[0] - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-inline.aql deleted file mode 100644 index bd599b4..0000000 --- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard-inline.aql +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.title: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-jaccard-inline.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false)) -where $jacc >= 0.5f and $a.nested.id < $b.nested.id -return {"atitle": $a.nested.title, "btitle": $b.nested.title, "jacc": $jacc} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_02.aql deleted file mode 100644 index e9f491b..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_02.aql +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens. - * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index on CSX(nested.title: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_01.adm"; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where similarity-jaccard(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false)) >= 0.5f - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_03.aql deleted file mode 100644 index e064c03..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_03.aql +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on the similarity-jaccard function of their titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index ngram_index on DBLP(nested.title: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false)) >= 0.5f - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_04.aql deleted file mode 100644 index b08eabf..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/ngram-jaccard_04.aql +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index ngram_index_DBLP on DBLP(nested.title: string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(nested.title: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false)) >= 0.5f - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_02.aql deleted file mode 100644 index e19b4af..0000000 --- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_02.aql +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens. - * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index keyword_index on CSX(nested.title: string?)
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_01.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where word-tokens($a.nested.title) ~= word-tokens($b.nested.title) and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_03.aql deleted file mode 100644 index 5fd64c9..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-fuzzyeq-jaccard_03.aql +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Self joins dataset DBLP, based on ~= using Jaccard of their titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_01.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where word-tokens($a.nested.title) ~= word-tokens($b.nested.title) and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check-after-btree-access.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check-after-btree-access.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check-after-btree-access.aql deleted file mode 100644 index 9ef8bb2..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check-after-btree-access.aql +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, TweetMessages, based on the similarity-jaccard-check function of its text-messages' word tokens. - * TweetMessages has a keyword index on text-message and btree index on the primary key tweetid, and we expect the join to be - * transformed into btree and inverted indexed nested-loop joins. We test whether the join condition can be transformed into - * multiple indexed nested loop joins of various type of indexes. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as closed { - screen-name: string, - lang: string, - friends-count: int32, - statuses-count: int32, - name: string, - followers-count: int32 -} - -create type TweetMessageNestedType as open { - tweetid: int64, - user: TwitterUserType, - sender-location: point, - send-time: datetime, - referred-topics: {{ string }}, - countA: int32, - countB: int32 -} - -create type TweetMessageType as closed { - nested: TweetMessageNestedType -} - -create dataset TweetMessages(TweetMessageType) -primary key nested.tweetid; - -create index twmSndLocIx on TweetMessages(nested.sender-location) type rtree; -create index msgCountAIx on TweetMessages(nested.countA) type btree; -create index msgCountBIx on TweetMessages(nested.countB) type btree; -create index msgTextIx on TweetMessages(nested.message-text: string?) 
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check-after-btree-access.adm"; - -for $t1 in dataset('TweetMessages') -for $t2 in dataset('TweetMessages') -let $sim := similarity-jaccard-check(word-tokens($t1.nested.message-text), word-tokens($t2.nested.message-text), 0.6f) -where $sim[0] and $t1.nested.tweetid < int64("20") and $t2.nested.tweetid != $t1.nested.tweetid -return { - "t1": $t1.nested.tweetid, - "t2": $t2.nested.tweetid, - "sim": $sim[1] -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_02.aql deleted file mode 100644 index 583dda6..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_02.aql +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens. - * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index keyword_index on CSX(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_01.adm"; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where similarity-jaccard-check(word-tokens($a.nested.title), word-tokens($b.nested.title), 0.5f)[0] - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_03.aql deleted file mode 100644 index 2cff649..0000000 --- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_03.aql +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on the similarity-jaccard-check function of their titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index keyword_index_DBLP on DBLP(nested.title: string?) 
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard-check(word-tokens($a.nested.title), word-tokens($b.nested.title), 0.5f)[0] - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_04.aql deleted file mode 100644 index fe7b2f9..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-check_04.aql +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) type keyword enforced; - -create index keyword_index on CSX(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard-check(word-tokens($a.nested.title), word-tokens($b.nested.title), 0.5f)[0] - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-inline.aql deleted file mode 100644 index 2ec1846..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard-inline.aql +++ /dev/null @@ -1,52 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) 
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_word-jaccard-inline.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard(word-tokens($a.nested.title), word-tokens($b.nested.title)) -where $jacc >= 0.5f and $a.nested.id < $b.nested.id -return {"atitle": $a.nested.title, "btitle": $b.nested.title, "jacc": $jacc} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_02.aql deleted file mode 100644 index 2d52ae9..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_02.aql +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens. - * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index keyword_index on CSX(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_01.adm"; - -for $b in dataset('DBLP') -for $a in dataset('CSX') -where similarity-jaccard(word-tokens($a.nested.title), word-tokens($b.nested.title)) >= 0.5f - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_03.aql deleted file mode 100644 index 456ad8c..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_03.aql +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor 
license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Self joins dataset DBLP, based on the similarity-jaccard function of their titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) 
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard(word-tokens($a.nested.title), word-tokens($b.nested.title)) >= 0.5f - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_04.aql deleted file mode 100644 index 8812d81..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/nested-open-index/inverted-index-join/word-jaccard_04.aql +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens. 
- * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPTypetmp as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type CSXTypetmp as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create type DBLPType as closed { - nested : DBLPTypetmp -} - -create type CSXType as closed { - nested : CSXTypetmp -} - -create dataset DBLP(DBLPType) primary key nested.id; - -create dataset CSX(CSXType) primary key nested.id; - -create index keyword_index on DBLP(nested.title: string?) type keyword enforced; - -create index keyword_index on CSX(nested.title: string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard(word-tokens($a.nested.title), word-tokens($b.nested.title)) >= 0.5f - and $a.nested.id < $b.nested.id -return {"arec": $a, "brec": $b } -