Taewoo Kim has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/740
Change subject: ASTERIXDB-1233: Fixed the bulk-loading with an inverted index on an open-type field ...................................................................... ASTERIXDB-1233: Fixed the bulk-loading with an inverted index on an open-type field Change-Id: If58f594c0a7b6f4bca45b13ceaef07b605d2fe22 --- A asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql A asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql A asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql A asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm M asterix-app/src/test/resources/runtimets/testsuite.xml M asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java 6 files changed, 111 insertions(+), 25 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/40/740/1 diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql new file mode 100644 index 0000000..f736ebc --- /dev/null +++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse OpenTinySocial if exists; +create dataverse OpenTinySocial; +use dataverse OpenTinySocial; +create type FacebookMessageType as +{ message-id: int64 } + +create dataset FacebookMessages(FacebookMessageType) +primary key message-id; +create index fbAuthorIdx on FacebookMessages(author-id: int64) type btree enforced; +create index fbSenderLocIndex on FacebookMessages(sender-location: point) type rtree enforced; +create index fbMessageIdx on FacebookMessages(message: string) type keyword enforced; diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql new file mode 100644 index 0000000..f7740dc --- /dev/null +++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +use dataverse OpenTinySocial; + +load dataset FacebookMessages +using localfs +(("path"="asterix_nc1://data/tinysocial/fbm.adm"),("format"="adm")) pre-sorted; diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql new file mode 100644 index 0000000..d97df77 --- /dev/null +++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +use dataverse OpenTinySocial; + +count( +for $c in dataset('FacebookMessages') +return $c +); diff --git a/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm new file mode 100644 index 0000000..60d3b2f --- /dev/null +++ b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm @@ -0,0 +1 @@ +15 diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml index 81480fd..115cf92 100644 --- a/asterix-app/src/test/resources/runtimets/testsuite.xml +++ b/asterix-app/src/test/resources/runtimets/testsuite.xml @@ -1764,6 +1764,11 @@ </compilation-unit> </test-case> <test-case FilePath="dml"> + <compilation-unit name="load-with-index-open_02"> + <output-dir compare="Text">load-with-index-open_02</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="dml"> <compilation-unit name="load-with-ngram-index-open"> <output-dir compare="Text">load-with-ngram-index-open</output-dir> </compilation-unit> diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java index 90bec64..5eba66c 100644 --- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java +++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java @@ -589,7 +589,7 @@ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDatasetDataScannerRuntime( JobSpecification jobSpec, IAType itemType, IAdapterFactory adapterFactory, IDataFormat format) - throws AlgebricksException { + throws AlgebricksException { if (itemType.getTypeTag() != ATypeTag.RECORD) { throw new AlgebricksException("Can only scan datasets of records."); } @@ -676,7 +676,8 @@ } Pair<IBinaryComparatorFactory[], ITypeTraits[]> comparatorFactoriesAndTypeTraits = getComparatorFactoriesAndTypeTraitsOfSecondaryBTreeIndex( secondaryIndex.getIndexType(), secondaryIndex.getKeyFieldNames(), - secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType, dataset.getDatasetType()); + secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType, + dataset.getDatasetType()); comparatorFactories = comparatorFactoriesAndTypeTraits.first; typeTraits = comparatorFactoriesAndTypeTraits.second; if (filterTypeTraits != null) { @@ -799,21 +800,21 @@ } for (int j = 0; j < pidxKeyFieldCount; ++j, ++i) { - IAType keyType = null; - try { - switch (dsType) { - case INTERNAL: - keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j)); - break; - case EXTERNAL: - keyType = IndexingConstants.getFieldType(j); - break; - default: - throw new AlgebricksException("Unknown Dataset Type"); - } - } catch (AsterixException e) { - throw new AlgebricksException(e); - } + IAType keyType = null; + try { + switch (dsType) { + case INTERNAL: + keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j)); + break; + case EXTERNAL: + keyType = IndexingConstants.getFieldType(j); + break; + default: + throw new AlgebricksException("Unknown Dataset Type"); + } + } catch (AsterixException e) { + throw new AlgebricksException(e); + } comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType, true); typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType); @@ -1363,7 +1364,7 @@ IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload) - throws AlgebricksException { + throws AlgebricksException { // Sanity checks. if (primaryKeys.size() > 1) { @@ -1467,6 +1468,7 @@ dataset.getDatasetName(), indexName); List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames(); + List<IAType> secondaryKeyTypeEntries = secondaryIndex.getKeyFieldTypes(); int numTokenFields = (!isPartitioned) ? secondaryKeys.size() : secondaryKeys.size() + 1; ITypeTraits[] tokenTypeTraits = new ITypeTraits[numTokenFields]; @@ -1476,7 +1478,8 @@ // return the derived type. // e.g. UNORDERED LIST -> return UNORDERED LIST type IAType secondaryKeyType = null; - Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(secondaryKeyExprs.get(0), recType); + Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypeEntries.get(0), + secondaryKeyExprs.get(0), recType); secondaryKeyType = keyPairType.first; List<List<String>> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset); i = 0; @@ -1552,7 +1555,7 @@ IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec) - throws AlgebricksException { + throws AlgebricksException { return getIndexInsertOrDeleteRuntime(IndexOperation.DELETE, dataSourceIndex, propagatedSchema, inputSchemas, typeEnv, primaryKeys, secondaryKeys, additionalNonKeyFields, filterExpr, recordDesc, context, spec, false); @@ -1560,7 +1563,7 @@ private AsterixTupleFilterFactory createTupleFilterFactory(IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, ILogicalExpression filterExpr, JobGenContext context) - throws AlgebricksException { + throws AlgebricksException { // No filtering condition. if (filterExpr == null) { return null; @@ -2233,7 +2236,7 @@ JobSpecification jobSpec, Dataset dataset, Index secondaryIndex, int[] ridIndexes, boolean retainInput, IVariableTypeEnvironment typeEnv, List<LogicalVariable> outputVars, IOperatorSchema opSchema, JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull) - throws AlgebricksException { + throws AlgebricksException { try { // Get data type IAType itemType = null; @@ -2485,7 +2488,7 @@ List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexType indexType, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) - throws AlgebricksException { + throws AlgebricksException { // Check the index is length-partitioned or not. boolean isPartitioned; if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX @@ -2710,7 +2713,7 @@ List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) - throws AlgebricksException { + throws AlgebricksException { try { Dataset dataset = MetadataManager.INSTANCE.getDataset(mdTxnCtx, dataverseName, datasetName); @@ -2864,7 +2867,7 @@ List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) - throws AlgebricksException { + throws AlgebricksException { // we start with the btree Dataset dataset = findDataset(dataverseName, datasetName); if (dataset == null) { -- To view, visit https://asterix-gerrit.ics.uci.edu/740 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If58f594c0a7b6f4bca45b13ceaef07b605d2fe22 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Taewoo Kim <[email protected]>
