>From Ali Alsuliman <[email protected]>: Ali Alsuliman has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19388 )
Change subject: [ASTERIXDB-3392] Add UUID type in COPY TO parquet ...................................................................... [ASTERIXDB-3392] Add UUID type in COPY TO parquet Details: Support UUID type in copy to parquet, and error out when type is not supported. Ext-ref: MB-65168 Change-Id: Ib65eaf9c1a16fb9f97bb38ddf9c03bceec1bec46 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19388 Integration-Tests: Jenkins <[email protected]> Tested-by: Ali Alsuliman <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp 12 files changed, 79 insertions(+), 7 deletions(-) Approvals: Ali Alsuliman: Looks good to me, approved; Verified Jenkins: Verified diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp index d2a376c..8e0b9e8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp @@ -19,5 +19,5 @@ use test; -insert into TestCollection({"id":18, "name": "Virat" , "dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") }); +insert into TestCollection({"id":18, "name": "Virat" , "dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") , "uuidType" : uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") }); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp index a95146f..3650f61 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp @@ -24,7 +24,7 @@ ) toWriter TO hdfs PATH ("copy-to-result", "parquet-cover-data-types") -TYPE ( { name : string, id : int, dateType : date, timeType : time, boolType : boolean, doubleType : double, datetimeType : datetime } ) +TYPE ( { name : string, id : int, dateType : date, timeType : time, boolType : boolean, doubleType : double, datetimeType : datetime, uuidType: uuid } ) WITH { "hdfs":"hdfs://127.0.0.1:31888/", "format":"parquet" diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp new file mode 100644 index 0000000..e956812 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + + + + +COPY ( +select id,name from TestCollection c + ) toWriter +TO %adapter% +PATH (%pathprefix% "copy-to-result", "parquet-error-checks16") +TYPE ( { id:int, rect: rectangle }) +WITH { + %template_colons%, + %additionalProperties% + "format":"parquet", + "max-schemas" : "2" + } + + + + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp index ec1ac0c..4cd2ec7 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp @@ -29,5 +29,5 @@ insert into TestCollection({"id":`day-time-duration`("-P3829H849.392S"), "name": "Alex"}); */ -insert into TestCollection({"id":18, "name": "Virat" , "dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") }); +insert into TestCollection({"id":18, "name": "Virat" , "dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") , "uuidType" : uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") }); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp index a85e188..042fa99 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp @@ -24,7 +24,7 @@ ) toWriter TO %adapter% PATH (%pathprefix% "copy-to-result", "parquet-cover-data-types") -TYPE ( { name : string, id : int, dateType : date, timeType : time, boolType : boolean, doubleType : double, datetimeType : datetime } ) +TYPE ( { name : string, id : int, dateType : date, timeType : time, boolType : boolean, doubleType : double, datetimeType : datetime , uuidType : uuid } ) WITH { %template_colons%, %additionalProperties% diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm index 8fc863e..dd54be4 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm @@ -1 +1 @@ -{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": datetime("1900-02-01T00:00:00.000") } \ No newline at end of file +{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": datetime("1900-02-01T00:00:00.000"), "uuidType": uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm index 8fc863e..dd54be4 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm @@ -1 +1 @@ -{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": datetime("1900-02-01T00:00:00.000") } \ No newline at end of file +{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": datetime("1900-02-01T00:00:00.000"), "uuidType": uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index da33b2e..1a05334 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -210,6 +210,7 @@ <expected-error>ASX1209: Maximum value allowed for 'max-schemas' is 10. Found 15</expected-error> <expected-error>HYR0133: Schema could not be inferred, empty types found in the result</expected-error> <expected-error>HYR0134: Schema Limit exceeded, maximum number of heterogeneous schemas allowed : '2'</expected-error> + <expected-error>ASX1204: 'rectangle' type not supported in parquet format</expected-error> </compilation-unit> </test-case> <test-case FilePath="copy-to/negative"> diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml index b178efd..f2ec232 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml @@ -258,6 +258,7 @@ <expected-error>ASX1209: Maximum value allowed for 'max-schemas' is 10. Found 15</expected-error> <expected-error>HYR0133: Schema could not be inferred, empty types found in the result</expected-error> <expected-error>HYR0134: Schema Limit exceeded, maximum number of heterogeneous schemas allowed : '2'</expected-error> + <expected-error>ASX1204: 'rectangle' type not supported in parquet format</expected-error> <source-location>false</source-location> </compilation-unit> </test-case> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java index 0dcdb3a..1eb8e84 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java @@ -37,10 +37,12 @@ Map.entry(ATypeTag.DOUBLE, PrimitiveType.PrimitiveTypeName.DOUBLE), Map.entry(ATypeTag.DATE, PrimitiveType.PrimitiveTypeName.INT32), Map.entry(ATypeTag.TIME, PrimitiveType.PrimitiveTypeName.INT32), - Map.entry(ATypeTag.DATETIME, PrimitiveType.PrimitiveTypeName.INT64)); + Map.entry(ATypeTag.DATETIME, PrimitiveType.PrimitiveTypeName.INT64), + Map.entry(ATypeTag.UUID, PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)); public static final Map<ATypeTag, LogicalTypeAnnotation> LOGICAL_TYPE_ANNOTATION_MAP = Map.ofEntries(Map.entry(ATypeTag.STRING, LogicalTypeAnnotation.stringType()), + Map.entry(ATypeTag.UUID, LogicalTypeAnnotation.uuidType()), Map.entry(ATypeTag.DATE, LogicalTypeAnnotation.dateType()), Map.entry(ATypeTag.TIME, LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)), diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java index 055c635..b591175 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java @@ -18,10 +18,12 @@ */ package org.apache.asterix.external.writer.printer.parquet; +import static org.apache.asterix.common.exceptions.ErrorCode.TYPE_UNSUPPORTED_PARQUET_WRITE; import static org.apache.asterix.external.writer.printer.parquet.ParquetSchemaTree.buildParquetSchema; import java.util.Map; +import org.apache.asterix.common.exceptions.RuntimeDataException; import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable; import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable; import org.apache.asterix.om.lazy.FlatLazyVisitablePointable; @@ -104,6 +106,9 @@ public Void visit(FlatLazyVisitablePointable pointable, ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { if (schemaNode.getType() == null) { + if (!AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.containsKey(pointable.getTypeTag())) { + throw RuntimeDataException.create(TYPE_UNSUPPORTED_PARQUET_WRITE, pointable.getTypeTag()); + } schemaNode.setType(new ParquetSchemaTree.FlatType( AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.get(pointable.getTypeTag()), AsterixParquetTypeMap.LOGICAL_TYPE_ANNOTATION_MAP.get(pointable.getTypeTag()))); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java index 0390315..04e11f7 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java @@ -19,6 +19,7 @@ package org.apache.asterix.external.writer.printer.parquet; import java.io.IOException; +import java.util.UUID; import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.common.exceptions.RuntimeDataException; @@ -186,6 +187,10 @@ case DATETIME: long dateTimeValue = ADateTimeSerializerDeserializer.getChronon(b, s); addIntegerType(dateTimeValue, primitiveTypeName, typeTag, recordConsumer); + break; + case UUID: + recordConsumer.addBinary(Binary.fromReusedByteArray(b, s, l)); + break; case NULL: case MISSING: break; -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19388 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: Ib65eaf9c1a16fb9f97bb38ddf9c03bceec1bec46 Gerrit-Change-Number: 19388 Gerrit-PatchSet: 4 Gerrit-Owner: [email protected] Gerrit-Reviewer: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-MessageType: merged
