>From <[email protected]>:
[email protected] has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19388 )
Change subject: [ASTERIXDB-3392] Add UUID type in COPY TO parquet
......................................................................
[ASTERIXDB-3392] Add UUID type in COPY TO parquet
Ext-ref: MB-65168
Details:
Support the UUID type in COPY TO parquet, and raise an error when a value's type is not supported by the parquet writer.
Change-Id: Ib65eaf9c1a16fb9f97bb38ddf9c03bceec1bec46
---
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M
hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
M
asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
M
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
M
asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
12 files changed, 72 insertions(+), 6 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/88/19388/1
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
index d2a376c..8e0b9e8 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
@@ -19,5 +19,5 @@
use test;
-insert into TestCollection({"id":18, "name": "Virat" ,
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" :
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00")
});
+insert into TestCollection({"id":18, "name": "Virat" ,
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" :
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") ,
"uuidType" : uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") });
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
new file mode 100644
index 0000000..e956812
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+COPY (
+select id,name from TestCollection c
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
+TYPE ( { id:int, rect: rectangle })
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet",
+ "max-schemas" : "2"
+ }
+
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
index ec1ac0c..4cd2ec7 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
@@ -29,5 +29,5 @@
insert into TestCollection({"id":`day-time-duration`("-P3829H849.392S"),
"name": "Alex"});
*/
-insert into TestCollection({"id":18, "name": "Virat" ,
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" :
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00")
});
+insert into TestCollection({"id":18, "name": "Virat" ,
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" :
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") ,
"uuidType" : uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") });
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
index a85e188..042fa99 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
@@ -24,7 +24,7 @@
) toWriter
TO %adapter%
PATH (%pathprefix% "copy-to-result", "parquet-cover-data-types")
-TYPE ( { name : string, id : int, dateType : date, timeType : time,
boolType : boolean, doubleType : double, datetimeType : datetime } )
+TYPE ( { name : string, id : int, dateType : date, timeType : time,
boolType : boolean, doubleType : double, datetimeType : datetime , uuidType
: uuid } )
WITH {
%template_colons%,
%additionalProperties%
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
index 8fc863e..dd54be4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
@@ -1 +1 @@
-{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType":
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType":
datetime("1900-02-01T00:00:00.000") }
\ No newline at end of file
+{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType":
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType":
datetime("1900-02-01T00:00:00.000"), "uuidType":
uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
index 8fc863e..dd54be4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
@@ -1 +1 @@
-{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType":
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType":
datetime("1900-02-01T00:00:00.000") }
\ No newline at end of file
+{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType":
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType":
datetime("1900-02-01T00:00:00.000"), "uuidType":
uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index da33b2e..1a05334 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -210,6 +210,7 @@
<expected-error>ASX1209: Maximum value allowed for 'max-schemas' is
10. Found 15</expected-error>
<expected-error>HYR0133: Schema could not be inferred, empty types
found in the result</expected-error>
<expected-error>HYR0134: Schema Limit exceeded, maximum number of
heterogeneous schemas allowed : '2'</expected-error>
+ <expected-error>ASX1204: 'rectangle' type not supported in parquet
format</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="copy-to/negative">
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
index 0dcdb3a..1eb8e84 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
@@ -37,10 +37,12 @@
Map.entry(ATypeTag.DOUBLE,
PrimitiveType.PrimitiveTypeName.DOUBLE),
Map.entry(ATypeTag.DATE,
PrimitiveType.PrimitiveTypeName.INT32),
Map.entry(ATypeTag.TIME,
PrimitiveType.PrimitiveTypeName.INT32),
- Map.entry(ATypeTag.DATETIME,
PrimitiveType.PrimitiveTypeName.INT64));
+ Map.entry(ATypeTag.DATETIME,
PrimitiveType.PrimitiveTypeName.INT64),
+ Map.entry(ATypeTag.UUID,
PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY));
public static final Map<ATypeTag, LogicalTypeAnnotation>
LOGICAL_TYPE_ANNOTATION_MAP =
Map.ofEntries(Map.entry(ATypeTag.STRING,
LogicalTypeAnnotation.stringType()),
+ Map.entry(ATypeTag.UUID, LogicalTypeAnnotation.uuidType()),
Map.entry(ATypeTag.DATE, LogicalTypeAnnotation.dateType()),
Map.entry(ATypeTag.TIME,
LogicalTypeAnnotation.timeType(true,
LogicalTypeAnnotation.TimeUnit.MILLIS)),
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index 055c635..6dc4a7b 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -104,6 +104,9 @@
public Void visit(FlatLazyVisitablePointable pointable,
ParquetSchemaTree.SchemaNode schemaNode)
throws HyracksDataException {
if (schemaNode.getType() == null) {
+ if
(!AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.containsKey(pointable.getTypeTag()))
{
+ throw new
HyracksDataException(ErrorCode.TYPE_UNSUPPORTED_PARQUET,
pointable.getTypeTag());
+ }
schemaNode.setType(new ParquetSchemaTree.FlatType(
AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.get(pointable.getTypeTag()),
AsterixParquetTypeMap.LOGICAL_TYPE_ANNOTATION_MAP.get(pointable.getTypeTag())));
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 0390315..04e11f7 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -19,6 +19,7 @@
package org.apache.asterix.external.writer.printer.parquet;
import java.io.IOException;
+import java.util.UUID;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.exceptions.RuntimeDataException;
@@ -186,6 +187,10 @@
case DATETIME:
long dateTimeValue =
ADateTimeSerializerDeserializer.getChronon(b, s);
addIntegerType(dateTimeValue, primitiveTypeName, typeTag,
recordConsumer);
+ break;
+ case UUID:
+ recordConsumer.addBinary(Binary.fromReusedByteArray(b, s, l));
+ break;
case NULL:
case MISSING:
break;
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 7cb107d..9fb7177 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -162,6 +162,7 @@
EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA(132),
EMPTY_TYPE_INFERRED(133),
SCHEMA_LIMIT_EXCEEDED(134),
+ TYPE_UNSUPPORTED_PARQUET(135),
// Compilation error codes.
RULECOLLECTION_NOT_INSTANCE_OF_LIST(10000),
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index e1fbe30..78fee94 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -152,6 +152,7 @@
132 = Extra field in the result, field '%1$s' does not exist at '%2$s' in the
schema
133 = Schema could not be inferred, empty types found in the result
134 = Schema Limit exceeded, maximum number of heterogeneous schemas allowed :
'%1$s'
+135 = Type '%1$s' is not supported for parquet writing
10000 = The given rule collection %1$s is not an instance of the List class.
10001 = Cannot compose partition constraint %1$s with %2$s
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19388
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ib65eaf9c1a16fb9f97bb38ddf9c03bceec1bec46
Gerrit-Change-Number: 19388
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange