From Ritik Raj <[email protected]>:

Ritik Raj has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21152?usp=email )


Change subject: [ASTERIXDB-3757][EXT] Handle error in parquet empty schema
......................................................................

[ASTERIXDB-3757][EXT] Handle error in parquet empty schema

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
When a COPY TO query returns no result, the Parquet
writer is given an empty schema, which is illegal for
the Parquet writer and makes it throw a runtime exception.
This exception needs to be handled properly, because it is
currently surfaced as an internal error.

ext-ref: MB-71111
Change-Id: I4574a96d43cf964e662b2c49aaf4440fc59aeadc
---
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp
M 
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
M asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
7 files changed, 114 insertions(+), 4 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/52/21152/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp
new file mode 100644
index 0000000..8d617e0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE COLLECTION temporalCollection PRIMARY KEY (id: string) WITH {
+    "storage-format": {
+        "format": "column"
+    }
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp
new file mode 100644
index 0000000..16381d6
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+INSERT INTO temporalCollection(
+[
+  {
+    "id": "18",
+    "name": "Virat",
+    "dateType": date("1988-11-05"),
+    "timeType": time("03:10:00.493Z"),
+    "boolType": false,
+    "doubleType": 0.75,
+    "datetimeType": datetime("1900-02-01T00:00:00"),
+    "durationType": duration("P5Y12M1DT24H24M1.012S"),
+    "uuidType": uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344")
+  }
+]);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp
new file mode 100644
index 0000000..21ddc93
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+COPY (
+select c.* from temporalCollection
+    ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-empty-schema")
+WITH {
+    %template_colons%,
+    %additionalProperties%
+    "format":"parquet"
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 0fa2b99..ca88c3f 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -200,6 +200,18 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to">
+      <compilation-unit name="parquet-empty-schema">
+        <placeholder name="adapter" value="S3" />
+        <placeholder name="pathprefix" value="" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additionalProperties" 
value='"container":"playground",' />
+        <placeholder name="additional_Properties" 
value='("container"="playground"),' />
+        <output-dir compare="Text">parquet-empty-schema</output-dir>
+        <expected-error>ASX1244: Parquet writer error: Cannot write a schema 
with an empty group: message asterix_schema {
+}</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="copy-to">
       <compilation-unit name="parquet-type-hierarchy">
         <placeholder name="adapter" value="S3" />
         <placeholder name="pathprefix" value="" />
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 45d7bb2..379e209 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -349,6 +349,7 @@
     INVALID_FRAME_BASED_MEMORY_BUDGET(1241),
     COLLECTION_IS_NOT_AN_ICEBERG_TABLE_COLLECTION(1242),
     NOT_ICEBERG_CATALOG(1243),
+    PARQUET_WRITER_ERROR(1244),
 
     // Feed errors
     DATAFLOW_ILLEGAL_STATE(3001),
diff --git 
a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties 
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index a4037ee..425143c 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -351,6 +351,7 @@
 1241 = Invalid `%1$s` "%2$s" for frame size=%3$s. value should be >= %4$s * 
frame size: `%1$s` "%5$s" in %6$s
 1242 = Collection '%1$s' is not an Iceberg table external collection.
 1243 = Catalog '%1$s' is not an Iceberg catalog
+1244 = Parquet writer error: %1$s

 # Feed Errors
 3001 = Illegal state.
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index 1a3aea1..3ef41b9 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -22,6 +22,8 @@
 import java.io.IOException;
 import java.io.OutputStream;

+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
 import 
org.apache.asterix.external.input.record.reader.hdfs.parquet.AsterixParquetRuntimeException;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.writer.printer.parquet.AsterixParquetWriter;
@@ -73,8 +75,8 @@
                     
.withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE)
                     
.enableDictionaryEncoding().withValidation(false).withWriterVersion(writerVersion).withConf(conf)
                     .build();
-        } catch (IOException e) {
-            throw HyracksDataException.create(e);
+        } catch (Exception e) {
+            throw new RuntimeDataException(ErrorCode.PARQUET_WRITER_ERROR, e, 
e.getMessage());
         }

     }
@@ -95,8 +97,8 @@
         if (this.writer != null) {
             try {
                 this.writer.close();
-            } catch (IOException e) {
-                throw HyracksDataException.create(e);
+            } catch (Exception e) {
+                throw new RuntimeDataException(ErrorCode.PARQUET_WRITER_ERROR, 
e, e.getMessage());
             }
         }
     }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21152?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: I4574a96d43cf964e662b2c49aaf4440fc59aeadc
Gerrit-Change-Number: 21152
Gerrit-PatchSet: 1
Gerrit-Owner: Ritik Raj <[email protected]>

Reply via email to