>From Ritik Raj <[email protected]>: Ritik Raj has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21145?usp=email )
Change subject: [ASTERIXDB-3757][EXT] Handle error in parquet empty schema ...................................................................... [ASTERIXDB-3757][EXT] Handle error in parquet empty schema - user model changes: no - storage format changes: no - interface changes: no Details: When the query in a Copy To statement returns no result, the Parquet writer gets an empty schema, which is illegal for the Parquet writer, so it throws a runtime exception. This exception needs to be handled properly because it is otherwise surfaced as an internal error. ext-ref: MB-71111 Change-Id: I4574a96d43cf964e662b2c49aaf4440fc59aeadc --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java M asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java 7 files changed, 114 insertions(+), 4 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/45/21145/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp new file mode 100644 index 0000000..8d617e0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.01.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; +USE test; + +CREATE COLLECTION temporalCollection PRIMARY KEY (id: string) WITH { + "storage-format": { + "format": "column" + } +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp new file mode 100644 index 0000000..16381d6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.02.update.sqlpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +INSERT INTO temporalCollection( +[ + { + "id": "18", + "name": "Virat", + "dateType": date("1988-11-05"), + "timeType": time("03:10:00.493Z"), + "boolType": false, + "doubleType": 0.75, + "datetimeType": datetime("1900-02-01T00:00:00"), + "durationType": duration("P5Y12M1DT24H24M1.012S"), + "uuidType": uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") + } +]); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp new file mode 100644 index 0000000..21ddc93 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-schema/parquet-empty-schema.03.update.sqlpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +COPY ( +select c.* from temporalCollection + ) toWriter +TO %adapter% +PATH (%pathprefix% "copy-to-result", "parquet-empty-schema") +WITH { + %template_colons%, + %additionalProperties% + "format":"parquet" +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index ae4154e..b273d4f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -200,6 +200,18 @@ </compilation-unit> </test-case> <test-case FilePath="copy-to"> + <compilation-unit name="parquet-empty-schema"> + <placeholder name="adapter" value="S3" /> + <placeholder name="pathprefix" value="" /> + <placeholder name="path_prefix" value="" /> + <placeholder name="additionalProperties" value='"container":"playground",' /> + <placeholder name="additional_Properties" value='("container"="playground"),' /> + <output-dir compare="Text">parquet-empty-schema</output-dir> + <expected-error>ASX1243: Parquet writer error: Cannot write a schema with an empty group: message asterix_schema { +}</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="copy-to"> <compilation-unit name="parquet-type-hierarchy"> <placeholder name="adapter" value="S3" /> <placeholder name="pathprefix" value="" /> diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java index dd8d358..45dbbdc 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java @@ -348,6 +348,7 @@ NO_VALID_CREDENTIALS_PROVIDED_FOR_BIGLAKE_METASTORE_CATALOG(1240), INVALID_FRAME_BASED_MEMORY_BUDGET(1241), COLLECTION_IS_NOT_AN_ICEBERG_TABLE_COLLECTION(1242), + PARQUET_WRITER_ERROR(1243), // Feed errors DATAFLOW_ILLEGAL_STATE(3001), diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties index 23a66ff..253f5c7 100644 --- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties +++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties @@ -350,6 +350,7 @@ 1240 = No valid credentials provided to access Biglake Metastore catalog. 1241 = Invalid `%1$s` "%2$s" for frame size=%3$s. value should be >= %4$s * frame size: `%1$s` "%5$s" in %6$s 1242 = Collection '%1$s' is not an Iceberg table external collection. +1243 = Parquet writer error: %1$s # Feed Errors 3001 = Illegal state. 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java index 1a3aea1..3ef41b9 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.OutputStream; +import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.common.exceptions.RuntimeDataException; import org.apache.asterix.external.input.record.reader.hdfs.parquet.AsterixParquetRuntimeException; import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.writer.printer.parquet.AsterixParquetWriter; @@ -73,8 +75,8 @@ .withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE) .enableDictionaryEncoding().withValidation(false).withWriterVersion(writerVersion).withConf(conf) .build(); - } catch (IOException e) { - throw HyracksDataException.create(e); + } catch (Exception e) { + throw new RuntimeDataException(ErrorCode.PARQUET_WRITER_ERROR, e, e.getMessage()); } } @@ -95,8 +97,8 @@ if (this.writer != null) { try { this.writer.close(); - } catch (IOException e) { - throw HyracksDataException.create(e); + } catch (Exception e) { + throw new RuntimeDataException(ErrorCode.PARQUET_WRITER_ERROR, e, e.getMessage()); } } } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21145?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: newchange Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I4574a96d43cf964e662b2c49aaf4440fc59aeadc Gerrit-Change-Number: 21145 Gerrit-PatchSet: 
1 Gerrit-Owner: Ritik Raj <[email protected]>
