>From Preetham Poluparthi <[email protected]>: Preetham Poluparthi has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20395?usp=email )
Change subject: [ASTERIXDB-3392][EXT] Default to STRING for null-typed fields during schema inference in COPY TO parquet ...................................................................... [ASTERIXDB-3392][EXT] Default to STRING for null-typed fields during schema inference in COPY TO parquet - user model changes: no - storage format changes: no - interface changes: no Details: When a compute partition consists entirely of null values, schema inference in copy to parquet defaults to STRING type in parquet. Ext-ref: MB-68610 Change-Id: I60069bf3d44c60effb69e5555bbf7e869327cc5b Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20395 Reviewed-by: Ali Alsuliman <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Ali Alsuliman <[email protected]> --- M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp R asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp C asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java 16 files changed, 105 insertions(+), 150 deletions(-) Approvals: Ali Alsuliman: Looks good to me, approved; Verified Jenkins: Verified diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp index a2dbe7f..f504fd5 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp @@ -23,6 +23,7 @@ CREATE COLLECTION TestCollection6(ColumnType1) PRIMARY KEY id; CREATE COLLECTION TestCollection7(ColumnType1) PRIMARY KEY id; +CREATE COLLECTION TestCollection8(ColumnType1) PRIMARY KEY id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp similarity index 74% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp index be198a2..0f80378 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp @@ -17,25 +17,12 @@ * under the License. */ +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; USE test; +CREATE TYPE ColumnType2 AS { +}; - - - -insert into TestCollection({"id":1, "name": []}); - - - -COPY ( -select c.* from TestCollection c - ) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" - } - +CREATE DATASET col2 primary key(id:int); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp similarity index 80% rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp index be198a2..3d5158b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp @@ -20,22 +20,21 @@ USE test; - - - - -insert into TestCollection({"id":1, "name": []}); - - +INSERT INTO col2 ( + [ +{"id": 1, "name": "aqay awil"}, + {"id": 2} + ] +); COPY ( -select c.* from TestCollection c - ) toWriter + select * from col2 +) toWriter TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") +PATH (%pathprefix% "copy-to-result", "parquet-null1") WITH { %template_colons%, %additionalProperties% - "format":"parquet" - } - + "format":"parquet", + "version" : "2" +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp similarity index 74% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp index be198a2..ab296ea 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp @@ -20,22 +20,12 @@ USE test; - - - - -insert into TestCollection({"id":1, "name": []}); - - - -COPY ( -select c.* from TestCollection c - ) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" - } - +CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-null1"), + ("format" = "parquet"), + ("requireVersionChangeDetection"="false"), + ("include"="*.parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp similarity index 74% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp index be198a2..2254069 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp @@ -20,22 +20,6 @@ USE test; - - - - -insert into TestCollection({"id":1, "name": []}); - - - -COPY ( -select c.* from TestCollection c - ) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" - } - +SELECT * +FROM DatasetCopy c +ORDER BY c.col2.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp similarity index 74% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp index be198a2..8bac75e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp @@ -17,25 +17,12 @@ * under the License. */ +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; USE test; +CREATE TYPE ColumnType2 AS { + }; - - - -insert into TestCollection({"id":1, "name": []}); - - - -COPY ( -select c.* from TestCollection c - ) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" - } - +CREATE DATASET col2 primary key(id:int); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp similarity index 75% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp index be198a2..ceafc41 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp @@ -20,22 +20,22 @@ USE test; - - - - -insert into TestCollection({"id":1, "name": []}); - - +INSERT INTO col2 ( + [ +{"id": 1, "name": "aqay awil", "centuries" : []}, + {"id": 2, "centuries" : []}, + {"id":3 , "name": null, "centuries" : []} + ] +); COPY ( -select c.* from TestCollection c +select * from col2 ) toWriter TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") +PATH (%pathprefix% "copy-to-result", "parquet-null2") WITH { %template_colons%, %additionalProperties% - "format":"parquet" - } - + "format":"parquet", + "version" : "2" + }; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp similarity index 74% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp index be198a2..2e6e180 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp @@ -20,22 +20,12 @@ USE test; - - - - -insert into TestCollection({"id":1, "name": []}); - - - -COPY ( -select c.* from TestCollection c - ) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" - } - +CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-null2"), + ("format" = "parquet"), + ("requireVersionChangeDetection"="false"), + ("include"="*.parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp similarity index 74% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp index be198a2..2254069 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.15.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp @@ -20,22 +20,6 @@ USE test; - - - - -insert into TestCollection({"id":1, "name": []}); - - - -COPY ( -select c.* from TestCollection c - ) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-error-checks15") -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" - } - +SELECT * +FROM DatasetCopy c +ORDER BY c.col2.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp index 76970a5..abff4f0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp @@ -23,3 +23,5 @@ CREATE TYPE ColumnType2 AS { }; + + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp index 9b21be7..d598e52 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp @@ -21,8 +21,8 @@ COPY ( - select "123" as id -) toWriter +select "123" as id + ) toWriter TO %adapter% PATH (%pathprefix% "copy-to-result", "parquet-simple") TYPE ( {id:string} ) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm new file mode 100644 index 0000000..c1aed87 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm @@ -0,0 +1,2 @@ +{ "c": { "col2": { "name": "aqay awil", "id": 1 } } } +{ "c": { "col2": { "id": 2 } } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm new file mode 100644 index 0000000..29ca9ec --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm @@ -0,0 +1,3 @@ +{ "c": { "col2": { "centuries": [ ], "name": "aqay awil", "id": 1 } } } +{ "c": { "col2": { "centuries": [ ], "id": 2 } } } +{ "c": { "col2": { "centuries": [ ], "id": 3 } } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 5b1d265..a267783 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -70,6 +70,26 @@ </compilation-unit> </test-case> <test-case FilePath="copy-to"> + <compilation-unit name="parquet-null1"> + <placeholder name="adapter" value="S3" /> + <placeholder name="pathprefix" value="" /> + <placeholder name="path_prefix" value="" /> + <placeholder name="additionalProperties" value='"container":"playground",' /> + <placeholder name="additional_Properties" value='("container"="playground")' /> + <output-dir compare="Text">parquet-null1</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="copy-to"> + <compilation-unit name="parquet-null2"> + <placeholder name="adapter" value="S3" /> + <placeholder name="pathprefix" value="" /> + <placeholder name="path_prefix" value="" /> + <placeholder name="additionalProperties" value='"container":"playground",' /> + <placeholder name="additional_Properties" value='("container"="playground")' /> + <output-dir compare="Text">parquet-null2</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="copy-to"> <compilation-unit name="parquet-tweet"> <placeholder name="adapter" value="S3" /> <placeholder name="pathprefix" value="" /> @@ -278,7 +298,6 @@ <expected-error>ASX1205: Invalid Parquet Writer Version provided '3'. Supported values: [1, 2]</expected-error> <expected-error>ASX0039: Expected integer value, got yvghc (in line 22, at column 6)</expected-error> <expected-error>ASX1209: Maximum value allowed for 'max-schemas' is 10. Found 15</expected-error> - <expected-error>HYR0133: Schema could not be inferred, empty types found in the result</expected-error> <expected-error>HYR0134: Schema Limit exceeded, maximum number of heterogeneous schemas allowed : '2'</expected-error> <expected-error>ASX1204: 'rectangle' type not supported in parquet format</expected-error> <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java index 5fb2ac7..a764262 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java @@ -38,6 +38,12 @@ public class ParquetSchemaTree { private static final Logger LOGGER = LogManager.getLogger(); + private static final SchemaNode DEFAULT_SCHEMA_NODE_FOR_NULL = new SchemaNode(); + private static final FlatType DEFAULT_TYPE_TAG_FOR_NULL = new FlatType(ATypeTag.STRING); + static { + DEFAULT_SCHEMA_NODE_FOR_NULL.setType(DEFAULT_TYPE_TAG_FOR_NULL); + } + public static class SchemaNode { private AbstractType type; @@ -179,12 +185,13 @@ public static void buildParquetSchema(Types.Builder builder, SchemaNode schemaNode, String columnName) throws HyracksDataException { if (schemaNode.getType() == null) { - LOGGER.info( - "Child type not set for record value with column name: " + LogRedactionUtil.userData(columnName)); - throw new HyracksDataException(ErrorCode.EMPTY_TYPE_INFERRED); + buildFlat(builder, DEFAULT_TYPE_TAG_FOR_NULL, columnName); + return; } AbstractType typeClass = schemaNode.getType(); - if (typeClass instanceof RecordType) { + if (typeClass == null) { + buildFlat(builder, DEFAULT_TYPE_TAG_FOR_NULL, columnName); + } else if (typeClass instanceof RecordType) { buildRecord(builder, (RecordType) schemaNode.getType(), columnName); } else if (typeClass instanceof ListType) { buildList(builder, (ListType) schemaNode.getType(), columnName); @@ -206,10 +213,10 @@ Types.BaseListBuilder<?, ?> childBuilder = getListChild(builder); SchemaNode child = type.child; if (child == null) { - LOGGER.info("Child type not set for list with column name: " + LogRedactionUtil.userData(columnName)); - throw new HyracksDataException(ErrorCode.EMPTY_TYPE_INFERRED); + buildParquetSchema(childBuilder, DEFAULT_SCHEMA_NODE_FOR_NULL, columnName); + } else { + buildParquetSchema(childBuilder, child, columnName); } - buildParquetSchema(childBuilder, child, columnName); } private static void buildFlat(Types.Builder builder, FlatType type, String columnName) throws HyracksDataException { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java index 6e03144..5e911a6 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java @@ -113,13 +113,13 @@ @Override public ISchemaChecker.SchemaComparisonType visit(FlatLazyVisitablePointable currentValue, ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { - if (schemaNode.getType() == null) { - return SchemaComparisonType.GROWING; - } // SchemaNode.getTypeTag can never be MISSING here if (currentValue.getTypeTag() == ATypeTag.NULL) { return SchemaComparisonType.EQUIVALENT; } + if (schemaNode.getType() == null) { + return SchemaComparisonType.GROWING; + } if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType inferredType)) { return ISchemaChecker.SchemaComparisonType.CONFLICTING; } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20395?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: phoenix Gerrit-Change-Id: I60069bf3d44c60effb69e5555bbf7e869327cc5b Gerrit-Change-Number: 20395 Gerrit-PatchSet: 10 Gerrit-Owner: Preetham Poluparthi <[email protected]> Gerrit-Reviewer: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Preetham Poluparthi <[email protected]>
