This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new aa8891ad648 HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
aa8891ad648 is described below

commit aa8891ad6480b4a50c91a60a2eacd1871128482b
Author: Adam Szita <40628386+sz...@users.noreply.github.com>
AuthorDate: Wed Jul 20 08:55:16 2022 +0200

    HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
---
 .../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java  | 14 ++++++++------
 .../org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java |  2 --
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 5f1c9158aab..25881408a63 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -818,7 +819,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
         FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT)) ||
         (tableProps.containsKey("metaTable") && isValidMetadataTable(tableProps.getProperty("metaTable"))) ||
         hasOrcTimeInSchema(tableProps, tableSchema) ||
-        !hasParquetListColumnSupport(tableProps, tableSchema)) {
+        !hasParquetNestedTypeWithinListOrMap(tableProps, tableSchema)) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
     }
   }
@@ -839,20 +840,21 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   }
 
   /**
-   * Vectorized reads of parquet files from columns with list type is only supported if the element is a primitive type
-   * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details
+   * Vectorized reads of parquet files from columns with list or map type is only supported if the nested types are of
+   * primitive type category
+   * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details on nested types under lists
    * @param tableProps iceberg table properties
    * @param tableSchema iceberg table schema
    * @return
    */
-  private static boolean hasParquetListColumnSupport(Properties tableProps, Schema tableSchema) {
+  private static boolean hasParquetNestedTypeWithinListOrMap(Properties tableProps, Schema tableSchema) {
     if (!FileFormat.PARQUET.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
       return true;
     }
 
     for (Types.NestedField field : tableSchema.columns()) {
-      if (field.type().isListType()) {
-        for (Types.NestedField nestedField : field.type().asListType().fields()) {
+      if (field.type().isListType() || field.type().isMapType()) {
+        for (Types.NestedField nestedField : field.type().asNestedType().fields()) {
           if (!nestedField.type().isPrimitiveType()) {
             return false;
           }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
index efb08c36d95..31a589a7c96 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
@@ -334,8 +334,6 @@ public class TestHiveIcebergInserts extends HiveIcebergStorageHandlerWithEngineB
 
   @Test
   public void testStructMapWithNull() throws IOException {
-    Assume.assumeTrue("Vectorized parquet read throws class cast exception",
-        !(fileFormat == FileFormat.PARQUET && isVectorized));
     Schema schema = new Schema(required(1, "id", Types.LongType.get()),
         required(2, "mapofstructs", Types.MapType.ofRequired(3, 4, Types.StringType.get(),
             Types.StructType.of(required(5, "something", Types.StringType.get()),