This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 57c9f4834cc HIVE-26298: Selecting complex types on migrated iceberg table does not work. (#3361) (Laszlo Pinter, reviewed by Peter Vary)
57c9f4834cc is described below

commit 57c9f4834cc6494c2381b630a38e1a3d0d75ce49
Author: László Pintér <47777102+lcspin...@users.noreply.github.com>
AuthorDate: Thu Jun 16 08:10:53 2022 +0200

    HIVE-26298: Selecting complex types on migrated iceberg table does not work. (#3361) (Laszlo Pinter, reviewed by Peter Vary)
---
 .../apache/iceberg/hive/HiveSchemaConverter.java   | 11 +--
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 44 ++++++++++--
 .../hive/vector/ParquetSchemaFieldNameVisitor.java | 16 +++--
 .../iceberg/mr/hive/TestHiveIcebergMigration.java  | 39 ++++++++++
 .../queries/positive/vectorized_iceberg_read_orc.q | 36 +++++++++-
 .../positive/vectorized_iceberg_read_parquet.q     | 36 +++++++++-
 .../llap/vectorized_iceberg_read_orc.q.out         | 83 ++++++++++++++++++++++
 .../llap/vectorized_iceberg_read_parquet.q.out     | 83 ++++++++++++++++++++++
 .../positive/vectorized_iceberg_read_orc.q.out     | 83 ++++++++++++++++++++++
 .../positive/vectorized_iceberg_read_parquet.q.out | 83 ++++++++++++++++++++++
 10 files changed, 498 insertions(+), 16 deletions(-)

diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
index 9122577d417..7b8b3f2485a 100644
--- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
+++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
@@ -63,8 +63,10 @@ class HiveSchemaConverter {
   List<Types.NestedField> convertInternal(List<String> names, List<TypeInfo> typeInfos, List<String> comments) {
     List<Types.NestedField> result = Lists.newArrayListWithExpectedSize(names.size());
+    int outerId = id + names.size();
+    id = outerId;
     for (int i = 0; i < names.size(); ++i) {
-      result.add(Types.NestedField.optional(id++, names.get(i), convertType(typeInfos.get(i)),
+      result.add(Types.NestedField.optional(outerId - names.size() + i, names.get(i), convertType(typeInfos.get(i)),
           comments.isEmpty() || i >= comments.size() ? null : comments.get(i)));
     }
@@ -131,15 +133,16 @@ class HiveSchemaConverter {
       return Types.StructType.of(fields);
     case MAP:
       MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
-      Type keyType = convertType(mapTypeInfo.getMapKeyTypeInfo());
-      Type valueType = convertType(mapTypeInfo.getMapValueTypeInfo());
       int keyId = id++;
+      Type keyType = convertType(mapTypeInfo.getMapKeyTypeInfo());
       int valueId = id++;
+      Type valueType = convertType(mapTypeInfo.getMapValueTypeInfo());
       return Types.MapType.ofOptional(keyId, valueId, keyType, valueType);
     case LIST:
       ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
+      int listId = id++;
       Type listType = convertType(listTypeInfo.getListElementTypeInfo());
-      return Types.ListType.ofOptional(id++, listType);
+      return Types.ListType.ofOptional(listId, listType);
     case UNION:
     default:
       throw new IllegalArgumentException("Unknown type " + typeInfo.getCategory());
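
A note for readers of this patch: the HiveSchemaConverter change above makes the converter reserve field IDs for every column of the current nesting level before it descends into nested types (and likewise allocates map key/value and list element IDs before converting their types), so nested fields no longer interleave with their parents' IDs. The following is a minimal, self-contained sketch of that level-order numbering only; the class and method names (LevelOrderIdAssignment, assignIds) and the starting ID of 1 are illustrative and not part of the patch.

import java.util.ArrayList;
import java.util.List;

// Illustration only: reserve IDs for every field of the current level first,
// then recurse into nested fields, mirroring the ordering introduced above.
public class LevelOrderIdAssignment {

  static class Field {
    final String name;
    final List<Field> children = new ArrayList<>();
    int id = -1;

    Field(String name, Field... nested) {
      this.name = name;
      for (Field f : nested) {
        children.add(f);
      }
    }
  }

  private int nextId = 1; // hypothetical starting point, not Iceberg's actual counter

  void assignIds(List<Field> fields) {
    int firstId = nextId;
    nextId += fields.size();            // reserve a block of IDs for this level
    for (int i = 0; i < fields.size(); i++) {
      fields.get(i).id = firstId + i;   // sibling fields get the lower IDs
    }
    for (Field field : fields) {
      assignIds(field.children);        // nested fields are numbered afterwards
    }
  }

  public static void main(String[] args) {
    List<Field> schema = new ArrayList<>();
    schema.add(new Field("a"));
    schema.add(new Field("arrayofstructs", new Field("element", new Field("something"), new Field("someone"))));
    schema.add(new Field("b"));

    LevelOrderIdAssignment assigner = new LevelOrderIdAssignment();
    assigner.assignIds(schema);
    // Prints a=1, arrayofstructs=2, b=3, element=4, something=5, someone=6:
    // all siblings are numbered before any of their nested fields.
    schema.forEach(f -> print(f, ""));
  }

  private static void print(Field f, String indent) {
    System.out.println(indent + f.name + " -> " + f.id);
    f.children.forEach(c -> print(c, indent + "  "));
  }
}
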
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index c693c941e4f..74d75f57416 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -796,29 +796,61 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
    * <li>fileformat is set to avro</li>
    * <li>querying metadata tables</li>
    * <li>fileformat is set to ORC, and table schema has time type column</li>
+   * <li>fileformat is set to PARQUET, and table schema has a list type column, that has a complex type element</li>
    * </ul>
    * @param tableProps table properties, must be not null
    */
   private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) {
+    Schema tableSchema = SchemaParser.fromJson(tableProps.getProperty(InputFormatConfig.TABLE_SCHEMA));
     if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) ||
         FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT)) ||
         (tableProps.containsKey("metaTable") && isValidMetadataTable(tableProps.getProperty("metaTable"))) ||
-        hasOrcTimeInSchema(tableProps)) {
+        hasOrcTimeInSchema(tableProps, tableSchema) ||
+        !hasParquetListColumnSupport(tableProps, tableSchema)) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
     }
   }
 
-  // Iceberg Time type columns are written as longs into ORC files. There is no Time type in Hive, so it is represented
-  // as String instead. For ORC there's no automatic conversion from long to string during vectorized reading such as
-  // for example in Parquet (in Parquet files Time type is an int64 with 'time' logical annotation).
-  private static boolean hasOrcTimeInSchema(Properties tableProps) {
+  /**
+   * Iceberg Time type columns are written as longs into ORC files. There is no Time type in Hive, so it is represented
+   * as String instead. For ORC there's no automatic conversion from long to string during vectorized reading such as
+   * for example in Parquet (in Parquet files Time type is an int64 with 'time' logical annotation).
+   * @param tableProps iceberg table properties
+   * @param tableSchema iceberg table schema
+   * @return
+   */
+  private static boolean hasOrcTimeInSchema(Properties tableProps, Schema tableSchema) {
     if (!FileFormat.ORC.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
       return false;
     }
-    Schema tableSchema = SchemaParser.fromJson(tableProps.getProperty(InputFormatConfig.TABLE_SCHEMA));
     return tableSchema.columns().stream().anyMatch(f -> Types.TimeType.get().typeId() == f.type().typeId());
   }
 
+  /**
+   * Vectorized reads of parquet files from columns with list type is only supported if the element is a primitive type
+   * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details
+   * @param tableProps iceberg table properties
+   * @param tableSchema iceberg table schema
+   * @return
+   */
+  private static boolean hasParquetListColumnSupport(Properties tableProps, Schema tableSchema) {
+    if (!FileFormat.PARQUET.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
+      return true;
+    }
+
+    for (Types.NestedField field : tableSchema.columns()) {
+      if (field.type().isListType()) {
+        for (Types.NestedField nestedField : field.type().asListType().fields()) {
+          if (!nestedField.type().isPrimitiveType()) {
+            return false;
+          }
+        }
+      }
+    }
+
+    return true;
+  }
+
   /**
    * Generates a JobContext for the OutputCommitter for the specific table.
    * @param configuration The configuration used for as a base of the JobConf
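
For context, the new hasParquetListColumnSupport() guard only trips when a list column's element type is itself a struct, map, or list. Below is a short, standalone illustration of that rule using the Iceberg schema API; the class and helper names (ParquetListVectorizationCheck, listColumnsAreVectorizable) are made up for this example, and the file-format check of the real method is omitted.

import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

// Illustration of the rule added above: a Parquet-backed table keeps vectorized reads
// only if every list column has a primitive element type.
public class ParquetListVectorizationCheck {

  // Hypothetical helper mirroring the loop in hasParquetListColumnSupport.
  static boolean listColumnsAreVectorizable(Schema schema) {
    for (Types.NestedField field : schema.columns()) {
      if (field.type().isListType()) {
        for (Types.NestedField element : field.type().asListType().fields()) {
          if (!element.type().isPrimitiveType()) {
            return false;   // e.g. array<struct<...>> forces the non-vectorized path
          }
        }
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Schema primitiveList = new Schema(
        Types.NestedField.optional(1, "arrayofprimitives",
            Types.ListType.ofOptional(2, Types.StringType.get())));

    Schema structList = new Schema(
        Types.NestedField.optional(1, "arrayofstructs",
            Types.ListType.ofOptional(2, Types.StructType.of(
                Types.NestedField.optional(3, "something", Types.StringType.get())))));

    System.out.println(listColumnsAreVectorizable(primitiveList)); // true  -> vectorization stays on
    System.out.println(listColumnsAreVectorizable(structList));    // false -> falls back to non-vectorized
  }
}
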
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java
index 994fda5bf1f..ff455d8418a 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java
@@ -82,7 +82,9 @@ class ParquetSchemaFieldNameVisitor extends TypeWithSchemaVisitor<Type> {
     if (!isMessageType) {
       GroupType groupType = new GroupType(Type.Repetition.REPEATED, fieldNames.peek(), types);
-      typesById.put(struct.getId().intValue(), groupType);
+      if (struct.getId() != null) {
+        typesById.put(struct.getId().intValue(), groupType);
+      }
       return groupType;
     } else {
       return new MessageType("table", types);
@@ -98,19 +100,25 @@
 
   @Override
   public Type primitive(org.apache.iceberg.types.Type.PrimitiveType expected, PrimitiveType primitive) {
-    typesById.put(primitive.getId().intValue(), primitive);
+    if (primitive.getId() != null) {
+      typesById.put(primitive.getId().intValue(), primitive);
+    }
     return primitive;
   }
 
   @Override
   public Type list(Types.ListType iList, GroupType array, Type element) {
-    typesById.put(array.getId().intValue(), array);
+    if (array.getId() != null) {
+      typesById.put(array.getId().intValue(), array);
+    }
     return array;
   }
 
   @Override
   public Type map(Types.MapType iMap, GroupType map, Type key, Type value) {
-    typesById.put(map.getId().intValue(), map);
+    if (map.getId() != null) {
+      typesById.put(map.getId().intValue(), map);
+    }
     return map;
   }
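
The null checks added to ParquetSchemaFieldNameVisitor guard against Parquet types that carry no field ID, which (as the commit subject suggests) is the situation for data files written by Hive before the table was migrated to Iceberg, as well as for the synthetic wrapper groups of lists and maps. A standalone sketch of how such ID-less fields look through the parquet-mr API follows; ParquetFieldIdProbe and reportMissingIds are hypothetical names, and the schema string is merely an example of a pre-migration Hive file layout.

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.Type;

// Why the null guards above matter: columns in files written without Iceberg field IDs
// return null from Type#getId(), so blindly calling intValue() throws a NullPointerException.
public class ParquetFieldIdProbe {

  // Recursively report every field in the Parquet schema that has no field id.
  static void reportMissingIds(GroupType group, String prefix) {
    for (Type field : group.getFields()) {
      String path = prefix.isEmpty() ? field.getName() : prefix + "." + field.getName();
      if (field.getId() == null) {
        System.out.println("no field id: " + path);
      }
      if (!field.isPrimitive()) {
        reportMissingIds(field.asGroupType(), path);
      }
    }
  }

  public static void main(String[] args) {
    // Schema shaped like a pre-migration Hive Parquet file: no "= <id>" annotations anywhere.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message hive_table {" +
        "  optional int32 a;" +
        "  optional group arrayofprimitives (LIST) {" +
        "    repeated group bag {" +
        "      optional binary array_element (UTF8);" +
        "    }" +
        "  }" +
        "}");
    reportMissingIds(schema, "");
  }
}
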
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
index 59625835dbe..5a684004759 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
@@ -50,6 +50,45 @@ import org.mockito.Mockito;
  */
 public class TestHiveIcebergMigration extends HiveIcebergStorageHandlerWithEngineBase {
 
+  @Test
+  public void testMigrateHiveTableWithComplexTypeColumnsToIceberg() throws TException, InterruptedException {
+    TableIdentifier identifier = TableIdentifier.of("default", "tbl_complex");
+    shell.executeStatement(String.format("CREATE EXTERNAL TABLE %s (" +
+        "a int, " +
+        "arrayofprimitives array<string>, " +
+        "arrayofarrays array<array<string>>, " +
+        "arrayofmaps array<map<string, string>>, " +
+        "arrayofstructs array<struct<something:string, someone:string, somewhere:string>>, " +
+        "mapofprimitives map<string, string>, " +
+        "mapofarrays map<string, array<string>>, " +
+        "mapofmaps map<string, map<string, string>>, " +
+        "mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>, " +
+        "structofprimitives struct<something:string, somewhere:string>, " +
+        "structofarrays struct<names:array<string>, birthdays:array<string>>, " +
+        "structofmaps struct<map1:map<string, string>, map2:map<string, string>>" +
+        ") STORED AS %s %s %s", identifier.name(), fileFormat.name(),
+        testTables.locationForCreateTableSQL(identifier),
+        testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
+
+    shell.executeStatement(String.format("INSERT INTO %s VALUES (" +
+        "1, " +
+        "array('a','b','c'), " +
+        "array(array('a'), array('b', 'c')), " +
+        "array(map('a','b'), map('e','f')), " +
+        "array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), " +
+        "named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')), " +
+        "map('a', 'b'), " +
+        "map('a', array('b','c')), " +
+        "map('a', map('b','c')), " +
+        "map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')), " +
+        "named_struct('something', 'a', 'somewhere', 'b'), " +
+        "named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')), " +
+        "named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')) " +
+        ")", identifier.name()));
+
+    validateMigration(identifier.name());
+  }
+
   @Test
   public void testMigrateHiveTableToIceberg() throws TException, InterruptedException {
     String tableName = "tbl";
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_orc.q b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_orc.q
index 53e6848edf5..ee91b9f64e6 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_orc.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_orc.q
@@ -61,6 +61,40 @@ insert into tbl_ice_orc_parted values ('Europe', 'cc', 3, 'Austria');
 -- projecting all columns
 select p1, p2, a, min(b) from tbl_ice_orc_parted group by p1, p2, a;
 
+-- create iceberg table with complex types
+create external table tbl_ice_orc_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as orc;
+
+-- insert some test data
+insert into tbl_ice_orc_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')));
+
+select * from tbl_ice_orc_complex order by a;
+
 drop table tbl_ice_orc;
 drop table tbl_ice_orc_all_types;
-drop table tbl_ice_orc_parted;
\ No newline at end of file
+drop table tbl_ice_orc_parted;
+drop table tbl_ice_orc_complex;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_parquet.q b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_parquet.q
index cdf7a1da61a..a5bbe547a0f 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_parquet.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_parquet.q
@@ -61,6 +61,40 @@ insert into tbl_ice_parquet_parted values ('Europe', 'cc', 3, 'Austria');
 -- projecting all columns
 select p1, p2, a, min(b) from tbl_ice_parquet_parted group by p1, p2, a;
 
+-- create iceberg table with complex types
+create external table tbl_ice_parquet_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as parquet;
+
+-- insert some test data
+insert into tbl_ice_parquet_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')));
+
+select * from tbl_ice_parquet_complex order by a;
+
 drop table tbl_ice_parquet;
 drop table tbl_ice_parquet_all_types;
-drop table tbl_ice_parquet_parted;
\ No newline at end of file
+drop table tbl_ice_parquet_parted;
+drop table tbl_ice_parquet_complex;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
index be5c7805518..6b2a8780d66 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out
@@ -347,6 +347,81 @@ America	Canada	2	bb
 America	USA	2	aa
 Europe	Hungary	1	aa
 Europe	Austria	3	cc
+PREHOOK: query: create external table tbl_ice_orc_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_orc_complex
+POSTHOOK: query: create external table tbl_ice_orc_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_orc_complex
+PREHOOK: query: insert into tbl_ice_orc_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_orc_complex
+POSTHOOK: query: insert into tbl_ice_orc_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_orc_complex
+PREHOOK: query: select * from tbl_ice_orc_complex order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_orc_complex
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tbl_ice_orc_complex order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_orc_complex
+#### A masked pattern was here ####
+1	["a","b","c"]	[["a"],["b","c"]]	[{"a":"b"},{"e":"f"}]	[{"something":"a","someone":"b","somewhere":"c"},{"something":"e","someone":"f","somewhere":"g"}]	{"a":"b"}	{"a":["b","c"]}	{"a":{"b":"c"}}	{"a":{"something":"b","someone":"c","somewhere":"d"}}	{"something":"a","somewhere":"b"}	{"names":["a","b"],"birthdays":["c","d","e"]}	{"map1":{"a":"b"},"map2":{"c":"d"}}
 PREHOOK: query: drop table tbl_ice_orc
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@tbl_ice_orc
@@ -371,3 +446,11 @@ POSTHOOK: query: drop table tbl_ice_orc_parted
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@tbl_ice_orc_parted
 POSTHOOK: Output: default@tbl_ice_orc_parted
+PREHOOK: query: drop table tbl_ice_orc_complex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_orc_complex
+PREHOOK: Output: default@tbl_ice_orc_complex
+POSTHOOK: query: drop table tbl_ice_orc_complex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_orc_complex
+POSTHOOK: Output: default@tbl_ice_orc_complex
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
index 358586d43b2..fce8eaa59f2 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
@@ -347,6 +347,81 @@ America	Canada	2	bb
 America	USA	2	aa
 Europe	Hungary	1	aa
 Europe	Austria	3	cc
+PREHOOK: query: create external table tbl_ice_parquet_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_parquet_complex
+POSTHOOK: query: create external table tbl_ice_parquet_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_parquet_complex
+PREHOOK: query: insert into tbl_ice_parquet_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_parquet_complex
+POSTHOOK: query: insert into tbl_ice_parquet_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_parquet_complex
+PREHOOK: query: select * from tbl_ice_parquet_complex order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_parquet_complex
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tbl_ice_parquet_complex order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_parquet_complex
+#### A masked pattern was here ####
+1	["a","b","c"]	[["a"],["b","c"]]	[{"a":"b"},{"e":"f"}]	[{"something":"a","someone":"b","somewhere":"c"},{"something":"e","someone":"f","somewhere":"g"}]	{"a":"b"}	{"a":["b","c"]}	{"a":{"b":"c"}}	{"a":{"something":"b","someone":"c","somewhere":"d"}}	{"something":"a","somewhere":"b"}	{"names":["a","b"],"birthdays":["c","d","e"]}	{"map1":{"a":"b"},"map2":{"c":"d"}}
 PREHOOK: query: drop table tbl_ice_parquet
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@tbl_ice_parquet
@@ -371,3 +446,11 @@ POSTHOOK: query: drop table tbl_ice_parquet_parted
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@tbl_ice_parquet_parted
 POSTHOOK: Output: default@tbl_ice_parquet_parted
+PREHOOK: query: drop table tbl_ice_parquet_complex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_parquet_complex
+PREHOOK: Output: default@tbl_ice_parquet_complex
+POSTHOOK: query: drop table tbl_ice_parquet_complex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_parquet_complex
+POSTHOOK: Output: default@tbl_ice_parquet_complex
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
index e1d25d5321e..e6b1ceb69b6 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out
@@ -271,6 +271,81 @@ America	Canada	2	bb
 America	USA	2	aa
 Europe	Hungary	1	aa
 Europe	Austria	3	cc
+PREHOOK: query: create external table tbl_ice_orc_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_orc_complex
+POSTHOOK: query: create external table tbl_ice_orc_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_orc_complex
+PREHOOK: query: insert into tbl_ice_orc_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_orc_complex
+POSTHOOK: query: insert into tbl_ice_orc_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_orc_complex
+PREHOOK: query: select * from tbl_ice_orc_complex order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_orc_complex
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_orc_complex order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_orc_complex
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	["a","b","c"]	[["a"],["b","c"]]	[{"a":"b"},{"e":"f"}]	[{"something":"a","someone":"b","somewhere":"c"},{"something":"e","someone":"f","somewhere":"g"}]	{"a":"b"}	{"a":["b","c"]}	{"a":{"b":"c"}}	{"a":{"something":"b","someone":"c","somewhere":"d"}}	{"something":"a","somewhere":"b"}	{"names":["a","b"],"birthdays":["c","d","e"]}	{"map1":{"a":"b"},"map2":{"c":"d"}}
 PREHOOK: query: drop table tbl_ice_orc
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@tbl_ice_orc
@@ -295,3 +370,11 @@ POSTHOOK: query: drop table tbl_ice_orc_parted
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@tbl_ice_orc_parted
 POSTHOOK: Output: default@tbl_ice_orc_parted
+PREHOOK: query: drop table tbl_ice_orc_complex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_orc_complex
+PREHOOK: Output: default@tbl_ice_orc_complex
+POSTHOOK: query: drop table tbl_ice_orc_complex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_orc_complex
+POSTHOOK: Output: default@tbl_ice_orc_complex
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
index 1a1782d7dd1..34faa886d7a 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out
@@ -271,6 +271,81 @@ America	Canada	2	bb
 America	USA	2	aa
 Europe	Hungary	1	aa
 Europe	Austria	3	cc
+PREHOOK: query: create external table tbl_ice_parquet_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_parquet_complex
+POSTHOOK: query: create external table tbl_ice_parquet_complex (
+    a int,
+    arrayofprimitives array<string>,
+    arrayofarrays array<array<string>>,
+    arrayofmaps array<map<string, string>>,
+    arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
+    mapofprimitives map<string, string>,
+    mapofarrays map<string, array<string>>,
+    mapofmaps map<string, map<string, string>>,
+    mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
+    structofprimitives struct<something:string, somewhere:string>,
+    structofarrays struct<names:array<string>, birthdays:array<string>>,
+    structofmaps struct<map1:map<string, string>, map2:map<string, string>>
+ ) stored by iceberg stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_parquet_complex
+PREHOOK: query: insert into tbl_ice_parquet_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_parquet_complex
+POSTHOOK: query: insert into tbl_ice_parquet_complex values (
+    1,
+    array('a','b','c'),
+    array(array('a'), array('b', 'c')),
+    array(map('a','b'), map('e','f')),
+    array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')),
+    map('a', 'b'),
+    map('a', array('b','c')),
+    map('a', map('b','c')),
+    map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')),
+    named_struct('something', 'a', 'somewhere', 'b'),
+    named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')),
+    named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_parquet_complex
+PREHOOK: query: select * from tbl_ice_parquet_complex order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_parquet_complex
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_parquet_complex order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_parquet_complex
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	["a","b","c"]	[["a"],["b","c"]]	[{"a":"b"},{"e":"f"}]	[{"something":"a","someone":"b","somewhere":"c"},{"something":"e","someone":"f","somewhere":"g"}]	{"a":"b"}	{"a":["b","c"]}	{"a":{"b":"c"}}	{"a":{"something":"b","someone":"c","somewhere":"d"}}	{"something":"a","somewhere":"b"}	{"names":["a","b"],"birthdays":["c","d","e"]}	{"map1":{"a":"b"},"map2":{"c":"d"}}
 PREHOOK: query: drop table tbl_ice_parquet
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@tbl_ice_parquet
@@ -295,3 +370,11 @@ POSTHOOK: query: drop table tbl_ice_parquet_parted
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@tbl_ice_parquet_parted
 POSTHOOK: Output: default@tbl_ice_parquet_parted
+PREHOOK: query: drop table tbl_ice_parquet_complex
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_parquet_complex
+PREHOOK: Output: default@tbl_ice_parquet_complex
+POSTHOOK: query: drop table tbl_ice_parquet_complex
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_parquet_complex
+POSTHOOK: Output: default@tbl_ice_parquet_complex