This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 434d7888cde HIVE-27141: Iceberg: Add more iceberg table metadata (#4119). (zhangbutao, reviewed by Ayush Saxena) 434d7888cde is described below commit 434d7888cde077fe49a1df2bf26dc0a517ee3fbb Author: Butao Zhang <zhangbu...@cmss.chinamobile.com> AuthorDate: Tue Mar 21 10:14:29 2023 +0800 HIVE-27141: Iceberg: Add more iceberg table metadata (#4119). (zhangbutao, reviewed by Ayush Saxena) --- .../iceberg/mr/hive/IcebergMetadataTables.java | 3 +- .../positive/describe_iceberg_metadata_tables.q | 20 +- .../query_iceberg_metadata_of_partitioned_table.q | 24 ++ ...query_iceberg_metadata_of_unpartitioned_table.q | 13 + .../describe_iceberg_metadata_tables.q.out | 349 ++++++++++++++++++++- ...ery_iceberg_metadata_of_partitioned_table.q.out | 254 +++++++++++++++ ...y_iceberg_metadata_of_unpartitioned_table.q.out | Bin 29934 -> 34884 bytes 7 files changed, 644 insertions(+), 19 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergMetadataTables.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergMetadataTables.java index 2ea162fc720..d9954b696a3 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergMetadataTables.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergMetadataTables.java @@ -21,7 +21,8 @@ package org.apache.iceberg.mr.hive; public enum IcebergMetadataTables { - ENTRIES, FILES, HISTORY, SNAPSHOTS, MANIFESTS, PARTITIONS, ALL_DATA_FILES, ALL_MANIFESTS, ALL_ENTRIES; + ENTRIES, FILES, HISTORY, SNAPSHOTS, MANIFESTS, PARTITIONS, ALL_DATA_FILES, ALL_MANIFESTS, ALL_ENTRIES, + DATA_FILES, DELETE_FILES, METADATA_LOG_ENTRIES, REFS, ALL_DELETE_FILES, ALL_FILES; public static boolean isValidMetaTable(String metaTableName) { for (IcebergMetadataTables metaTable : IcebergMetadataTables.values()) { diff --git a/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_metadata_tables.q b/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_metadata_tables.q index fbab1c9b890..86d6912b88b 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_metadata_tables.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/describe_iceberg_metadata_tables.q @@ -6,8 +6,6 @@ create external table ice_meta_desc (id int, value string) stored by iceberg sto insert into ice_meta_desc values (1, 'one'),(2,'two'),(3,'three'),(4,'four'),(5,'five'); insert into ice_meta_desc values (3,'three'),(4,'four'),(5,'five'); -describe default.ice_meta_desc; -describe default.ice_meta_desc; describe default.ice_meta_desc; describe default.ice_meta_desc.files; @@ -19,6 +17,12 @@ describe default.ice_meta_desc.partitions; describe default.ice_meta_desc.all_manifests; describe default.ice_meta_desc.all_data_files; describe default.ice_meta_desc.all_entries; +describe default.ice_meta_desc.data_files; +describe default.ice_meta_desc.delete_files; +describe default.ice_meta_desc.metadata_log_entries; +describe default.ice_meta_desc.refs; +describe default.ice_meta_desc.all_delete_files; +describe default.ice_meta_desc.all_files; describe formatted default.ice_meta_desc.files; describe formatted default.ice_meta_desc.entries; @@ -29,6 +33,12 @@ describe formatted default.ice_meta_desc.partitions; describe formatted default.ice_meta_desc.all_manifests; describe formatted default.ice_meta_desc.all_data_files; describe formatted default.ice_meta_desc.all_entries; +describe formatted default.ice_meta_desc.data_files; +describe formatted default.ice_meta_desc.delete_files; +describe formatted default.ice_meta_desc.metadata_log_entries; +describe formatted default.ice_meta_desc.refs; +describe formatted default.ice_meta_desc.all_delete_files; +describe formatted default.ice_meta_desc.all_files; describe extended default.ice_meta_desc.files; describe extended default.ice_meta_desc.entries; @@ -39,5 +49,11 @@ describe extended default.ice_meta_desc.partitions; describe extended default.ice_meta_desc.all_manifests; describe extended default.ice_meta_desc.all_data_files; describe extended default.ice_meta_desc.all_entries; +describe formatted default.ice_meta_desc.data_files; +describe formatted default.ice_meta_desc.delete_files; +describe formatted default.ice_meta_desc.metadata_log_entries; +describe formatted default.ice_meta_desc.refs; +describe formatted default.ice_meta_desc.all_delete_files; +describe formatted default.ice_meta_desc.all_files; drop table ice_meta_desc; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_partitioned_table.q b/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_partitioned_table.q index 90ce35b52df..4f6bbdf397e 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_partitioned_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_partitioned_table.q @@ -49,6 +49,18 @@ select * from default.ice_meta_3.partitions; select `partition` from default.ice_meta_2.partitions where `partition`.b='four'; select * from default.ice_meta_3.partitions where `partition`.b='two' and `partition`.c='Tuesday'; select partition_summaries from default.ice_meta_3.manifests where partition_summaries[1].upper_bound='Wednesday'; +select file_format, spec_id from default.ice_meta_2.data_files; +select file_format, spec_id from default.ice_meta_3.data_files; +select content, upper_bounds from default.ice_meta_2.delete_files; +select content, upper_bounds from default.ice_meta_3.delete_files; +select file from default.ice_meta_2.metadata_log_entries; +select file from default.ice_meta_3.metadata_log_entries; +select name, type from default.ice_meta_2.refs; +select name, type from default.ice_meta_3.refs; +select content, file_format from default.ice_meta_2.all_delete_files; +select content, file_format from default.ice_meta_3.all_delete_files; +select file_format, value_counts from default.ice_meta_2.all_files; +select file_format, value_counts from default.ice_meta_3.all_files; set hive.fetch.task.conversion=more; @@ -68,6 +80,18 @@ select * from default.ice_meta_3.partitions; select `partition` from default.ice_meta_2.partitions where `partition`.b='four'; select * from default.ice_meta_3.partitions where `partition`.b='two' and `partition`.c='Tuesday'; select partition_summaries from default.ice_meta_3.manifests where partition_summaries[1].upper_bound='Wednesday'; +select file_format, spec_id from default.ice_meta_2.data_files; +select file_format, spec_id from default.ice_meta_3.data_files; +select content, upper_bounds from default.ice_meta_2.delete_files; +select content, upper_bounds from default.ice_meta_3.delete_files; +select file from default.ice_meta_2.metadata_log_entries; +select file from default.ice_meta_3.metadata_log_entries; +select name, type from default.ice_meta_2.refs; +select name, type from default.ice_meta_3.refs; +select content, file_format from default.ice_meta_2.all_delete_files; +select content, file_format from default.ice_meta_3.all_delete_files; +select file_format, value_counts from default.ice_meta_2.all_files; +select file_format, value_counts from default.ice_meta_3.all_files; drop table ice_meta_2; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_unpartitioned_table.q b/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_unpartitioned_table.q index 03efa29f2dd..6e416cb347b 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_unpartitioned_table.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/query_iceberg_metadata_of_unpartitioned_table.q @@ -39,6 +39,12 @@ select null_value_counts[1] from default.ice_meta_1.all_data_files; select lower_bounds[2], upper_bounds[2] from default.ice_meta_1.all_data_files; select data_file.record_count as record_count, data_file.value_counts[2] as value_counts, data_file.null_value_counts[2] as null_value_counts from default.ice_meta_1.all_entries; +select file_format, spec_id from default.ice_meta_1.data_files; +select content, upper_bounds from default.ice_meta_1.delete_files; +select file from default.ice_meta_1.metadata_log_entries; +select name, type from default.ice_meta_1.refs; +select content, file_format from default.ice_meta_1.all_delete_files; +select file_format, value_counts from default.ice_meta_1.all_files; set hive.fetch.task.conversion=more; @@ -60,4 +66,11 @@ select null_value_counts[1] from default.ice_meta_1.all_data_files; select lower_bounds[2], upper_bounds[2] from default.ice_meta_1.all_data_files; select data_file.record_count as record_count, data_file.value_counts[2] as value_counts, data_file.null_value_counts[2] as null_value_counts from default.ice_meta_1.all_entries; +select file_format, spec_id from default.ice_meta_1.data_files; +select content, upper_bounds from default.ice_meta_1.delete_files; +select file from default.ice_meta_1.metadata_log_entries; +select name, type from default.ice_meta_1.refs; +select content, file_format from default.ice_meta_1.all_delete_files; +select file_format, value_counts from default.ice_meta_1.all_files; + drop table ice_meta_1; diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out index 656f0a0954d..360e809471c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out @@ -34,22 +34,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc id int value string -PREHOOK: query: describe default.ice_meta_desc -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@ice_meta_desc -POSTHOOK: query: describe default.ice_meta_desc -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@ice_meta_desc -id int -value string -PREHOOK: query: describe default.ice_meta_desc -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@ice_meta_desc -POSTHOOK: query: describe default.ice_meta_desc -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@ice_meta_desc -id int -value string PREHOOK: query: describe default.ice_meta_desc.files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -181,6 +165,113 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct<content:int,file_path:string,file_format:string,spec_id:int,record_count:bigint,file_size_in_bytes:bigint,column_sizes:map<int,bigint>,value_counts:map<int,bigint>,null_value_counts:map<int,bigint>,nan_value_counts:map<int,bigint>,lower_bounds:map<int,binary>,upper_bounds:map<int,binary>,key_metadata:binary,split_offsets:array<bigint>,equality_ids:array<int>,sort_order_id:int> +PREHOOK: query: describe default.ice_meta_desc.data_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe default.ice_meta_desc.data_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe default.ice_meta_desc.delete_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe default.ice_meta_desc.delete_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe default.ice_meta_desc.metadata_log_entries +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe default.ice_meta_desc.metadata_log_entries +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +timestamp timestamp with local time zone +file string +latest_snapshot_id bigint +latest_schema_id int +latest_sequence_number bigint +PREHOOK: query: describe default.ice_meta_desc.refs +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe default.ice_meta_desc.refs +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +name string +type string +snapshot_id bigint +max_reference_age_in_ms bigint +min_snapshots_to_keep int +max_snapshot_age_in_ms bigint +PREHOOK: query: describe default.ice_meta_desc.all_delete_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe default.ice_meta_desc.all_delete_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe default.ice_meta_desc.all_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe default.ice_meta_desc.all_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID PREHOOK: query: describe formatted default.ice_meta_desc.files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -321,6 +412,119 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct<content:int,file_path:string,file_format:string,spec_id:int,record_count:bigint,file_size_in_bytes:bigint,column_sizes:map<int,bigint>,value_counts:map<int,bigint>,null_value_counts:map<int,bigint>,nan_value_counts:map<int,bigint>,lower_bounds:map<int,binary>,upper_bounds:map<int,binary>,key_metadata:binary,split_offsets:array<bigint>,equality_ids:array<int>,sort_order_id:int> +PREHOOK: query: describe formatted default.ice_meta_desc.data_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.data_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe formatted default.ice_meta_desc.delete_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.delete_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe formatted default.ice_meta_desc.metadata_log_entries +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.metadata_log_entries +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +timestamp timestamp with local time zone +file string +latest_snapshot_id bigint +latest_schema_id int +latest_sequence_number bigint +PREHOOK: query: describe formatted default.ice_meta_desc.refs +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.refs +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +name string +type string +snapshot_id bigint +max_reference_age_in_ms bigint +min_snapshots_to_keep int +max_snapshot_age_in_ms bigint +PREHOOK: query: describe formatted default.ice_meta_desc.all_delete_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.all_delete_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe formatted default.ice_meta_desc.all_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.all_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID PREHOOK: query: describe extended default.ice_meta_desc.files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -452,6 +656,119 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct<content:int,file_path:string,file_format:string,spec_id:int,record_count:bigint,file_size_in_bytes:bigint,column_sizes:map<int,bigint>,value_counts:map<int,bigint>,null_value_counts:map<int,bigint>,nan_value_counts:map<int,bigint>,lower_bounds:map<int,binary>,upper_bounds:map<int,binary>,key_metadata:binary,split_offsets:array<bigint>,equality_ids:array<int>,sort_order_id:int> +PREHOOK: query: describe formatted default.ice_meta_desc.data_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.data_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe formatted default.ice_meta_desc.delete_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.delete_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe formatted default.ice_meta_desc.metadata_log_entries +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.metadata_log_entries +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +timestamp timestamp with local time zone +file string +latest_snapshot_id bigint +latest_schema_id int +latest_sequence_number bigint +PREHOOK: query: describe formatted default.ice_meta_desc.refs +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.refs +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +name string +type string +snapshot_id bigint +max_reference_age_in_ms bigint +min_snapshots_to_keep int +max_snapshot_age_in_ms bigint +PREHOOK: query: describe formatted default.ice_meta_desc.all_delete_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.all_delete_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID +PREHOOK: query: describe formatted default.ice_meta_desc.all_files +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice_meta_desc +POSTHOOK: query: describe formatted default.ice_meta_desc.all_files +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice_meta_desc +# col_name data_type comment +#### A masked pattern was here #### +file_format string File format name: avro, orc, or parquet +spec_id int Partition spec ID +record_count bigint Number of records in the file +file_size_in_bytes bigint Total file size in bytes +column_sizes map<int,bigint> Map of column id to total size on disk +value_counts map<int,bigint> Map of column id to total count, including null and NaN +null_value_counts map<int,bigint> Map of column id to null value count +nan_value_counts map<int,bigint> Map of column id to number of NaN values in the column +lower_bounds map<int,binary> Map of column id to lower bound +upper_bounds map<int,binary> Map of column id to upper bound +key_metadata binary Encryption key metadata blob +split_offsets array<bigint> Splittable offsets +equality_ids array<int> Equality comparison field IDs +sort_order_id int Sort order ID PREHOOK: query: drop table ice_meta_desc PREHOOK: type: DROPTABLE PREHOOK: Input: default@ice_meta_desc diff --git a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out index a5a04313c9c..923d7f93679 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out @@ -341,6 +341,133 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### [{"contains_null":false,"contains_nan":false,"lower_bound":"four","upper_bound":"two"},{"contains_null":false,"contains_nan":false,"lower_bound":"Friday","upper_bound":"Wednesday"}] +PREHOOK: query: select file_format, spec_id from default.ice_meta_2.data_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, spec_id from default.ice_meta_2.data_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC 0 +ORC 0 +PREHOOK: query: select file_format, spec_id from default.ice_meta_3.data_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, spec_id from default.ice_meta_3.data_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC 0 +ORC 0 +ORC 0 +ORC 0 +ORC 0 +ORC 0 +ORC 0 +PREHOOK: query: select content, upper_bounds from default.ice_meta_2.delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, upper_bounds from default.ice_meta_2.delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select content, upper_bounds from default.ice_meta_3.delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, upper_bounds from default.ice_meta_3.delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select file from default.ice_meta_2.metadata_log_entries +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file from default.ice_meta_2.metadata_log_entries +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +PREHOOK: query: select file from default.ice_meta_3.metadata_log_entries +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file from default.ice_meta_3.metadata_log_entries +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +PREHOOK: query: select name, type from default.ice_meta_2.refs +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select name, type from default.ice_meta_2.refs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +main BRANCH +PREHOOK: query: select name, type from default.ice_meta_3.refs +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select name, type from default.ice_meta_3.refs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +main BRANCH +PREHOOK: query: select content, file_format from default.ice_meta_2.all_delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, file_format from default.ice_meta_2.all_delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select content, file_format from default.ice_meta_3.all_delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, file_format from default.ice_meta_3.all_delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select file_format, value_counts from default.ice_meta_2.all_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, value_counts from default.ice_meta_2.all_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC {1:1,2:1} +ORC {1:2,2:2} +ORC {1:3,2:3} +ORC {1:3,2:3} +PREHOOK: query: select file_format, value_counts from default.ice_meta_3.all_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, value_counts from default.ice_meta_3.all_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC {1:1} +ORC {1:1} +ORC {1:2} +ORC {1:2} +ORC {1:3} +ORC {1:3} +ORC {1:3} PREHOOK: query: select `partition` from default.ice_meta_2.files PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_2 @@ -510,6 +637,133 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### [{"contains_null":false,"contains_nan":false,"lower_bound":"four","upper_bound":"two"},{"contains_null":false,"contains_nan":false,"lower_bound":"Friday","upper_bound":"Wednesday"}] +PREHOOK: query: select file_format, spec_id from default.ice_meta_2.data_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, spec_id from default.ice_meta_2.data_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC 0 +ORC 0 +PREHOOK: query: select file_format, spec_id from default.ice_meta_3.data_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, spec_id from default.ice_meta_3.data_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC 0 +ORC 0 +ORC 0 +ORC 0 +ORC 0 +ORC 0 +ORC 0 +PREHOOK: query: select content, upper_bounds from default.ice_meta_2.delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, upper_bounds from default.ice_meta_2.delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select content, upper_bounds from default.ice_meta_3.delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, upper_bounds from default.ice_meta_3.delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select file from default.ice_meta_2.metadata_log_entries +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file from default.ice_meta_2.metadata_log_entries +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +PREHOOK: query: select file from default.ice_meta_3.metadata_log_entries +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file from default.ice_meta_3.metadata_log_entries +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +hdfs://### HDFS PATH ### +PREHOOK: query: select name, type from default.ice_meta_2.refs +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select name, type from default.ice_meta_2.refs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +main BRANCH +PREHOOK: query: select name, type from default.ice_meta_3.refs +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select name, type from default.ice_meta_3.refs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +main BRANCH +PREHOOK: query: select content, file_format from default.ice_meta_2.all_delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, file_format from default.ice_meta_2.all_delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select content, file_format from default.ice_meta_3.all_delete_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select content, file_format from default.ice_meta_3.all_delete_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select file_format, value_counts from default.ice_meta_2.all_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, value_counts from default.ice_meta_2.all_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC {1:1,2:1} +ORC {1:2,2:2} +ORC {1:3,2:3} +ORC {1:3,2:3} +PREHOOK: query: select file_format, value_counts from default.ice_meta_3.all_files +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_meta_3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select file_format, value_counts from default.ice_meta_3.all_files +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_meta_3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +ORC {1:1} +ORC {1:1} +ORC {1:2} +ORC {1:2} +ORC {1:3} +ORC {1:3} +ORC {1:3} PREHOOK: query: drop table ice_meta_2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@ice_meta_2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out index ebe67e84e5c..c69dfd0b899 100644 Binary files a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out and b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out differ