This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new e5c4c2bf5e0 HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan) e5c4c2bf5e0 is described below commit e5c4c2bf5e09674c5a1ca3d8f4d70616f0cdc0fe Author: Ayush Saxena <ayushsax...@apache.org> AuthorDate: Mon Jan 30 23:11:18 2023 +0530 HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan) --- .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 25 ++++ .../src/test/queries/positive/ctlt_iceberg.q | 23 ++++ .../src/test/results/positive/ctlt_iceberg.q.out | 136 +++++++++++++++++++++ .../create/like/CreateTableLikeOperation.java | 19 ++- .../hive/ql/metadata/HiveStorageHandler.java | 10 ++ .../org/apache/hadoop/hive/ql/metadata/Table.java | 12 ++ 6 files changed, 219 insertions(+), 6 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index e01a2587cfe..fc54f826e63 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -63,6 +63,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec; +import org.apache.hadoop.hive.ql.parse.PartitionTransform; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.TransformSpec; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -1127,5 +1128,29 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H tbl.getParameters().put("TRANSLATED_TO_EXTERNAL", "TRUE"); desc.setIsExternal(true); } + + // If source is Iceberg table set the schema and the partition spec + if ("ICEBERG".equalsIgnoreCase(origParams.get("table_type"))) { + tbl.getParameters() + .put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA)); + tbl.getParameters() + .put(InputFormatConfig.PARTITION_SPEC, origParams.get(InputFormatConfig.PARTITION_SPEC)); + } else { + // if the source is partitioned non-iceberg table set the partition transform spec and set the table as + // unpartitioned + List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(tbl.getPartitionKeys()); + SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec); + tbl.getSd().getCols().addAll(tbl.getPartitionKeys()); + tbl.getTTable().setPartitionKeysIsSet(false); + } + } + + @Override + public Map<String, String> getNativeProperties(org.apache.hadoop.hive.ql.metadata.Table table) { + Table origTable = IcebergTableUtil.getTable(conf, table.getTTable()); + Map<String, String> props = Maps.newHashMap(); + props.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(origTable.schema())); + props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(origTable.spec())); + return props; } } diff --git a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q index b235ca9c2df..8d77812a729 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q @@ -22,3 +22,26 @@ select a from target order by a; delete from target where a=2; select a from target order by a; + +--create a partitioned iceberg table +create table emp_iceberg (id int) partitioned by (company string) stored by iceberg; + +show create table emp_iceberg; + +-- CTLT with the source as the partitioned iceberg table +create table emp_like1 like emp_iceberg stored by iceberg; + +-- Partition column should be there +show create table emp_like1; + +--create a partitioned non iceberg table +create table emp (id int) partitioned by (company string); + +show create table emp; + +-- CTLT with the source as the partitioned iceberg table +create table emp_like2 like emp stored by iceberg; + +-- Partition column should be there +show create table emp_like2; + diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out index 157c018222c..389f59df599 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out @@ -95,3 +95,139 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@target POSTHOOK: Output: hdfs://### HDFS PATH ### 1 +PREHOOK: query: create table emp_iceberg (id int) partitioned by (company string) stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp_iceberg +POSTHOOK: query: create table emp_iceberg (id int) partitioned by (company string) stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp_iceberg +PREHOOK: query: show create table emp_iceberg +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@emp_iceberg +POSTHOOK: query: show create table emp_iceberg +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@emp_iceberg +CREATE TABLE `emp_iceberg`( + `id` int, + `company` string) +PARTITIONED BY SPEC ( +company) +ROW FORMAT SERDE + 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' +STORED BY + 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' + +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', + 'engine.hive.enabled'='true', + 'iceberg.orc.files.only'='false', + 'metadata_location'='hdfs://### HDFS PATH ###', + 'serialization.format'='1', + 'table_type'='ICEBERG', +#### A masked pattern was here #### + 'uuid'='#Masked#') +PREHOOK: query: create table emp_like1 like emp_iceberg stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp_like1 +POSTHOOK: query: create table emp_like1 like emp_iceberg stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp_like1 +PREHOOK: query: show create table emp_like1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@emp_like1 +POSTHOOK: query: show create table emp_like1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@emp_like1 +CREATE EXTERNAL TABLE `emp_like1`( + `id` int, + `company` string) +PARTITIONED BY SPEC ( +company) +ROW FORMAT SERDE + 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' +STORED BY + 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' + +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'TRANSLATED_TO_EXTERNAL'='TRUE', + 'bucketing_version'='2', + 'created_with_ctlt'='true', + 'engine.hive.enabled'='true', + 'iceberg.orc.files.only'='false', + 'metadata_location'='hdfs://### HDFS PATH ###', + 'table_type'='ICEBERG', +#### A masked pattern was here #### + 'uuid'='#Masked#') +PREHOOK: query: create table emp (id int) partitioned by (company string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp +POSTHOOK: query: create table emp (id int) partitioned by (company string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp +PREHOOK: query: show create table emp +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@emp +POSTHOOK: query: show create table emp +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@emp +CREATE TABLE `emp`( + `id` int) +PARTITIONED BY ( + `company` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +PREHOOK: query: create table emp_like2 like emp stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp_like2 +POSTHOOK: query: create table emp_like2 like emp stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp_like2 +PREHOOK: query: show create table emp_like2 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@emp_like2 +POSTHOOK: query: show create table emp_like2 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@emp_like2 +CREATE EXTERNAL TABLE `emp_like2`( + `id` int, + `company` string) +PARTITIONED BY SPEC ( +company) +ROW FORMAT SERDE + 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' +STORED BY + 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' + +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'TRANSLATED_TO_EXTERNAL'='TRUE', + 'bucketing_version'='2', + 'created_with_ctlt'='true', + 'engine.hive.enabled'='true', + 'iceberg.orc.files.only'='false', + 'metadata_location'='hdfs://### HDFS PATH ###', + 'table_type'='ICEBERG', +#### A masked pattern was here #### + 'uuid'='#Masked#') diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java index cde116b83f8..6534d70e326 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java @@ -55,7 +55,13 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc> if (oldTable.getTableType() == TableType.VIRTUAL_VIEW || oldTable.getTableType() == TableType.MATERIALIZED_VIEW) { tbl = createViewLikeTable(oldTable); } else { - tbl = createTableLikeTable(oldTable); + Map<String, String> originalProperties = new HashMap<>(); + // Get the storage handler without caching, since the storage handler can get changed when copying the + // properties of the target table and + if (oldTable.getStorageHandlerWithoutCaching() != null) { + originalProperties = new HashMap<>(oldTable.getStorageHandlerWithoutCaching().getNativeProperties(oldTable)); + } + tbl = createTableLikeTable(oldTable, originalProperties); } // If location is specified - ensure that it is a full qualified name @@ -104,7 +110,8 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc> return table; } - private Table createTableLikeTable(Table table) throws SemanticException, HiveException { + private Table createTableLikeTable(Table table, Map<String, String> originalProperties) + throws SemanticException, HiveException { String[] names = Utilities.getDbTableName(desc.getTableName()); table.setDbName(names[0]); table.setTableName(names[1]); @@ -112,7 +119,7 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc> setUserSpecifiedLocation(table); - setTableParameters(table); + setTableParameters(table, originalProperties); if (desc.isUserStorageFormat() || (table.getInputFormatClass() == null) || (table.getOutputFormatClass() == null)) { setStorage(table); @@ -139,18 +146,18 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc> } } - private void setTableParameters(Table tbl) throws HiveException { + private void setTableParameters(Table tbl, Map<String, String> originalProperties) throws HiveException { // With Hive-25813, we'll not copy over table properties from the source. // CTLT should should copy column schema but not table properties. It is also consistent // with other query engines like mysql, redshift. - Map<String, String> origParams = new HashMap<>(tbl.getParameters()); + originalProperties.putAll(tbl.getParameters()); tbl.getParameters().clear(); if (desc.getTblProps() != null) { tbl.setParameters(desc.getTblProps()); } HiveStorageHandler storageHandler = tbl.getStorageHandler(); if (storageHandler != null) { - storageHandler.setTableParametersForCTLT(tbl, desc, origParams); + storageHandler.setTableParametersForCTLT(tbl, desc, originalProperties); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index 15a3c823118..7f765270c03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -55,6 +55,7 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -281,6 +282,15 @@ public interface HiveStorageHandler extends Configurable { Map<String, String> origParams) { } + /** + * Extract the native properties of the table which aren't stored in the HMS + * @param table the table + * @return map with native table level properties + */ + default Map<String, String> getNativeProperties(org.apache.hadoop.hive.ql.metadata.Table table) { + return new HashMap<>(); + } + enum AcidSupportType { NONE, WITH_TRANSACTIONS, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 493cf627f4c..57e02cf3699 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -368,6 +368,18 @@ public class Table implements Serializable { return storageHandler; } + public HiveStorageHandler getStorageHandlerWithoutCaching() { + if (storageHandler != null || !isNonNative()) { + return storageHandler; + } + try { + return HiveUtils.getStorageHandler(SessionState.getSessionConf(), + getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public void setStorageHandler(HiveStorageHandler sh){ storageHandler = sh; }