This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new a79097c246d HIVE-26950: Iceberg: Addendum: CTLT sets incorrect 'TRANSLATED_TO_EXTERNAL' value (Denys Kuzmenko, reviewed by Ayush Saxena) a79097c246d is described below commit a79097c246d77224d8c25f9450a0fb5f842c864e Author: Denys Kuzmenko <denisk...@gmail.com> AuthorDate: Wed Nov 22 17:22:45 2023 +0200 HIVE-26950: Iceberg: Addendum: CTLT sets incorrect 'TRANSLATED_TO_EXTERNAL' value (Denys Kuzmenko, reviewed by Ayush Saxena) Closes #4890 --- .../org/apache/hadoop/hive/conf/Constants.java | 7 ++---- .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 19 +++++++-------- .../src/test/queries/positive/ctlt_iceberg.q | 5 ++-- .../src/test/results/positive/ctlt_iceberg.q.out | 3 +++ .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 5 ++-- .../ql/parse/repl/dump/io/PartitionSerializer.java | 6 ----- .../apache/hadoop/hive/metastore/HiveMetaHook.java | 27 ++++++++++++---------- .../hive/metastore/utils/MetaStoreUtils.java | 9 ++++++-- .../hadoop/hive/metastore/HiveAlterHandler.java | 4 ++-- .../metastore/MetastoreDefaultTransformer.java | 8 +++---- 10 files changed, 48 insertions(+), 45 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java index 5d2550cbbd6..7bd736c130b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -98,16 +98,13 @@ public class Constants { public static final String ORC_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"; public static final String ORC_OUTPUT_FORMAT = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"; - - + public static final Pattern COMPACTION_POOLS_PATTERN = Pattern.compile("hive\\.compactor\\.worker\\.(.*)\\.threads"); public static final String HIVE_COMPACTOR_WORKER_POOL = "hive.compactor.worker.pool"; - public static final String HIVE_COMPACTOR_REBALANCE_ORDERBY = "hive.compactor.rebalance.orderby"; public static final String HTTP_HEADER_REQUEST_TRACK = "X-Request-ID"; public static final String TIME_POSTFIX_REQUEST_TRACK = "_TIME"; - - public static final String ICEBERG = "iceberg"; + public static final String ICEBERG_PARTITION_TABLE_SCHEMA = "partition,spec_id,record_count,file_count," + "position_delete_record_count,position_delete_file_count,equality_delete_record_count," + "equality_delete_file_count,last_updated_at,total_data_file_size_in_bytes,last_updated_snapshot_id"; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 1088d6d4302..604f078f79c 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -63,6 +63,7 @@ import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.Context.Operation; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -441,7 +442,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H // For write queries where rows got modified, don't fetch from cache as values could have changed. Table table = getTable(hmsTable); Map<String, String> stats = Maps.newHashMap(); - if (getStatsSource().equals(Constants.ICEBERG)) { + if (getStatsSource().equals(HiveMetaHook.ICEBERG)) { if (table.currentSnapshot() != null) { Map<String, String> summary = table.currentSnapshot().summary(); if (summary != null) { @@ -492,7 +493,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H @Override public boolean canSetColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); - return table.currentSnapshot() != null && getStatsSource().equals(Constants.ICEBERG); + return table.currentSnapshot() != null && getStatsSource().equals(HiveMetaHook.ICEBERG); } @Override @@ -568,7 +569,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H @Override public boolean canComputeQueryUsingStats(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { - if (getStatsSource().equals(Constants.ICEBERG)) { + if (getStatsSource().equals(HiveMetaHook.ICEBERG)) { Table table = getTable(hmsTable); if (table.currentSnapshot() != null) { Map<String, String> summary = table.currentSnapshot().summary(); @@ -585,7 +586,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H } private String getStatsSource() { - return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE, Constants.ICEBERG).toLowerCase(); + return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE, HiveMetaHook.ICEBERG) + .toUpperCase(); } private Path getColStatsPath(Table table) { @@ -1581,9 +1583,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H public void setTableParametersForCTLT(org.apache.hadoop.hive.ql.metadata.Table tbl, CreateTableLikeDesc desc, Map<String, String> origParams) { // Preserve the format-version of the iceberg table and filter out rest. - String formatVersion = origParams.get(TableProperties.FORMAT_VERSION); - if ("2".equals(formatVersion)) { - tbl.getParameters().put(TableProperties.FORMAT_VERSION, formatVersion); + if (IcebergTableUtil.isV2Table(origParams)) { + tbl.getParameters().put(TableProperties.FORMAT_VERSION, "2"); tbl.getParameters().put(TableProperties.DELETE_MODE, MERGE_ON_READ); tbl.getParameters().put(TableProperties.UPDATE_MODE, MERGE_ON_READ); tbl.getParameters().put(TableProperties.MERGE_MODE, MERGE_ON_READ); @@ -1591,12 +1592,12 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H // check if the table is being created as managed table, in that case we translate it to external if (!desc.isExternal()) { - tbl.getParameters().put("TRANSLATED_TO_EXTERNAL", "TRUE"); + tbl.getParameters().put(HiveMetaHook.TRANSLATED_TO_EXTERNAL, "TRUE"); desc.setIsExternal(true); } // If source is Iceberg table set the schema and the partition spec - if ("ICEBERG".equalsIgnoreCase(origParams.get("table_type"))) { + if (MetaStoreUtils.isIcebergTable(origParams)) { tbl.getParameters() .put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA)); tbl.getParameters() diff --git a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q index 895f51a4ff2..13d97f1ca48 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q @@ -3,8 +3,6 @@ -- Mask random uuid --! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/ -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.explain.user=false; create table source(a int) stored by iceberg tblproperties ('format-version'='2') ; @@ -47,6 +45,9 @@ create table emp_like2 like emp stored by iceberg; -- Partition column should be there show create table emp_like2; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + -- create a managed table create managed table man_table (id int) Stored as orc TBLPROPERTIES ('transactional'='true'); diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out index d25fde849cd..7fe78cad78a 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out @@ -38,6 +38,7 @@ STORED BY LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( + 'TRANSLATED_TO_EXTERNAL'='TRUE', 'bucketing_version'='2', 'created_with_ctlt'='true', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"a","required":false,"type":"int"}]}', @@ -162,6 +163,7 @@ STORED BY LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( + 'TRANSLATED_TO_EXTERNAL'='TRUE', 'bucketing_version'='2', 'created_with_ctlt'='true', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"company","required":false,"type":"string"}]}', @@ -233,6 +235,7 @@ STORED BY LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( + 'TRANSLATED_TO_EXTERNAL'='TRUE', 'bucketing_version'='2', 'created_with_ctlt'='true', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"company","required":false,"type":"string"}]}', diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ca0cc179876..ba1fd1b49b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -14263,14 +14263,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { isTransactional, isManaged, new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); tblProps.put(hive_metastoreConstants.TABLE_IS_CTLT, "true"); - isExt = isIcebergTable(tblProps) || - isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); + isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); Table likeTable = getTable(likeTableName, false); if (likeTable != null) { - if (isTemporary || isExt) { + if (isTemporary || isExt || isIcebergTable(tblProps)) { updateDefaultTblProps(likeTable.getParameters(), tblProps, new ArrayList<>(Arrays.asList(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/PartitionSerializer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/PartitionSerializer.java index 281bb37d32c..68a9202433c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/PartitionSerializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/PartitionSerializer.java @@ -67,10 +67,4 @@ public class PartitionSerializer implements JsonWriter.Serializer { throw new SemanticException(ErrorMsg.ERROR_SERIALIZE_METASTORE.getMsg(), e); } } - - private boolean isPartitionExternal() { - Map<String, String> params = partition.getParameters(); - return params.containsKey("EXTERNAL") - && params.get("EXTERNAL").equalsIgnoreCase("TRUE"); - } } diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java index 8a0970a474b..695a3282838 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java @@ -42,18 +42,21 @@ import java.util.List; @InterfaceStability.Stable public interface HiveMetaHook { - public String ALTER_TABLE_OPERATION_TYPE = "alterTableOpType"; + String ALTER_TABLE_OPERATION_TYPE = "alterTableOpType"; // These should remain in sync with AlterTableType enum - public List<String> allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS"); + List<String> allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS"); String ALTERLOCATION = "ALTERLOCATION"; String ALLOW_PARTITION_KEY_CHANGE = "allow_partition_key_change"; String SET_PROPERTIES = "set_properties"; String UNSET_PROPERTIES = "unset_properties"; - String TABLE_TYPE = "table_type"; + String TRANSLATED_TO_EXTERNAL = "TRANSLATED_TO_EXTERNAL"; + String TABLE_TYPE = "table_type"; + String EXTERNAL = "EXTERNAL"; String ICEBERG = "ICEBERG"; + String PROPERTIES_SEPARATOR = "'"; String MIGRATE_HIVE_TO_ICEBERG = "migrate_hive_to_iceberg"; String INITIALIZE_ROLLBACK_MIGRATION = "initialize_rollback_migration"; @@ -70,7 +73,7 @@ public interface HiveMetaHook { * * @param table new table definition */ - public void preCreateTable(Table table) + void preCreateTable(Table table) throws MetaException; /** @@ -79,7 +82,7 @@ public interface HiveMetaHook { * * @param table new table definition */ - public void rollbackCreateTable(Table table) + void rollbackCreateTable(Table table) throws MetaException; /** @@ -88,7 +91,7 @@ public interface HiveMetaHook { * * @param table new table definition */ - public void commitCreateTable(Table table) + void commitCreateTable(Table table) throws MetaException; /** @@ -97,7 +100,7 @@ public interface HiveMetaHook { * * @param table table definition */ - public void preDropTable(Table table) + void preDropTable(Table table) throws MetaException; /** @@ -118,7 +121,7 @@ public interface HiveMetaHook { * * @param table table definition */ - public void rollbackDropTable(Table table) + void rollbackDropTable(Table table) throws MetaException; /** @@ -130,7 +133,7 @@ public interface HiveMetaHook { * @param deleteData whether to delete data as well; this should typically * be ignored in the case of an external table */ - public void commitDropTable(Table table, boolean deleteData) + void commitDropTable(Table table, boolean deleteData) throws MetaException; /** @@ -139,7 +142,7 @@ public interface HiveMetaHook { * * @param table new table definition */ - public default void preAlterTable(Table table, EnvironmentContext context) throws MetaException { + default void preAlterTable(Table table, EnvironmentContext context) throws MetaException { String alterOpType = (context == null || context.getProperties() == null) ? null : context.getProperties().get(ALTER_TABLE_OPERATION_TYPE); // By default allow only ADDPROPS and DROPPROPS. @@ -175,11 +178,11 @@ public interface HiveMetaHook { * @param context context of the truncate operation * @throws MetaException */ - public default void preTruncateTable(Table table, EnvironmentContext context) throws MetaException { + default void preTruncateTable(Table table, EnvironmentContext context) throws MetaException { preTruncateTable(table, context, null); } - public default void preTruncateTable(Table table, EnvironmentContext context, List<String> partNames) throws MetaException { + default void preTruncateTable(Table table, EnvironmentContext context, List<String> partNames) throws MetaException { // Do nothing } diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 1491b45a353..1aa98d9f1d1 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.common.repl.ReplConst; import org.apache.hadoop.hive.metastore.ColumnType; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -286,10 +287,14 @@ public class MetaStoreUtils { return isExternal(params); } + public static boolean isIcebergTable(Map<String, String> params) { + return HiveMetaHook.ICEBERG.equalsIgnoreCase(params.get(HiveMetaHook.TABLE_TYPE)); + } + public static boolean isTranslatedToExternalTable(Table table) { Map<String, String> params = table.getParameters(); - return params != null && MetaStoreUtils.isPropertyTrue(params, "EXTERNAL") - && MetaStoreUtils.isPropertyTrue(params, "TRANSLATED_TO_EXTERNAL") && table.getSd() != null + return params != null && MetaStoreUtils.isPropertyTrue(params, HiveMetaHook.EXTERNAL) + && MetaStoreUtils.isPropertyTrue(params, HiveMetaHook.TRANSLATED_TO_EXTERNAL) && table.getSd() != null && table.getSd().isSetLocation(); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 9bce99ad680..1c18631e1cc 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -254,9 +254,9 @@ public class HiveAlterHandler implements AlterHandler { boolean renamedTranslatedToExternalTable = rename && MetaStoreUtils.isTranslatedToExternalTable(oldt) && MetaStoreUtils.isTranslatedToExternalTable(newt); boolean renamedExternalTable = rename && MetaStoreUtils.isExternalTable(oldt) - && !MetaStoreUtils.isPropertyTrue(oldt.getParameters(), "TRANSLATED_TO_EXTERNAL"); + && !MetaStoreUtils.isPropertyTrue(oldt.getParameters(), HiveMetaHook.TRANSLATED_TO_EXTERNAL); boolean isRenameIcebergTable = - rename && HiveMetaHook.ICEBERG.equalsIgnoreCase(newt.getParameters().get(HiveMetaHook.TABLE_TYPE)); + rename && MetaStoreUtils.isIcebergTable(newt.getParameters()); List<ColumnStatistics> columnStatistics = getColumnStats(msdb, oldt); columnStatistics = deleteTableColumnStats(msdb, oldt, newt, columnStatistics); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java index 48174300516..ff92ab86d42 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java @@ -652,9 +652,9 @@ public class MetastoreDefaultTransformer implements IMetaStoreMetadataTransforme newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); params.remove(TABLE_IS_TRANSACTIONAL); params.remove(TABLE_TRANSACTIONAL_PROPERTIES); - params.put("EXTERNAL", "TRUE"); + params.put(HiveMetaHook.EXTERNAL, "TRUE"); params.put(EXTERNAL_TABLE_PURGE, "TRUE"); - params.put("TRANSLATED_TO_EXTERNAL", "TRUE"); + params.put(HiveMetaHook.TRANSLATED_TO_EXTERNAL, "TRUE"); newTable.setParameters(params); LOG.info("Modified table params are:" + params.toString()); @@ -782,8 +782,8 @@ public class MetastoreDefaultTransformer implements IMetaStoreMetadataTransforme private boolean isTranslatedToExternalTable(Table table) { Map<String, String> p = table.getParameters(); - return p != null && MetaStoreUtils.isPropertyTrue(p, "EXTERNAL") - && MetaStoreUtils.isPropertyTrue(p, "TRANSLATED_TO_EXTERNAL") && table.getSd() != null + return p != null && MetaStoreUtils.isPropertyTrue(p, HiveMetaHook.EXTERNAL) + && MetaStoreUtils.isPropertyTrue(p, HiveMetaHook.TRANSLATED_TO_EXTERNAL) && table.getSd() != null && table.getSd().isSetLocation(); }