This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 778ce817fb HIVE-26158: TRANSLATED_TO_EXTERNAL partition tables cannot query partition data after rename table (#3255) (Zoltan Haindrich reviewed by Saihemanth Gantasala) 778ce817fb is described below commit 778ce817fb50ca6dc8896ebaa258e434964d7639 Author: Zoltan Haindrich <k...@rxd.hu> AuthorDate: Wed May 11 17:10:51 2022 +0200 HIVE-26158: TRANSLATED_TO_EXTERNAL partition tables cannot query partition data after rename table (#3255) (Zoltan Haindrich reviewed by Saihemanth Gantasala) --- .../clientpositive/translated_external_rename4.q | 21 ++++ .../llap/translated_external_rename4.q.out | 126 +++++++++++++++++++++ .../hive/metastore/utils/MetaStoreUtils.java | 22 +++- .../hadoop/hive/metastore/HiveAlterHandler.java | 39 ++++--- .../metastore/MetastoreDefaultTransformer.java | 3 - 5 files changed, 189 insertions(+), 22 deletions(-) diff --git a/ql/src/test/queries/clientpositive/translated_external_rename4.q b/ql/src/test/queries/clientpositive/translated_external_rename4.q new file mode 100644 index 0000000000..30768d1298 --- /dev/null +++ b/ql/src/test/queries/clientpositive/translated_external_rename4.q @@ -0,0 +1,21 @@ +set metastore.metadata.transformer.class=org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer; +set metastore.metadata.transformer.location.mode=prohibit; + +set hive.fetch.task.conversion=none; +set hive.compute.query.using.stats=false; + +set hive.create.as.external.legacy=true; + +CREATE TABLE part_test( +c1 string +,c2 string +)PARTITIONED BY (dat string); + +insert into part_test values ("11","th","20220101"); +insert into part_test values ("22","th","20220102"); + +alter table part_test rename to part_test11; + + +desc formatted part_test11; +desc formatted part_test11 partition(dat="20220101"); diff --git a/ql/src/test/results/clientpositive/llap/translated_external_rename4.q.out b/ql/src/test/results/clientpositive/llap/translated_external_rename4.q.out new file mode 100644 index 0000000000..67c73b49a9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/translated_external_rename4.q.out @@ -0,0 +1,126 @@ +PREHOOK: query: CREATE TABLE part_test( +c1 string +,c2 string +)PARTITIONED BY (dat string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_test +POSTHOOK: query: CREATE TABLE part_test( +c1 string +,c2 string +)PARTITIONED BY (dat string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_test +PREHOOK: query: insert into part_test values ("11","th","20220101") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_test +POSTHOOK: query: insert into part_test values ("11","th","20220101") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_test +POSTHOOK: Output: default@part_test@dat=20220101 +POSTHOOK: Lineage: part_test PARTITION(dat=20220101).c1 SCRIPT [] +POSTHOOK: Lineage: part_test PARTITION(dat=20220101).c2 SCRIPT [] +PREHOOK: query: insert into part_test values ("22","th","20220102") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_test +POSTHOOK: query: insert into part_test values ("22","th","20220102") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_test +POSTHOOK: Output: default@part_test@dat=20220102 +POSTHOOK: Lineage: part_test PARTITION(dat=20220102).c1 SCRIPT [] +POSTHOOK: Lineage: part_test PARTITION(dat=20220102).c2 SCRIPT [] +PREHOOK: query: alter table part_test rename to part_test11 +PREHOOK: type: ALTERTABLE_RENAME +PREHOOK: Input: default@part_test +PREHOOK: Output: default@part_test +POSTHOOK: query: alter table part_test rename to part_test11 +POSTHOOK: type: ALTERTABLE_RENAME +POSTHOOK: Input: default@part_test +POSTHOOK: Output: default@part_test +POSTHOOK: Output: default@part_test11 +PREHOOK: query: desc formatted part_test11 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@part_test11 +POSTHOOK: query: desc formatted part_test11 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@part_test11 +# col_name data_type comment +c1 string +c2 string + +# Partition Information +# col_name data_type comment +dat string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + TRANSLATED_TO_EXTERNAL TRUE + bucketing_version 2 + external.table.purge TRUE +#### A masked pattern was here #### + numFiles 2 + numPartitions 2 + numRows 2 + rawDataSize 10 + totalSize 12 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted part_test11 partition(dat="20220101") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@part_test11 +POSTHOOK: query: desc formatted part_test11 partition(dat="20220101") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@part_test11 +# col_name data_type comment +c1 string +c2 string + +# Partition Information +# col_name data_type comment +dat string + +# Detailed Partition Information +Partition Value: [20220101] +Database: default +Table: part_test11 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} + numFiles 1 + numRows 1 + rawDataSize 5 + totalSize 6 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index d4bcb5b5e9..a6272071ca 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -244,6 +244,13 @@ public class MetaStoreUtils { return isExternal(params); } + public static boolean isTranslatedToExternalTable(Table table) { + Map<String, String> params = table.getParameters(); + return params != null && MetaStoreUtils.isPropertyTrue(params, "EXTERNAL") + && MetaStoreUtils.isPropertyTrue(params, "TRANSLATED_TO_EXTERNAL") && table.getSd() != null + && table.getSd().isSetLocation(); + } + public static String getDbNameFromReplPolicy(String replPolicy) { assert replPolicy != null; return replPolicy.split(Pattern.quote("."))[0]; @@ -923,14 +930,19 @@ public class MetaStoreUtils { */ public static String prependCatalogToDbName(@Nullable String catalogName, @Nullable String dbName, Configuration conf) { - if (catalogName == null) catalogName = getDefaultCatalog(conf); + if (catalogName == null) { + catalogName = getDefaultCatalog(conf); + } StringBuilder buf = new StringBuilder() .append(CATALOG_DB_THRIFT_NAME_MARKER) .append(catalogName) .append(CATALOG_DB_SEPARATOR); if (dbName != null) { - if (dbName.isEmpty()) buf.append(DB_EMPTY_MARKER); - else buf.append(dbName); + if (dbName.isEmpty()) { + buf.append(DB_EMPTY_MARKER); + } else { + buf.append(dbName); + } } return buf.toString(); } @@ -1009,7 +1021,9 @@ public class MetaStoreUtils { return Warehouse.DEFAULT_CATALOG_NAME; } String catName = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CATALOG_DEFAULT); - if (catName == null || "".equals(catName)) catName = Warehouse.DEFAULT_CATALOG_NAME; + if (catName == null || "".equals(catName)) { + catName = Warehouse.DEFAULT_CATALOG_NAME; + } return catName; } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 66d62f0530..25132a1d13 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -226,12 +226,17 @@ public class HiveAlterHandler implements AlterHandler { // 2) the table is not an external table, and // 3) the user didn't change the default location (or new location is empty), and // 4) the table was not initially created with a specified location - if (replDataLocationChanged - || (rename - && !oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString()) - && (oldt.getSd().getLocation().compareTo(newt.getSd().getLocation()) == 0 + boolean renamedManagedTable = rename && !oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString()) + && (oldt.getSd().getLocation().compareTo(newt.getSd().getLocation()) == 0 || StringUtils.isEmpty(newt.getSd().getLocation())) - && !MetaStoreUtils.isExternalTable(oldt))) { + && (!MetaStoreUtils.isExternalTable(oldt)); + + Database db = msdb.getDatabase(catName, newDbName); + + boolean renamedTranslatedToExternalTable = rename && MetaStoreUtils.isTranslatedToExternalTable(oldt) + && MetaStoreUtils.isTranslatedToExternalTable(newt); + if (replDataLocationChanged + || renamedManagedTable || renamedTranslatedToExternalTable) { srcPath = new Path(oldt.getSd().getLocation()); if (replDataLocationChanged) { @@ -249,22 +254,27 @@ public class HiveAlterHandler implements AlterHandler { // in the table rename, its data location should not be changed. We can check // if the table directory was created directly under its database directory to tell // if it is such a table - // Same applies to the ACID tables suffixed with the `txnId`, case with `lockless reads`. + // Same applies to the ACID tables suffixed with the `txnId`, case with `lockless reads`. String oldtRelativePath = wh.getDatabaseManagedPath(olddb).toUri() .relativize(srcPath.toUri()).toString(); boolean tableInSpecifiedLoc = !oldtRelativePath.equalsIgnoreCase(name) && !oldtRelativePath.equalsIgnoreCase(name + Path.SEPARATOR); - if (!tableInSpecifiedLoc) { + + + if (renamedTranslatedToExternalTable || !tableInSpecifiedLoc) { srcFs = wh.getFs(srcPath); // get new location - Database db = msdb.getDatabase(catName, newDbName); assert(isReplicated == HMSHandler.isDbReplicationTarget(db)); - Path databasePath = constructRenamedPath(wh.getDatabaseManagedPath(db), srcPath); - destPath = new Path(databasePath, newTblName); - destFs = wh.getFs(destPath); + if (renamedTranslatedToExternalTable) { + destPath = new Path(newt.getSd().getLocation()); + } else { + Path databasePath = constructRenamedPath(wh.getDatabaseManagedPath(db), srcPath); + destPath = new Path(databasePath, newTblName); + newt.getSd().setLocation(destPath.toString()); + } - newt.getSd().setLocation(destPath.toString()); + destFs = wh.getFs(destPath); // check that destination does not exist otherwise we will be // overwriting data @@ -365,7 +375,6 @@ public class HiveAlterHandler implements AlterHandler { // operations other than table rename if (MetaStoreServerUtils.requireCalStats(null, null, newt, environmentContext) && !isPartitionedTable) { - Database db = msdb.getDatabase(catName, newDbName); assert(isReplicated == HMSHandler.isDbReplicationTarget(db)); // Update table stats. For partitioned table, we update stats in alterPartition() MetaStoreServerUtils.updateTableStatsSlow(db, newt, wh, false, true, environmentContext); @@ -677,7 +686,7 @@ public class HiveAlterHandler implements AlterHandler { if (!wh.mkdirs(destParentPath)) { throw new MetaException("Unable to create path " + destParentPath); } - + boolean clonePart = Optional.ofNullable(environmentContext) .map(EnvironmentContext::getProperties) .map(prop -> prop.get(RENAME_PARTITION_MAKE_COPY)) @@ -687,7 +696,7 @@ public class HiveAlterHandler implements AlterHandler { if (writeId > 0 && clonePart) { LOG.debug("Making a copy of the partition directory: {} under a new location: {}", srcPath, destPath); - + if (!wh.copyDir(srcPath, destPath, ReplChangeManager.shouldEnableCm(db, tbl))) { LOG.error("Copy failed for source: " + srcPath + " to destination: " + destPath); throw new IOException("File copy failed."); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java index 80847cc4a6..4817430051 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java @@ -760,7 +760,6 @@ public class MetastoreDefaultTransformer implements IMetaStoreMetadataTransforme if (oldTable.getSd().getLocation().equals(oldPath.toString())) { Path newPath = getTranslatedToExternalTableDefaultLocation(newDb, newTable); newTable.getSd().setLocation(newPath.toString()); - hmsHandler.getWh().renameDir(oldPath, newPath, ReplChangeManager.shouldEnableCm(oldDb, oldTable)); } } @@ -783,11 +782,9 @@ public class MetastoreDefaultTransformer implements IMetaStoreMetadataTransforme private boolean isTranslatedToExternalTable(Table table) { Map<String, String> p = table.getParameters(); - ; return p != null && MetaStoreUtils.isPropertyTrue(p, "EXTERNAL") && MetaStoreUtils.isPropertyTrue(p, "TRANSLATED_TO_EXTERNAL") && table.getSd() != null && table.getSd().isSetLocation(); - } private boolean tableLocationChanged(Table oldTable, Table newTable) throws MetaException {