This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 817ff27 HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan) 817ff27 is described below commit 817ff27e96ecfce6c70c5850830e55a6e6f37da6 Author: Ashish Kumar Sharma <ashishkumarsharm...@gmail.com> AuthorDate: Thu Sep 23 11:03:18 2021 +0530 HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan) Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#2651) --- .../hadoop/hive/ql/txn/compactor/Cleaner.java | 14 +++ .../hive/ql/txn/compactor/CompactorTest.java | 7 +- .../hadoop/hive/ql/txn/compactor/TestCleaner.java | 113 +++++++++++++++++++++ .../thrift/gen-cpp/hive_metastore_constants.cpp | 4 + .../gen/thrift/gen-cpp/hive_metastore_constants.h | 2 + .../metastore/api/hive_metastoreConstants.java | 3 + .../src/gen/thrift/gen-php/metastore/Constant.php | 12 +++ .../gen/thrift/gen-py/hive_metastore/constants.py | 2 + .../gen/thrift/gen-rb/hive_metastore_constants.rb | 4 + .../hive/metastore/utils/MetaStoreUtils.java | 8 ++ .../src/main/thrift/hive_metastore.thrift | 2 + 11 files changed, 170 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java index 13fc8bc..8149494 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.metrics.PerfLogger; import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.io.AcidDirectory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -179,6 +180,13 @@ public class Cleaner extends MetaStoreCompactorThread { txnHandler.markCleaned(ci); return; } + if (MetaStoreUtils.isNoCleanUpSet(t.getParameters())) { + // The table was marked no clean up true. + LOG.info("Skipping table " + ci.getFullTableName() + " clean up, as NO_CLEANUP set to true"); + txnHandler.markCleaned(ci); + return; + } + Partition p = null; if (ci.partName != null) { p = resolvePartition(ci); @@ -189,6 +197,12 @@ public class Cleaner extends MetaStoreCompactorThread { txnHandler.markCleaned(ci); return; } + if (MetaStoreUtils.isNoCleanUpSet(p.getParameters())) { + // The partition was marked no clean up true. + LOG.info("Skipping partition " + ci.getFullPartitionName() + " clean up, as NO_CLEANUP set to true"); + txnHandler.markCleaned(ci); + return; + } } StorageDescriptor sd = resolveStorageDescriptor(t, p); final String location = sd.getLocation(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java index 66335c9..b3788e4 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java @@ -201,12 +201,17 @@ public abstract class CompactorTest { } protected Partition newPartition(Table t, String value, List<Order> sortCols) throws Exception { + return newPartition(t, value, sortCols, new HashMap<>()); + } + + protected Partition newPartition(Table t, String value, List<Order> sortCols, Map<String, String> parameters) + throws Exception { Partition part = new Partition(); part.addToValues(value); part.setDbName(t.getDbName()); part.setTableName(t.getTableName()); part.setSd(newStorageDescriptor(getLocation(t.getTableName(), value), sortCols)); - part.setParameters(new HashMap<String, String>()); + part.setParameters(parameters); ms.add_partition(part); return part; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java index 3c02606..665d47c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java @@ -24,6 +24,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.CommitTxnRequest; import org.apache.hadoop.hive.metastore.api.CompactionRequest; import org.apache.hadoop.hive.metastore.api.CompactionType; +import org.apache.hadoop.hive.metastore.api.GetPartitionRequest; +import org.apache.hadoop.hive.metastore.api.GetTableRequest; import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest; import org.apache.hadoop.hive.metastore.api.FindNextCompactRequest; import org.apache.hadoop.hive.metastore.api.Partition; @@ -43,7 +45,9 @@ import org.junit.Test; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_COMPACTOR_CLEANER_RETENTION_TIME; @@ -604,4 +608,113 @@ public class TestCleaner extends CompactorTest { compactorTestCleanup(); } + @Test + public void NoCleanupAfterMajorCompaction() throws Exception { + Map<String, String> parameters = new HashMap<>(); + + //With no cleanup true + parameters.put("no_cleanup", "true"); + Table t = newTable("default", "dcamc", false, parameters); + + addBaseFile(t, null, 20L, 20); + addDeltaFile(t, null, 21L, 22L, 2); + addDeltaFile(t, null, 23L, 24L, 2); + addBaseFile(t, null, 25L, 25); + + burnThroughTransactions("default", "dcamc", 25); + + CompactionRequest rqst = new CompactionRequest("default", "dcamc", CompactionType.MAJOR); + compactInTxn(rqst); + + startCleaner(); + // Check there are no compactions requests left. + ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); + Assert.assertEquals(1, rsp.getCompactsSize()); + Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState()); + + // Check that the files are not removed + List<Path> paths = getDirectories(conf, t, null); + Assert.assertEquals(4, paths.size()); + + //With no clean up false + t = ms.getTable(new GetTableRequest("default", "dcamc")); + t.getParameters().put("no_cleanup", "false"); + ms.alter_table("default", "dcamc", t); + rqst = new CompactionRequest("default", "dcamc", CompactionType.MAJOR); + compactInTxn(rqst); + + startCleaner(); + // Check there are no compactions requests left. + rsp = txnHandler.showCompact(new ShowCompactRequest()); + Assert.assertEquals(2, rsp.getCompactsSize()); + Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState()); + + // Check that the files are not removed + paths = getDirectories(conf, t, null); + Assert.assertEquals(1, paths.size()); + Assert.assertEquals("base_25", paths.get(0).getName()); + } + + @Test + public void noCleanupAfterMinorCompactionOnPartition() throws Exception { + Map<String, String> parameters = new HashMap<>(); + parameters.put("NO_CLEANUP", "True"); + Table t = newTable("default", "dcamicop", true); + Partition p = newPartition(t, "today", null, parameters); + + addBaseFile(t, p, 20L, 20); + addDeltaFile(t, p, 21L, 22L, 2); + addDeltaFile(t, p, 23L, 24L, 2); + addDeltaFile(t, p, 21L, 24L, 4); + + burnThroughTransactions("default", "dcamicop", 25); + + CompactionRequest rqst = new CompactionRequest("default", "dcamicop", CompactionType.MINOR); + rqst.setPartitionname("ds=today"); + compactInTxn(rqst); + + startCleaner(); + + // Check there are no compactions requests left. + ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); + Assert.assertEquals(1, rsp.getCompactsSize()); + Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState()); + + // Check that the files are not removed + List<Path> paths = getDirectories(conf, t, p); + Assert.assertEquals(4, paths.size()); + + // compaction with no cleanup false + ArrayList<String> list = new ArrayList<>(); + list.add("ds=today"); + p = ms.getPartition("default", "dcamicop", "ds=today"); + p.getParameters().put("NO_CLEANUP", "false"); + ms.alter_partition("default", "dcamicop", p); + rqst = new CompactionRequest("default", "dcamicop", CompactionType.MINOR); + rqst.setPartitionname("ds=today"); + compactInTxn(rqst); + + startCleaner(); + + // Check there are no compactions requests left. + rsp = txnHandler.showCompact(new ShowCompactRequest()); + Assert.assertEquals(2, rsp.getCompactsSize()); + Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState()); + + // Check that the files are removed + paths = getDirectories(conf, t, p); + Assert.assertEquals(2, paths.size()); + boolean sawBase = false, sawDelta = false; + for (Path path : paths) { + if (path.getName().equals("base_20")) { + sawBase = true; + } else if (path.getName().equals(makeDeltaDirNameCompacted(21, 24))) { + sawDelta = true; + } else { + Assert.fail("Unexpected file " + path.getName()); + } + } + Assert.assertTrue(sawBase); + Assert.assertTrue(sawDelta); + } } diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp index eee3633..1b981f5 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp @@ -77,6 +77,10 @@ hive_metastoreConstants::hive_metastoreConstants() { PARTITION_TRANSFORM_SPEC = "partition_transform_spec"; + NO_CLEANUP = "no_cleanup"; + + CTAS_LEGACY_CONFIG = "create_table_as_external"; + } }}} // namespace diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h index 284f082..d5f7cdf 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h @@ -48,6 +48,8 @@ class hive_metastoreConstants { std::string JDBC_CONFIG_PREFIX; std::string TABLE_IS_CTAS; std::string PARTITION_TRANSFORM_SPEC; + std::string NO_CLEANUP; + std::string CTAS_LEGACY_CONFIG; }; extern const hive_metastoreConstants g_hive_metastore_constants; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java index 1a42b52..2851b22 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java @@ -75,5 +75,8 @@ package org.apache.hadoop.hive.metastore.api; public static final java.lang.String PARTITION_TRANSFORM_SPEC = "partition_transform_spec"; + public static final java.lang.String NO_CLEANUP = "no_cleanup"; + public static final java.lang.String CTAS_LEGACY_CONFIG = "create_table_as_external"; + } diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php index 1335e99..b6f4e80 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php @@ -51,6 +51,8 @@ final class Constant extends \Thrift\Type\TConstant static protected $JDBC_CONFIG_PREFIX; static protected $TABLE_IS_CTAS; static protected $PARTITION_TRANSFORM_SPEC; + static protected $NO_CLEANUP; + static protected $CTAS_LEGACY_CONFIG; protected static function init_DDL_TIME() { @@ -216,4 +218,14 @@ final class Constant extends \Thrift\Type\TConstant { return "partition_transform_spec"; } + + protected static function init_NO_CLEANUP() + { + return "no_cleanup"; + } + + protected static function init_CTAS_LEGACY_CONFIG() + { + return "create_table_as_external"; + } } diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py index 9b7332b..ef5a2b1 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py @@ -45,3 +45,5 @@ DRUID_CONFIG_PREFIX = "druid." JDBC_CONFIG_PREFIX = "hive.sql." TABLE_IS_CTAS = "created_with_ctas" PARTITION_TRANSFORM_SPEC = "partition_transform_spec" +NO_CLEANUP = "no_cleanup" +CTAS_LEGACY_CONFIG = "create_table_as_external" diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb index 042e65e..d441f3a 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb @@ -73,3 +73,7 @@ TABLE_IS_CTAS = %q"created_with_ctas" PARTITION_TRANSFORM_SPEC = %q"partition_transform_spec" +NO_CLEANUP = %q"no_cleanup" + +CTAS_LEGACY_CONFIG = %q"create_table_as_external" + diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index cb2f425..48d2bb1 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -1119,4 +1119,12 @@ public class MetaStoreUtils { */ SOURCE, TARGET; } + + public static boolean isNoCleanUpSet(Map<String, String> parameters) { + String noCleanUp = parameters.get(hive_metastoreConstants.NO_CLEANUP); + if (noCleanUp == null) { + noCleanUp = parameters.get(hive_metastoreConstants.NO_CLEANUP.toUpperCase()); + } + return noCleanUp != null && noCleanUp.equalsIgnoreCase("true"); + } } diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index c81ba2e..941fe58 100644 --- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -3083,3 +3083,5 @@ const string DRUID_CONFIG_PREFIX = "druid.", const string JDBC_CONFIG_PREFIX = "hive.sql.", const string TABLE_IS_CTAS = "created_with_ctas", const string PARTITION_TRANSFORM_SPEC = "partition_transform_spec", +const string NO_CLEANUP = "no_cleanup", +const string CTAS_LEGACY_CONFIG = "create_table_as_external", \ No newline at end of file