This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8d7adb59105 HIVE-25324: Provide a config to disable
PartitionManagementTask (#5991)
8d7adb59105 is described below
commit 8d7adb59105a0db027ff5bdbf2c077cb3ecf1285
Author: Sai Hemanth Gantasala
<[email protected]>
AuthorDate: Wed Aug 13 04:51:23 2025 -0700
HIVE-25324: Provide a config to disable PartitionManagementTask (#5991)
---
.../hadoop/hive/metastore/conf/MetastoreConf.java | 3 ++-
.../hive/metastore/PartitionManagementTask.java | 8 +++----
.../hive/metastore/leader/HouseKeepingTasks.java | 6 +++--
.../hive/metastore/TestPartitionManagement.java | 26 +++++++++++++++++++++-
4 files changed, 35 insertions(+), 8 deletions(-)
diff --git
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 521cbcc2eac..91e68d7921a 100644
---
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -1265,7 +1265,8 @@ public enum ConfVars {
"Similarly if partition object exists in metastore and partition
location does not exist, partition object\n" +
"will be dropped. The second piece in partition management is retention
period. When 'discover.partition'\n" +
"is set to true and if 'partition.retention.period' table property is
defined, partitions that are older\n" +
- "than the specified retention period will be automatically dropped from
metastore along with the data."),
+ "than the specified retention period will be automatically dropped from
metastore along with the data.\n" +
+ "Set this value to 0 inorder to disable Partition Management Task"),
PARTITION_MANAGEMENT_TABLE_TYPES("metastore.partition.management.table.types",
"metastore.partition.management.table.types",
"MANAGED_TABLE,EXTERNAL_TABLE",
"Comma separated list of table types to use for partition management"),
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
index ca396fad70c..0749985392f 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
@@ -112,10 +112,10 @@ public void run() {
// will not be safe unless synchronized MSC is used. Using
synchronized MSC in multi-threaded context also
// defeats the purpose of thread pooled msck repair.
int threadPoolSize = MetastoreConf.getIntVar(conf,
- MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TASK_THREAD_POOL_SIZE);
+ MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TASK_THREAD_POOL_SIZE);
final ExecutorService executorService = Executors
- .newFixedThreadPool(Math.min(candidates.size(), threadPoolSize),
- new
ThreadFactoryBuilder().setDaemon(true).setNameFormat("PartitionDiscoveryTask-%d").build());
+ .newFixedThreadPool(Math.min(candidates.size(), threadPoolSize),
+ new
ThreadFactoryBuilder().setDaemon(true).setNameFormat("PartitionDiscoveryTask-%d").build());
CountDownLatch countDownLatch = new CountDownLatch(candidates.size());
LOG.info("Found {} candidate tables for partition discovery",
candidates.size());
setupMsckPathInvalidation();
@@ -123,7 +123,7 @@ public void run() {
for (TableName table : candidates) {
// this always runs in 'sync' mode where partitions can be added and
dropped
MsckInfo msckInfo = new MsckInfo(table.getCat(), table.getDb(),
table.getTable(),
- null, null, true, true, true, -1);
+ null, null, true, true, true, -1);
executorService.submit(new MsckThread(msckInfo, msckConf,
qualifiedTableName, countDownLatch));
}
countDownLatch.await();
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java
index f09be0966ce..3220b56f7d3 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java
@@ -114,8 +114,10 @@ public void takeLeadership(LeaderElection election) throws
Exception {
task.setConf(configuration);
task.enforceMutex(election.enforceMutex());
long freq = task.runFrequency(TimeUnit.MILLISECONDS);
- runningTasks.add(task);
- metastoreTaskThreadPool.getPool().scheduleAtFixedRate(task, freq,
freq, TimeUnit.MILLISECONDS);
+ if (freq > 0) {
+ runningTasks.add(task);
+ metastoreTaskThreadPool.getPool().scheduleAtFixedRate(task, freq,
freq, TimeUnit.MILLISECONDS);
+ }
}
}
diff --git
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
index 2b1f65a6724..e2fd7bf9cc5 100644
---
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
+++
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
@@ -49,6 +49,8 @@
import org.apache.hadoop.hive.metastore.client.builder.TableBuilder;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
+import org.apache.hadoop.hive.metastore.leader.HouseKeepingTasks;
+import org.apache.hadoop.hive.metastore.leader.StaticLeaderElection;
import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
@@ -237,7 +239,7 @@ public void testPartitionDiscoveryDisabledByDefault()
throws TException, IOExcep
}
@Test
- public void testPartitionDiscoveryEnabledBothTableTypes() throws TException,
IOException {
+ public void testPartitionDiscoveryEnabledBothTableTypes() throws Exception {
String dbName = "db2";
String tableName = "tbl2";
Map<String, Column> colMap = buildAllColumns();
@@ -318,6 +320,28 @@ public void testPartitionDiscoveryEnabledBothTableTypes()
throws TException, IOE
runPartitionManagementTask(conf);
partitions = client.listPartitions(dbName, tableName, (short) -1);
assertEquals(4, partitions.size());
+
+ // Disable the partition management task by setting its frequency to 0.
+ // There are currently 4 directories under the table path; this test adds
+ // two more paths and verifies that the new partitions are not added to the
+ // metastore while the partition management task is disabled.
+ Assert.assertTrue(fs.mkdirs(new Path(tablePath,
"state=AZ/dt=2025-07-01")));
+ Assert.assertTrue(fs.mkdirs(new Path(tablePath,
"state=NV/dt=2025-07-02")));
+ assertEquals(6, fs.listStatus(tablePath).length);
+
conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TASK_FREQUENCY.getVarname(),
"0");
+ conf.set(MetastoreConf.ConfVars.TASK_THREADS_REMOTE_ONLY.getVarname(),
+ "org.apache.hadoop.hive.metastore.PartitionManagementTask");
+ HouseKeepingTasks listener = new HouseKeepingTasks(conf, true);
+ StaticLeaderElection election = new StaticLeaderElection();
+ election.setName("TestPartitionManagement");
+ listener.takeLeadership(election);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
+ assertEquals(4, partitions.size());
+
+ // Re-enable PMT and verify 6 partitions
+
conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TASK_FREQUENCY.getVarname(),
"1");
+ runPartitionManagementTask(conf);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
+ assertEquals(6, partitions.size());
}
@Test