This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e9abc71ed03 feat(table-services): Add config to filter partitions during full clean (#17550)
7e9abc71ed03 is described below

commit 7e9abc71ed03ebebdfee63f7010aed8c6e4aac2a
Author: Prashant Wason <[email protected]>
AuthorDate: Fri Feb 27 15:11:52 2026 -0800

    feat(table-services): Add config to filter partitions during full clean (#17550)
    
    When incremental cleaning is disabled, users can now use a regex or a
    static list to filter which partitions are cleaned during full clean
    operations. If both are set, the static list takes precedence. This
    helps manage memory usage on large tables.
    
    New configs:
    - hoodie.clean.partition.filter.regex: Regex pattern to match partitions
    - hoodie.clean.partition.filter.selected: Comma-separated list of partitions
    
    Co-authored-by: Claude Opus 4.6 <[email protected]>
---
 .../org/apache/hudi/config/HoodieCleanConfig.java  | 25 +++++++++++++++++
 .../org/apache/hudi/config/HoodieWriteConfig.java  |  8 ++++++
 .../hudi/table/action/clean/CleanPlanner.java      | 32 +++++++++++++++++++++-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java
index 178495b07836..e5a638956575 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java
@@ -212,6 +212,31 @@ public class HoodieCleanConfig extends HoodieConfig {
           + "By using local engine context, file listing is performed on the 
driver, allowing targeted memory scaling. "
           + "When enabled, both non-partitioned datasets and metadata tables 
use the driver for scheduling cleans.");
 
+  private static final String CLEAN_PARTITION_FILTER_REGEX_KEY = 
"hoodie.clean.partition.filter.regex";
+  private static final String CLEAN_PARTITION_FILTER_SELECTED_KEY = 
"hoodie.clean.partition.filter.selected";
+
+  public static final ConfigProperty<String> CLEAN_PARTITION_FILTER_REGEX = 
ConfigProperty
+      .key(CLEAN_PARTITION_FILTER_REGEX_KEY)
+      .noDefaultValue()
+      .withAlternatives("hoodie.cleaner.partition.filter.regex")
+      .markAdvanced()
+      .sinceVersion("1.2.0")
+      .withDocumentation("When incremental clean is disabled, this regex can 
be used to filter the partitions to be cleaned. "
+          + "Only partitions matching this regex pattern will be cleaned. "
+          + "This can be useful for very large tables to avoid OOM issues 
during cleaning. "
+          + "If both this config and " + CLEAN_PARTITION_FILTER_SELECTED_KEY + 
" are set, the selected partitions take precedence.");
+
+  public static final ConfigProperty<String> CLEAN_PARTITION_FILTER_SELECTED = 
ConfigProperty
+      .key(CLEAN_PARTITION_FILTER_SELECTED_KEY)
+      .noDefaultValue()
+      .withAlternatives("hoodie.cleaner.partition.filter.selected")
+      .markAdvanced()
+      .sinceVersion("1.2.0")
+      .withDocumentation("When incremental clean is disabled, this 
comma-separated list of partitions can be used to filter the partitions to be 
cleaned. "
+          + "Only the specified partitions will be cleaned. "
+          + "This can be useful for very large tables to avoid OOM issues 
during cleaning. "
+          + "If both this config and " + CLEAN_PARTITION_FILTER_REGEX_KEY + " 
are set, the selected partitions take precedence.");
+
   /** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
   @Deprecated
   public static final String CLEANER_POLICY_PROP = CLEANER_POLICY.key();
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 2187d03aa98d..b4b1af1059d9 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -1838,6 +1838,14 @@ public class HoodieWriteConfig extends HoodieConfig {
     return getBoolean(HoodieCleanConfig.CLEANER_INCREMENTAL_MODE_ENABLE);
   }
 
+  public String getCleanerPartitionFilterRegex() {
+    return getString(HoodieCleanConfig.CLEAN_PARTITION_FILTER_REGEX);
+  }
+
+  public String getCleanerPartitionFilterSelected() {
+    return getString(HoodieCleanConfig.CLEAN_PARTITION_FILTER_SELECTED);
+  }
+
   public boolean inlineCompactionEnabled() {
     return getBoolean(HoodieCompactionConfig.INLINE_COMPACT);
   }
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index 5b1a76f5da6b..d5d5b5ebabe2 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -254,11 +254,41 @@ public class CleanPlanner<T, I, K, O> implements 
Serializable {
    */
   private List<String> getPartitionPathsForFullCleaning() {
     // Go to brute force mode of scanning all partitions
+    List<String> allPartitionPaths;
     try {
-      return hoodieTable.getTableMetadata().getAllPartitionPaths();
+      allPartitionPaths = 
hoodieTable.getTableMetadata().getAllPartitionPaths();
     } catch (IOException ioe) {
       throw new HoodieIOException("Fetching all partitions failed ", ioe);
     }
+
+    String partitionSelected = config.getCleanerPartitionFilterSelected();
+    String partitionRegex = config.getCleanerPartitionFilterRegex();
+
+    // Return early if no partition filter is configured
+    if (StringUtils.isNullOrEmpty(partitionSelected) && 
StringUtils.isNullOrEmpty(partitionRegex)) {
+      return allPartitionPaths;
+    }
+
+    // Partition filter cannot be used with incremental cleaning mode
+    if (config.incrementalCleanerModeEnabled()) {
+      throw new IllegalArgumentException("Incremental Cleaning mode is 
enabled. Partition filter for clean cannot be used.");
+    }
+
+    // Static list of partitions takes precedence over regex pattern
+    List<String> filteredPartitions;
+    if (!StringUtils.isNullOrEmpty(partitionSelected)) {
+      List<String> selectedPartitions = 
Arrays.asList(partitionSelected.split(","));
+      filteredPartitions = allPartitionPaths.stream()
+          .filter(selectedPartitions::contains)
+          .collect(Collectors.toList());
+      log.info("Restricting partitions to clean using selected list. 
Partitions to clean: {}", filteredPartitions);
+    } else {
+      filteredPartitions = allPartitionPaths.stream()
+          .filter(p -> p.matches(partitionRegex))
+          .collect(Collectors.toList());
+      log.info("Restricting partitions to clean using regex '{}'. Partitions 
to clean: {}", partitionRegex, filteredPartitions);
+    }
+    return filteredPartitions;
   }
 
   /**

Reply via email to