This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 32adbe4dfb2 [HUDI-6256] Fix the data table archiving and MDT cleaning 
config conf… (#8792)
32adbe4dfb2 is described below

commit 32adbe4dfb2a0976cb312c2fa14eb49f5a29a151
Author: flashJd <jianyong...@163.com>
AuthorDate: Fri Jun 2 09:22:17 2023 +0800

    [HUDI-6256] Fix the data table archiving and MDT cleaning config conf… 
(#8792)
    
    * Fix the data table archiving and MDT cleaning config conflict
    * Takes the MDT cleaning num commits as min(3, num_commits_DT), where 3 is 
the hardcoded maximum cleaning num commits for MDT
    
    ---------
    
    Co-authored-by: Danny Chan <yuzhao....@gmail.com>
---
 .../hudi/metadata/HoodieMetadataWriteUtils.java    |  2 +-
 .../functional/TestHoodieBackedMetadata.java       | 40 ++++++++++++++++++++++
 .../client/functional/TestHoodieMetadataBase.java  |  2 +-
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
index 5221f6523b0..df951ff3796 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java
@@ -93,7 +93,7 @@ public class HoodieMetadataWriteUtils {
             .withCleanerParallelism(parallelism)
             .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
             .withFailedWritesCleaningPolicy(failedWritesCleaningPolicy)
-            .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)
+            .retainCommits(Math.min(writeConfig.getCleanerCommitsRetained(), 
DEFAULT_METADATA_CLEANER_COMMITS_RETAINED))
             .build())
         // we will trigger archive manually, to ensure only regular writer 
invokes it
         .withArchivalConfig(HoodieArchivalConfig.newBuilder()
diff --git 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
index 10b134887c4..b540f97d806 100644
--- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
+++ 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
@@ -538,6 +538,46 @@ public class TestHoodieBackedMetadata extends 
TestHoodieMetadataBase {
     assertEquals("0000004", 
metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp());
   }
 
+  @ParameterizedTest
+  @EnumSource(HoodieTableType.class)
+  public void testMetadataArchivalCleanConfig(HoodieTableType tableType) 
throws Exception {
+    init(tableType, false);
+    writeConfig = getWriteConfigBuilder(true, true, false)
+        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
+            .enable(true)
+            .enableMetrics(false)
+            .withMaxNumDeltaCommitsBeforeCompaction(1)
+            .build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder()
+            .retainCommits(1)
+            .build())
+        .withArchivalConfig(HoodieArchivalConfig.newBuilder()
+            .archiveCommitsWith(2, 3)
+            .build())
+        .build();
+    initWriteConfigAndMetatableWriter(writeConfig, true);
+
+    AtomicInteger commitTime = new AtomicInteger(1);
+    // Trigger 4 regular writes in data table.
+    for (int i = 1; i <= 4; i++) {
+      doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), 
INSERT);
+    }
+
+    // The earliest deltacommit in the metadata table should be "0000001",
+    // and the "00000000000000" init deltacommit should be archived.
+    HoodieTableMetaClient metadataMetaClient = 
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build();
+    HoodieActiveTimeline metadataTimeline = 
metadataMetaClient.reloadActiveTimeline();
+    assertEquals("0000001", 
metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp());
+
+    getHoodieWriteClient(writeConfig);
+    // Trigger data table archive, should archive "0000001", "0000002"
+    archiveDataTable(writeConfig, 
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build());
+    // Trigger a regular write operation. metadata timeline archival should 
kick in and catch up with data table.
+    doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), 
INSERT);
+    metadataTimeline = metadataMetaClient.reloadActiveTimeline();
+    assertEquals("0000003", 
metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp());
+  }
+
   @ParameterizedTest
   @EnumSource(HoodieTableType.class)
   public void testMetadataInsertUpsertClean(HoodieTableType tableType) throws 
Exception {
diff --git 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
index 7974d9151a2..a8cd9a37739 100644
--- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
+++ 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
@@ -400,7 +400,7 @@ public class TestHoodieMetadataBase extends 
HoodieClientTestHarness {
             .withCleanerParallelism(parallelism)
             .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
             
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
-            .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)
+            .retainCommits(Math.min(writeConfig.getCleanerCommitsRetained(), 
DEFAULT_METADATA_CLEANER_COMMITS_RETAINED))
             .build())
         // we will trigger archival manually, to control the instant times
         .withArchivalConfig(HoodieArchivalConfig.newBuilder()

Reply via email to