This is an automated email from the ASF dual-hosted git repository. danny0405 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push: new 32adbe4dfb2 [HUDI-6256] Fix the data table archiving and MDT cleaning config conf… (#8792) 32adbe4dfb2 is described below commit 32adbe4dfb2a0976cb312c2fa14eb49f5a29a151 Author: flashJd <jianyong...@163.com> AuthorDate: Fri Jun 2 09:22:17 2023 +0800 [HUDI-6256] Fix the data table archiving and MDT cleaning config conf… (#8792) * Fix the data table archiving and MDT cleaning config conflict * Takes the MDT cleaning num commits as min(3, num_commits_DT), while 3 is the hardcode max cleaning num commits for MDT --------- Co-authored-by: Danny Chan <yuzhao....@gmail.com> --- .../hudi/metadata/HoodieMetadataWriteUtils.java | 2 +- .../functional/TestHoodieBackedMetadata.java | 40 ++++++++++++++++++++++ .../client/functional/TestHoodieMetadataBase.java | 2 +- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 5221f6523b0..df951ff3796 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -93,7 +93,7 @@ public class HoodieMetadataWriteUtils { .withCleanerParallelism(parallelism) .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .withFailedWritesCleaningPolicy(failedWritesCleaningPolicy) - .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED) + .retainCommits(Math.min(writeConfig.getCleanerCommitsRetained(), DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)) .build()) // we will trigger archive manually, to ensure only regular writer invokes it .withArchivalConfig(HoodieArchivalConfig.newBuilder() diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 10b134887c4..b540f97d806 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -538,6 +538,46 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { assertEquals("0000004", metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp()); } + @ParameterizedTest + @EnumSource(HoodieTableType.class) + public void testMetadataArchivalCleanConfig(HoodieTableType tableType) throws Exception { + init(tableType, false); + writeConfig = getWriteConfigBuilder(true, true, false) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .enable(true) + .enableMetrics(false) + .withMaxNumDeltaCommitsBeforeCompaction(1) + .build()) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .retainCommits(1) + .build()) + .withArchivalConfig(HoodieArchivalConfig.newBuilder() + .archiveCommitsWith(2, 3) + .build()) + .build(); + initWriteConfigAndMetatableWriter(writeConfig, true); + + AtomicInteger commitTime = new AtomicInteger(1); + // Trigger 4 regular writes in data table. + for (int i = 1; i <= 4; i++) { + doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), INSERT); + } + + // The earliest deltacommit in the metadata table should be "0000001", + // and the "00000000000000" init deltacommit should be archived. + HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(metadataTableBasePath).build(); + HoodieActiveTimeline metadataTimeline = metadataMetaClient.reloadActiveTimeline(); + assertEquals("0000001", metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp()); + + getHoodieWriteClient(writeConfig); + // Trigger data table archive, should archive "0000001", "0000002" + archiveDataTable(writeConfig, HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build()); + // Trigger a regular write operation. metadata timeline archival should kick in and catch up with data table. + doWriteOperation(testTable, "000000" + (commitTime.getAndIncrement()), INSERT); + metadataTimeline = metadataMetaClient.reloadActiveTimeline(); + assertEquals("0000003", metadataTimeline.getCommitsTimeline().firstInstant().get().getTimestamp()); + } + @ParameterizedTest @EnumSource(HoodieTableType.class) public void testMetadataInsertUpsertClean(HoodieTableType tableType) throws Exception { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java index 7974d9151a2..a8cd9a37739 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java @@ -400,7 +400,7 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness { .withCleanerParallelism(parallelism) .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) - .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED) + .retainCommits(Math.min(writeConfig.getCleanerCommitsRetained(), DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)) .build()) // we will trigger archival manually, to control the instant times .withArchivalConfig(HoodieArchivalConfig.newBuilder()