nsivabalan commented on a change in pull request #4016: URL: https://github.com/apache/hudi/pull/4016#discussion_r774249257
########## File path: hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java ########## @@ -750,6 +750,59 @@ public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean) throw assertTrue(testTable.baseFileExists(p0, "00000000000003", file3P0C2)); } + @Test + public void testCleanEmptyInstants() throws Exception { + HoodieWriteConfig config = + HoodieWriteConfig.newBuilder() + .withPath(basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainFileVersions(1).build()) + .build(); + metaClient = HoodieTableMetaClient.reload(metaClient); + + int commitCount = 20; + int cleanCount = 20; + + int startInstant = 1; + for (int i = 0; i < commitCount; i++, startInstant++) { + String commitTime = makeNewCommitTime(startInstant); + HoodieTestTable.of(metaClient).addCommit(commitTime); + } + + for (int i = 0; i < cleanCount; i++, startInstant++) { + String commitTime = makeNewCommitTime(startInstant); + createCleanMetadata(commitTime + "", false, true); + } + + List<HoodieCleanStat> cleanStats = runCleaner(config); + HoodieActiveTimeline timeline = metaClient.reloadActiveTimeline(); + + assertEquals(0, cleanStats.size(), "Must not clean any files"); + assertEquals(1, timeline.getTimelineOfActions( Review comment: may I know why we expect 1 here. we would have deleted all empty instants right within runcleaner? also, can you help me understand rest of the assertions. not very apparent to me. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java ########## @@ -230,11 +230,15 @@ public HoodieCleanMetadata execute() { .filterInflightsAndRequested().getInstants().collect(Collectors.toList()); if (pendingCleanInstants.size() > 0) { pendingCleanInstants.forEach(hoodieInstant -> { - LOG.info("Finishing previously unfinished cleaner instant=" + hoodieInstant); - try { - cleanMetadataList.add(runPendingClean(table, hoodieInstant)); - } catch (Exception e) { - LOG.warn("Failed to perform previous clean operation, instant: " + hoodieInstant, e); + if (table.getCleanTimeline().isEmpty(hoodieInstant)) { Review comment: if we have taken care of empty instants during planning phase, we should not see empty instants during execution phase right? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org