vinothchandar commented on a change in pull request #2422: URL: https://github.com/apache/hudi/pull/2422#discussion_r554455329
########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java ########## @@ -158,8 +159,7 @@ public CleanPlanner(HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig confi * @param newInstantToRetain * @return */ - private List<String> getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata cleanMetadata, - Option<HoodieInstant> newInstantToRetain) { + private List<String> getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata cleanMetadata, Option<HoodieInstant> newInstantToRetain) { Review comment: avoid the reformatting? ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java ########## @@ -168,10 +168,16 @@ public CleanPlanner(HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig confi cleanMetadata.getEarliestCommitToRetain()) && HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, newInstantToRetain.get().getTimestamp())).flatMap(instant -> { try { - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(hoodieTable.getActiveTimeline().getInstantDetails(instant).get(), - HoodieCommitMetadata.class); - return commitMetadata.getPartitionToWriteStats().keySet().stream(); + if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instant.getAction())) { Review comment: @satishkotha I think we should not tie this to the incremental mode of cleaning alone. We need the delete paths even when cleaning non-incrementally. That's why I had implemented this as a standalone in my first commit. 
########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java ########## @@ -322,6 +325,20 @@ public CleanPlanner(HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig confi } return deletePaths; } + + private List<CleanFileInfo> getReplacedFilesEligibleToClean(List<String> savepointedFiles, String partitionPath, Option<HoodieInstant> earliestCommitToRetain) { Review comment: do you mean `savepointInstants`? ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java ########## @@ -168,10 +168,16 @@ public CleanPlanner(HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig confi cleanMetadata.getEarliestCommitToRetain()) && HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, newInstantToRetain.get().getTimestamp())).flatMap(instant -> { try { - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(hoodieTable.getActiveTimeline().getInstantDetails(instant).get(), - HoodieCommitMetadata.class); - return commitMetadata.getPartitionToWriteStats().keySet().stream(); + if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instant.getAction())) { Review comment: Alternatively, we should look at the full cleaning path and also do the necessary change there. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java ########## @@ -122,6 +122,7 @@ static HoodieRollbackStat mergeRollbackStat(HoodieRollbackStat stat1, HoodieRoll List<ListingBasedRollbackRequest> partitionRollbackRequests = new ArrayList<>(); switch (instantToRollback.getAction()) { case HoodieTimeline.COMMIT_ACTION: + case HoodieTimeline.REPLACE_COMMIT_ACTION: Review comment: this looks like something we should have had from the beginning? 
########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java ########## @@ -168,10 +168,16 @@ public CleanPlanner(HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig confi cleanMetadata.getEarliestCommitToRetain()) && HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, newInstantToRetain.get().getTimestamp())).flatMap(instant -> { try { - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(hoodieTable.getActiveTimeline().getInstantDetails(instant).get(), - HoodieCommitMetadata.class); - return commitMetadata.getPartitionToWriteStats().keySet().stream(); + if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instant.getAction())) { Review comment: @satishkotha I think we should not tie this to the incremental mode of cleaning alone. We need the delete paths even when cleaning non-incrementally. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java ########## @@ -168,10 +168,16 @@ public CleanPlanner(HoodieTable<T, I, K, O> hoodieTable, HoodieWriteConfig confi cleanMetadata.getEarliestCommitToRetain()) && HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, newInstantToRetain.get().getTimestamp())).flatMap(instant -> { try { - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(hoodieTable.getActiveTimeline().getInstantDetails(instant).get(), - HoodieCommitMetadata.class); - return commitMetadata.getPartitionToWriteStats().keySet().stream(); + if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instant.getAction())) { Review comment: I take it back. We return all partition paths otherwise. So it's all good. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org