This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new a5e5c7ba1e2 [HUDI-6675] Fix Clean action will delete the whole table (#9413)
a5e5c7ba1e2 is described below

commit a5e5c7ba1e2db1b3d8d8fd6da441998d54e57a80
Author: leosanqing <liurongtong...@qq.com>
AuthorDate: Mon Aug 14 11:28:14 2023 +0800

    [HUDI-6675] Fix Clean action will delete the whole table (#9413)
    
    The clean action mistakenly deletes the whole table when the table is non-partitioned.
    
    ---------
    
    Co-authored-by: Sagar Sumit <sagarsumi...@gmail.com>
---
 .../table/action/clean/CleanActionExecutor.java    | 10 ++++-
 .../java/org/apache/hudi/table/TestCleaner.java    | 51 ++++++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)
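
For context on the failure mode: a non-partitioned table stores its data directly under the base path, and its relative partition path is the empty string. Before this patch, the executor built the deletion target as basePath + "/" + entry, which for an empty entry collapses to the base path itself. A minimal sketch of that path arithmetic (plain Java with hypothetical paths, not Hudi code):

    public class EmptyPartitionPathSketch {
      public static void main(String[] args) {
        String basePath = "/tmp/hudi_table"; // stand-in for metaClient.getBasePath()
        String entry = "";                   // a non-partitioned table yields an empty partition path
        // Pre-patch, the delete target was built by plain concatenation:
        String deleteTarget = basePath + "/" + entry;
        // Prints "/tmp/hudi_table/": recursively deleting this path wipes the whole table.
        System.out.println(deleteTarget);
      }
    }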

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
index c04f1ba8f21..05e1056324a 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
@@ -54,6 +54,8 @@ import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
+
 public class CleanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieCleanMetadata> {
 
   private static final long serialVersionUID = 1L;
@@ -144,10 +146,14 @@ public class CleanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K,
     Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats
         .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
 
-    List<String> partitionsToBeDeleted = cleanerPlan.getPartitionsToBeDeleted() != null ? cleanerPlan.getPartitionsToBeDeleted() : new ArrayList<>();
+    List<String> partitionsToBeDeleted = table.getMetaClient().getTableConfig().isTablePartitioned() && cleanerPlan.getPartitionsToBeDeleted() != null
+        ? cleanerPlan.getPartitionsToBeDeleted()
+        : new ArrayList<>();
     partitionsToBeDeleted.forEach(entry -> {
       try {
-        deleteFileAndGetResult(table.getMetaClient().getFs(), table.getMetaClient().getBasePath() + "/" + entry);
+        if (!isNullOrEmpty(entry)) {
+          deleteFileAndGetResult(table.getMetaClient().getFs(), table.getMetaClient().getBasePath() + "/" + entry);
+        }
       } catch (IOException e) {
         LOG.warn("Partition deletion failed " + entry);
       }
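
The patch adds two guards, sketched below as standalone Java (not the Hudi classes): a stand-in for HoodieTableConfig#isTablePartitioned short-circuits partition deletion entirely for non-partitioned tables, and the isNullOrEmpty check skips any empty entry so the delete target can never collapse to the base path.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class CleanGuardSketch {

      // Mirrors org.apache.hudi.common.util.StringUtils.isNullOrEmpty.
      static boolean isNullOrEmpty(String s) {
        return s == null || s.isEmpty();
      }

      // Guard 1: a non-partitioned table has no partitions to delete,
      // regardless of what the cleaner plan carries.
      static List<String> partitionsToDelete(boolean tablePartitioned, List<String> planned) {
        return tablePartitioned && planned != null ? planned : new ArrayList<>();
      }

      public static void main(String[] args) {
        List<String> planned = Collections.singletonList(""); // the dangerous empty entry
        // Non-partitioned table: nothing is scheduled for deletion.
        System.out.println(partitionsToDelete(false, planned)); // prints []
        // Partitioned table: guard 2 still filters out empty entries.
        for (String entry : partitionsToDelete(true, planned)) {
          if (!isNullOrEmpty(entry)) {
            System.out.println("would delete: /tmp/hudi_table/" + entry);
          }
        }
      }
    }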
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
index f8d37e859d8..c2aceae0b52 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
@@ -36,6 +36,7 @@ import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieCleaningPolicy;
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
+import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
 import org.apache.hudi.common.model.HoodieTableType;
@@ -94,6 +95,7 @@ import java.util.stream.Stream;
 
 import scala.Tuple3;
 
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.NO_PARTITION_PATH;
 import static org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime;
 import static org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS;
 import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors;
@@ -352,6 +354,55 @@ public class TestCleaner extends HoodieCleanerTestBase {
     }
   }
 
+  /**
+   * Test cleaning a non-partitioned table.
+   * This test ensures that the clean action does not delete the whole table's data.
+   */
+  @Test
+  public void testCleanNonPartitionedTable() throws IOException {
+    HoodieWriteConfig writeConfig = getConfigBuilder().withPath(basePath)
+        .withFileSystemViewConfig(new FileSystemViewStorageConfig.Builder()
+            .withEnableBackupForRemoteFileSystemView(false)
+            .build())
+        .withCleanConfig(HoodieCleanConfig.newBuilder()
+            .withAutoClean(false)
+            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
+            .retainCommits(1)
+            .build())
+        .withEmbeddedTimelineServerEnabled(false).build();
+    // datagen for non-partitioned table
+    initTestDataGenerator(new String[] {NO_PARTITION_PATH});
+    // init non-partitioned table
+    HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE, HoodieFileFormat.PARQUET,
+        true, "org.apache.hudi.keygen.NonpartitionedKeyGenerator", true);
+
+    try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig)) {
+      String instantTime;
+      for (int idx = 0; idx < 3; ++idx) {
+        instantTime = HoodieActiveTimeline.createNewInstantTime();
+        List<HoodieRecord> records = dataGen.generateInserts(instantTime, 1);
+        client.startCommitWithTime(instantTime);
+        client.insert(jsc.parallelize(records, 1), instantTime).collect();
+      }
+
+      instantTime = HoodieActiveTimeline.createNewInstantTime();
+      HoodieTable table = HoodieSparkTable.create(writeConfig, context);
+      Option<HoodieCleanerPlan> cleanPlan = table.scheduleCleaning(context, instantTime, Option.empty());
+      assertEquals(cleanPlan.get().getPartitionsToBeDeleted().size(), 0);
+      assertEquals(cleanPlan.get().getFilePathsToBeDeletedPerPartition().get(NO_PARTITION_PATH).size(), 1);
+      table.getMetaClient().reloadActiveTimeline();
+      String filePathToClean = cleanPlan.get().getFilePathsToBeDeletedPerPartition().get(NO_PARTITION_PATH).get(0).getFilePath();
+      // clean
+      HoodieCleanMetadata cleanMetadata = table.clean(context, instantTime);
+      // check the cleaned file
+      assertEquals(cleanMetadata.getPartitionMetadata().get(NO_PARTITION_PATH).getSuccessDeleteFiles().size(), 1);
+      assertTrue(filePathToClean.contains(cleanMetadata.getPartitionMetadata().get(NO_PARTITION_PATH).getSuccessDeleteFiles().get(0)));
+      // ensure table is not fully cleaned and has a file group
+      assertTrue(FSUtils.isTableExists(basePath, fs));
+      assertTrue(table.getFileSystemView().getAllFileGroups(NO_PARTITION_PATH).findAny().isPresent());
+    }
+  }
+
   /**
    * Tests no more than 1 clean is scheduled if hoodie.clean.allow.multiple config is set to false.
    */
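
Stepping back from the Hudi specifics, the invariant the new test encodes is that a clean removes obsolete file slices but never the table base path. A stand-alone illustration using java.nio temp paths (hypothetical, not Hudi code):

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class CleanInvariantSketch {
      public static void main(String[] args) throws IOException {
        Path basePath = Files.createTempDirectory("hudi_table"); // stand-in table location
        Path obsoleteFile = Files.createTempFile(basePath, "file-group-1", ".parquet");

        // A correct clean deletes only the obsolete file ...
        Files.delete(obsoleteFile);
        // ... and must leave the base path intact (the pre-patch bug deleted it).
        if (!Files.isDirectory(basePath)) {
          throw new AssertionError("clean must not delete the table base path");
        }
        System.out.println("table survives clean: " + basePath);
      }
    }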
