yihua commented on a change in pull request #3233:
URL: https://github.com/apache/hudi/pull/3233#discussion_r684595766



##########
File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
##########
@@ -601,4 +604,84 @@ public static HoodieWrapperFileSystem getFs(String path, 
SerializableConfigurati
         .filter(fileStatus -> 
!fileStatus.getPath().toString().contains(HoodieTableMetaClient.METAFOLDER_NAME))
         .collect(Collectors.toList());
   }
+
+  /**
+   * Deletes a directory by deleting sub-paths in parallel on the file system.
+   *
+   * @param hoodieEngineContext {@code HoodieEngineContext} instance
+   * @param fs file system
+   * @param dirPath directory path
+   * @param parallelism parallelism to use for sub-paths
+   * @return {@code true} if the directory is deleted; {@code false} otherwise.
+   */
+  public static boolean deleteDir(
+      HoodieEngineContext hoodieEngineContext, FileSystem fs, Path dirPath, 
int parallelism) {
+    try {
+      if (fs.exists(dirPath)) {
+        FSUtils.parallelizeSubPathProcess(hoodieEngineContext, fs, dirPath, 
parallelism, e -> true,
+            pairOfSubPathAndConf -> 
deleteSubPath(pairOfSubPathAndConf.getKey(), pairOfSubPathAndConf.getValue())
+        );
+
+        boolean result = fs.delete(dirPath, true);
+        LOG.info("Removed directory at " + dirPath);
+        return result;
+      }
+    } catch (IOException ioe) {
+      throw new HoodieIOException(ioe.getMessage(), ioe);
+    }
+    return false;
+  }
+
+  /**
+   * Processes sub-path in parallel.
+   *
+   * @param hoodieEngineContext {@code HoodieEngineContext} instance
+   * @param fs file system
+   * @param dirPath directory path
+   * @param parallelism parallelism to use for sub-paths
+   * @param subPathPredicate predicate to use to filter sub-paths for 
processing
+   * @param pairFunction actual processing logic for each sub-path
+   * @param <T> type of result to return for each sub-path
+   * @return a map of sub-path to result of the processing
+   */
+  public static <T> Map<String, T> parallelizeSubPathProcess(

Review comment:
       This one was removed from the PR due to a `Task not serializable` error.  I'll 
add the unit tests once I fix the issue and check it back in.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to