danielhumanmod commented on code in PR #312:
URL: https://github.com/apache/polaris/pull/312#discussion_r1827219703


##########
polaris-service/src/main/java/org/apache/polaris/service/task/ManifestFileCleanupTaskHandler.java:
##########
@@ -68,58 +70,107 @@ public boolean canHandleTask(TaskEntity task) {
   @Override
   public boolean handleTask(TaskEntity task) {
     ManifestCleanupTask cleanupTask = task.readData(ManifestCleanupTask.class);
-    ManifestFile manifestFile = 
decodeManifestData(cleanupTask.getManifestFileData());
     TableIdentifier tableId = cleanupTask.getTableId();
     try (FileIO authorizedFileIO = fileIOSupplier.apply(task)) {
-
-      // if the file doesn't exist, we assume that another task execution was 
successful, but failed
-      // to drop the task entity. Log a warning and return success
-      if (!TaskUtils.exists(manifestFile.path(), authorizedFileIO)) {
+      if (cleanupTask.getManifestFileData() != null) {
+        ManifestFile manifestFile = 
decodeManifestData(cleanupTask.getManifestFileData());
+        return manifestFileCleanUp(manifestFile, authorizedFileIO, tableId);
+      } else if (cleanupTask.getContentFileBatch() != null) {
+        return contentFileCleanup(cleanupTask.getContentFileBatch(), 
authorizedFileIO, tableId);
+      } else {
         LOGGER
             .atWarn()
-            .addKeyValue("manifestFile", manifestFile.path())
             .addKeyValue("tableId", tableId)
-            .log("Manifest cleanup task scheduled, but manifest file doesn't 
exist");
+            .log("Cleanup task scheduled, but no file inputs were found");
         return true;
       }
+    }
+  }
 
-      ManifestReader<DataFile> dataFiles = ManifestFiles.read(manifestFile, 
authorizedFileIO);
-      List<CompletableFuture<Void>> dataFileDeletes =
-          StreamSupport.stream(
-                  Spliterators.spliteratorUnknownSize(dataFiles.iterator(), 
Spliterator.IMMUTABLE),
-                  false)
-              .map(
-                  file ->
-                      tryDelete(
-                          tableId, authorizedFileIO, manifestFile, 
file.path().toString(), null, 1))
-              .toList();
-      LOGGER.debug(
-          "Scheduled {} data files to be deleted from manifest {}",
-          dataFileDeletes.size(),
-          manifestFile.path());
-      try {
-        // wait for all data files to be deleted, then wait for the manifest 
itself to be deleted
-        
CompletableFuture.allOf(dataFileDeletes.toArray(CompletableFuture[]::new))
-            .thenCompose(
-                (v) -> {
-                  LOGGER
-                      .atInfo()
-                      .addKeyValue("manifestFile", manifestFile.path())
-                      .log("All data files in manifest deleted - deleting 
manifest");
-                  return tryDelete(
-                      tableId, authorizedFileIO, manifestFile, 
manifestFile.path(), null, 1);
-                })
-            .get();
-        return true;
-      } catch (InterruptedException e) {
-        LOGGER.error(
-            "Interrupted exception deleting data files from manifest {}", 
manifestFile.path(), e);
-        throw new RuntimeException(e);
-      } catch (ExecutionException e) {
-        LOGGER.error("Unable to delete data files from manifest {}", 
manifestFile.path(), e);
-        return false;
-      }
+  private boolean manifestFileCleanUp(

Review Comment:
   For manifest and all its data, I just refactored the deletion logic into a 
new method; no other changes were made.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to