steveloughran commented on code in PR #6726:
URL: https://github.com/apache/hadoop/pull/6726#discussion_r1581288128


##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
  */
 package org.apache.hadoop.fs;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.util.functional.Tuples;
 
 import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
 
 /**
  * Default implementation of the {@link BulkDelete} interface.
  */
 public class DefaultBulkDeleteOperation implements BulkDelete {
 
-    private final int pageSize;
+    private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+    /** Default page size for bulk delete. */
+    private static final int DEFAULT_PAGE_SIZE = 1;
 
+    /** Base path for the bulk delete operation. */
     private final Path basePath;
 
+    /** Delegate File system make actual delete calls. */
     private final FileSystem fs;
 
-    public DefaultBulkDeleteOperation(int pageSize,
-                                      Path basePath,
+    public DefaultBulkDeleteOperation(Path basePath,
                                       FileSystem fs) {
-        checkArgument(pageSize == 1, "Page size must be equal to 1");
-        this.pageSize = pageSize;
         this.basePath = requireNonNull(basePath);
         this.fs = fs;
     }
 
     @Override
     public int pageSize() {
-        return pageSize;
+        return DEFAULT_PAGE_SIZE;
     }
 
     @Override
     public Path basePath() {
         return basePath;
     }
 
+    /**
+     * {@inheritDoc}
+     */
     @Override
     public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
             throws IOException, IllegalArgumentException {
-        validateBulkDeletePaths(paths, pageSize, basePath);
+        validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
         List<Map.Entry<Path, String>> result = new ArrayList<>();
-        // this for loop doesn't make sense as pageSize must be 1.
-        for (Path path : paths) {
+        if (!paths.isEmpty()) {
+            // As the page size is always 1, this should be the only one
+            // path in the collection.
+            Path pathToDelete = paths.iterator().next();
             try {
-                fs.delete(path, false);
-                // What to do if this return false?
-                // I think we should add the path to the result list with value "Not Deleted".
-            } catch (IOException e) {
-                result.add(Tuples.pair(path, e.toString()));
+                boolean deleted = fs.delete(pathToDelete, false);
+                if (deleted) {
+                    return result;
+                } else {
+                    try {
+                        FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+                        if (fileStatus.isDirectory()) {
+                            result.add(Tuples.pair(pathToDelete, "Path is a 
directory"));
+                        }
+                    } catch (FileNotFoundException e) {
+                        // Ignore FNFE and don't add to the result list.
+                        LOG.debug("Couldn't delete {} - does not exist: {}", 
pathToDelete, e.toString());
+                    } catch (Exception e) {
+                        LOG.debug("Couldn't delete {} - exception occurred: 
{}", pathToDelete, e.toString());
+                        result.add(Tuples.pair(pathToDelete, e.toString()));
+                    }
+                }
+            } catch (Exception ex) {

Review Comment:
   make this an IOException
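   Roughly this shape, as a sketch against the names already in this diff (not final wording):
   ```java
   } catch (IOException ex) {
       // narrowed from Exception: report the delete failure against the path
       LOG.debug("Couldn't delete {}: {}", pathToDelete, ex.toString());
       result.add(Tuples.pair(pathToDelete, ex.toString()));
   }
   ```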



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
  */
 package org.apache.hadoop.fs;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.util.functional.Tuples;
 
 import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
 
 /**
  * Default implementation of the {@link BulkDelete} interface.
  */
 public class DefaultBulkDeleteOperation implements BulkDelete {
 
-    private final int pageSize;
+    private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+    /** Default page size for bulk delete. */
+    private static final int DEFAULT_PAGE_SIZE = 1;
 
+    /** Base path for the bulk delete operation. */
     private final Path basePath;
 
+    /** Delegate File system make actual delete calls. */
     private final FileSystem fs;
 
-    public DefaultBulkDeleteOperation(int pageSize,
-                                      Path basePath,
+    public DefaultBulkDeleteOperation(Path basePath,
                                       FileSystem fs) {
-        checkArgument(pageSize == 1, "Page size must be equal to 1");
-        this.pageSize = pageSize;
         this.basePath = requireNonNull(basePath);
         this.fs = fs;
     }
 
     @Override
     public int pageSize() {
-        return pageSize;
+        return DEFAULT_PAGE_SIZE;
     }
 
     @Override
     public Path basePath() {
         return basePath;
     }
 
+    /**
+     * {@inheritDoc}
+     */
     @Override
     public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
             throws IOException, IllegalArgumentException {
-        validateBulkDeletePaths(paths, pageSize, basePath);
+        validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
         List<Map.Entry<Path, String>> result = new ArrayList<>();
-        // this for loop doesn't make sense as pageSize must be 1.
-        for (Path path : paths) {
+        if (!paths.isEmpty()) {
+            // As the page size is always 1, this should be the only one
+            // path in the collection.
+            Path pathToDelete = paths.iterator().next();
             try {
-                fs.delete(path, false);
-                // What to do if this return false?
-                // I think we should add the path to the result list with value "Not Deleted".
-            } catch (IOException e) {
-                result.add(Tuples.pair(path, e.toString()));
+                boolean deleted = fs.delete(pathToDelete, false);
+                if (deleted) {
+                    return result;
+                } else {
+                    try {
+                        FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+                        if (fileStatus.isDirectory()) {
+                            result.add(Tuples.pair(pathToDelete, "Path is a 
directory"));
+                        }
+                    } catch (FileNotFoundException e) {
+                        // Ignore FNFE and don't add to the result list.
+                        LOG.debug("Couldn't delete {} - does not exist: {}", 
pathToDelete, e.toString());
+                    } catch (Exception e) {

Review Comment:
   make this an IOException too
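   Same idea for this inner catch, sketched:
   ```java
   } catch (IOException e) {
       // narrowed from Exception: a failed getFileStatus() is reported
       // against the path rather than swallowed
       LOG.debug("Couldn't get status of {}: {}", pathToDelete, e.toString());
       result.add(Tuples.pair(pathToDelete, e.toString()));
   }
   ```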



##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.
 ### Availability
 
 The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext` storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefalutBulkDeleteSource` 
+Some FS MAY still decide to not support the API by overwriting the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path 

Review Comment:
   use MUST in capitals and recommend that implementations do not do this.
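   Something along these lines, as suggested wording only:
   ```
   with an `UnsupportedOperationException`. When doing so they MUST also declare
   the path capability `fs.capability.bulk.delete` as false. Implementations
   SHOULD NOT do this, as the default implementation is available for all filesystems.
   ```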



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java:
##########
@@ -768,6 +769,11 @@ private void executeBulkDeleteOnSomeReadOnlyFiles(Configuration assumedRoleConfi
     bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir);
     roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig);
     S3AFileSystem fs = getFileSystem();
+    if (WrappedIO.bulkDeletePageSize(fs, destDir) == 1) {
+      String msg = "Skipping as this test requires more than one paths to be 
deleted in bulk";

Review Comment:
   nit "path"



##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.
 ### Availability
 
 The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext` storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefalutBulkDeleteSource` 
+Some FS MAY still decide to not support the API by overwriting the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path 
+capability `fs.capability.bulk.delete` as false.

Review Comment:
   we're going to have to modify that Iceberg PoC to check this capability, aren't 
we? And then fail when it comes back false.
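   Something like this on the Iceberg side, sketched (`fs` and `path` being whatever the PoC already has in scope):
   ```java
   // probe the store for the capability before using the bulk delete API
   if (!fs.hasPathCapability(path, "fs.capability.bulk.delete")) {
       // fall back to single-object deletes, or fail fast here
       throw new UnsupportedOperationException(
           "bulk delete not available under " + path);
   }
   ```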



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java:
##########
@@ -85,6 +88,9 @@ public ITestS3AContractBulkDelete(boolean enableMultiObjectDelete) {
     protected Configuration createConfiguration() {
         Configuration conf = super.createConfiguration();
         S3ATestUtils.disableFilesystemCaching(conf);
+        conf = propagateBucketOptions(conf, getTestBucketName(conf));
+        skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,

Review Comment:
   this should actually be guarded with `if (enableMultiObjectDelete)` so that at 
least the single-entry delete operations are still tested against GCS. This does 
matter: GCS returns a 404 on a DELETE of a nonexistent path, and we need to make 
sure that doesn't trigger a failure.
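   i.e. something like this, where the skip message is illustrative only:
   ```java
   if (enableMultiObjectDelete) {
       // only demand multi-object delete for the parameterization that uses it;
       // the page-size-1 runs should still execute against stores like GCS
       skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,
           "multi-object delete disabled on this bucket");
   }
   ```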



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

