[ https://issues.apache.org/jira/browse/HADOOP-18679?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17841304#comment-17841304 ]

ASF GitHub Bot commented on HADOOP-18679:
-----------------------------------------

steveloughran commented on code in PR #6726:
URL: https://github.com/apache/hadoop/pull/6726#discussion_r1581288128


##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
  */
 package org.apache.hadoop.fs;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.util.functional.Tuples;
 
 import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
 
 /**
  * Default implementation of the {@link BulkDelete} interface.
  */
 public class DefaultBulkDeleteOperation implements BulkDelete {
 
-    private final int pageSize;
+    private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+    /** Default page size for bulk delete. */
+    private static final int DEFAULT_PAGE_SIZE = 1;
 
+    /** Base path for the bulk delete operation. */
     private final Path basePath;
 
+    /** Delegate File system make actual delete calls. */
     private final FileSystem fs;
 
-    public DefaultBulkDeleteOperation(int pageSize,
-                                      Path basePath,
+    public DefaultBulkDeleteOperation(Path basePath,
                                       FileSystem fs) {
-        checkArgument(pageSize == 1, "Page size must be equal to 1");
-        this.pageSize = pageSize;
         this.basePath = requireNonNull(basePath);
         this.fs = fs;
     }
 
     @Override
     public int pageSize() {
-        return pageSize;
+        return DEFAULT_PAGE_SIZE;
     }
 
     @Override
     public Path basePath() {
         return basePath;
     }
 
+    /**
+     * {@inheritDoc}
+     */
     @Override
     public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
             throws IOException, IllegalArgumentException {
-        validateBulkDeletePaths(paths, pageSize, basePath);
+        validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
         List<Map.Entry<Path, String>> result = new ArrayList<>();
-        // this for loop doesn't make sense as pageSize must be 1.
-        for (Path path : paths) {
+        if (!paths.isEmpty()) {
+            // As the page size is always 1, this should be the only one
+            // path in the collection.
+            Path pathToDelete = paths.iterator().next();
             try {
-                fs.delete(path, false);
-                // What to do if this return false?
-                // I think we should add the path to the result list with value "Not Deleted".
-            } catch (IOException e) {
-                result.add(Tuples.pair(path, e.toString()));
+                boolean deleted = fs.delete(pathToDelete, false);
+                if (deleted) {
+                    return result;
+                } else {
+                    try {
+                        FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+                        if (fileStatus.isDirectory()) {
+                            result.add(Tuples.pair(pathToDelete, "Path is a directory"));
+                        }
+                    } catch (FileNotFoundException e) {
+                        // Ignore FNFE and don't add to the result list.
+                        LOG.debug("Couldn't delete {} - does not exist: {}", 
pathToDelete, e.toString());
+                    } catch (Exception e) {
+                        LOG.debug("Couldn't delete {} - exception occurred: 
{}", pathToDelete, e.toString());
+                        result.add(Tuples.pair(pathToDelete, e.toString()));
+                    }
+                }
+            } catch (Exception ex) {

Review Comment:
   make this an IOException

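A minimal sketch of the narrowed outer catch being requested, assuming the
bulkDelete() body shown in the diff above; the wrapper class and method name
are illustrative only, not part of the PR:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.util.functional.Tuples;

    /** Sketch: outer catch narrowed from Exception to IOException. */
    class NarrowedOuterCatch {
        static List<Map.Entry<Path, String>> deleteOne(FileSystem fs, Path pathToDelete) {
            List<Map.Entry<Path, String>> result = new ArrayList<>();
            try {
                fs.delete(pathToDelete, false);
            } catch (IOException ex) {
                // IO failures are recorded against the path; runtime errors
                // now propagate instead of being swallowed by a catch-all
                result.add(Tuples.pair(pathToDelete, ex.toString()));
            }
            return result;
        }
    }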


##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
  */
 package org.apache.hadoop.fs;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.util.functional.Tuples;
 
 import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
 
 /**
  * Default implementation of the {@link BulkDelete} interface.
  */
 public class DefaultBulkDeleteOperation implements BulkDelete {
 
-    private final int pageSize;
+    private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+    /** Default page size for bulk delete. */
+    private static final int DEFAULT_PAGE_SIZE = 1;
 
+    /** Base path for the bulk delete operation. */
     private final Path basePath;
 
+    /** Delegate File system make actual delete calls. */
     private final FileSystem fs;
 
-    public DefaultBulkDeleteOperation(int pageSize,
-                                      Path basePath,
+    public DefaultBulkDeleteOperation(Path basePath,
                                       FileSystem fs) {
-        checkArgument(pageSize == 1, "Page size must be equal to 1");
-        this.pageSize = pageSize;
         this.basePath = requireNonNull(basePath);
         this.fs = fs;
     }
 
     @Override
     public int pageSize() {
-        return pageSize;
+        return DEFAULT_PAGE_SIZE;
     }
 
     @Override
     public Path basePath() {
         return basePath;
     }
 
+    /**
+     * {@inheritDoc}
+     */
     @Override
     public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
             throws IOException, IllegalArgumentException {
-        validateBulkDeletePaths(paths, pageSize, basePath);
+        validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
         List<Map.Entry<Path, String>> result = new ArrayList<>();
-        // this for loop doesn't make sense as pageSize must be 1.
-        for (Path path : paths) {
+        if (!paths.isEmpty()) {
+            // As the page size is always 1, this should be the only one
+            // path in the collection.
+            Path pathToDelete = paths.iterator().next();
             try {
-                fs.delete(path, false);
-                // What to do if this return false?
-                // I think we should add the path to the result list with value "Not Deleted".
-            } catch (IOException e) {
-                result.add(Tuples.pair(path, e.toString()));
+                boolean deleted = fs.delete(pathToDelete, false);
+                if (deleted) {
+                    return result;
+                } else {
+                    try {
+                        FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+                        if (fileStatus.isDirectory()) {
+                            result.add(Tuples.pair(pathToDelete, "Path is a directory"));
+                        }
+                    } catch (FileNotFoundException e) {
+                        // Ignore FNFE and don't add to the result list.
+                        LOG.debug("Couldn't delete {} - does not exist: {}", 
pathToDelete, e.toString());
+                    } catch (Exception e) {

Review Comment:
   make IOE

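The same narrowing applied to the inner getFileStatus() probe, again as a
sketch with an illustrative helper name; note that FileNotFoundException
extends IOException, so its catch clause must come first:

    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.util.functional.Tuples;

    /** Sketch: inner probe with catch narrowed from Exception to IOException. */
    final class NarrowedInnerCatch {
        static void probeUndeleted(FileSystem fs, Path pathToDelete,
                List<Map.Entry<Path, String>> result) {
            try {
                FileStatus status = fs.getFileStatus(pathToDelete);
                if (status.isDirectory()) {
                    result.add(Tuples.pair(pathToDelete, "Path is a directory"));
                }
            } catch (FileNotFoundException e) {
                // path vanished between delete() and getFileStatus(): not a failure
            } catch (IOException e) {
                // narrowed from Exception: only IO problems are recorded
                result.add(Tuples.pair(pathToDelete, e.toString()));
            }
        }
    }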


##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.
 ### Availability
 
 The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext` storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefalutBulkDeleteSource` 
+Some FS MAY still decide to not support the API by overwriting the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path 

Review Comment:
   use MUST in capitals and recommend that implementations do not do this.

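For illustration, a hypothetical FileSystem subclass opting out in the way the
doc text describes; this assumes the createBulkDelete()/hasPathCapability()
shapes from this PR, and per the review the spec SHOULD discourage
implementations from doing this at all:

    import java.io.IOException;

    import org.apache.hadoop.fs.BulkDelete;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    /** Hypothetical sketch of an FS opting out of the bulk delete API. */
    abstract class NoBulkDeleteFileSystem extends FileSystem {

        @Override
        public BulkDelete createBulkDelete(Path path) {
            throw new UnsupportedOperationException("bulk delete not supported");
        }

        @Override
        public boolean hasPathCapability(Path path, String capability)
                throws IOException {
            if ("fs.capability.bulk.delete".equals(capability)) {
                // the spec requires this MUST be false when the API is unsupported
                return false;
            }
            return super.hasPathCapability(path, capability);
        }
    }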


##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java:
##########
@@ -768,6 +769,11 @@ private void executeBulkDeleteOnSomeReadOnlyFiles(Configuration assumedRoleConfi
     bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir);
     roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig);
     S3AFileSystem fs = getFileSystem();
+    if (WrappedIO.bulkDeletePageSize(fs, destDir) == 1) {
+      String msg = "Skipping as this test requires more than one paths to be 
deleted in bulk";

Review Comment:
   nit "path"



##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.
 ### Availability
 
 The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext` storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefalutBulkDeleteSource` 
+Some FS MAY still decide to not support the API by overwriting the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path 
+capability `fs.capability.bulk.delete` as false.

Review Comment:
   we're going to have to modify that iceberg PoC to check this, aren't we? then fail

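The caller-side check would look something like this sketch (helper name
hypothetical): a client such as the iceberg PoC probes the capability before
relying on the API, then falls back to per-file delete() calls or fails fast
when it is false:

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    /** Sketch: probe the path capability before using bulk delete. */
    final class BulkDeleteProbe {
        static boolean canBulkDelete(FileSystem fs, Path base) throws IOException {
            // false means createBulkDelete() may throw UnsupportedOperationException
            return fs.hasPathCapability(base, "fs.capability.bulk.delete");
        }
    }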


##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java:
##########
@@ -85,6 +88,9 @@ public ITestS3AContractBulkDelete(boolean enableMultiObjectDelete) {
     protected Configuration createConfiguration() {
         Configuration conf = super.createConfiguration();
         S3ATestUtils.disableFilesystemCaching(conf);
+        conf = propagateBucketOptions(conf, getTestBucketName(conf));
+        skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,

Review Comment:
   this should actually be guarded with 'if (enableMultiObjectDelete)' so at least the single entry delete ops are tested with GCS. This does matter as GCS returns 404 on a DELETE nonexistent-path; we need to make sure that doesn't trigger a failure.

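A sketch of the guarded method, reconstructed from the diff above; the skip
message and the trailing argument of skipIfNotEnabled() are assumptions, as
the diff line is truncated:

    @Override
    protected Configuration createConfiguration() {
        Configuration conf = super.createConfiguration();
        S3ATestUtils.disableFilesystemCaching(conf);
        conf = propagateBucketOptions(conf, getTestBucketName(conf));
        if (enableMultiObjectDelete) {
            // only the multi-object-delete parameterization needs the skip;
            // single entry delete runs still execute against stores like GCS
            skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,
                "multi-object delete is disabled for this bucket");
        }
        return conf;
    }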




> Add API for bulk/paged object deletion
> --------------------------------------
>
>                 Key: HADOOP-18679
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18679
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/s3
>    Affects Versions: 3.3.5
>            Reporter: Steve Loughran
>            Priority: Major
>              Labels: pull-request-available
>
> iceberg and hbase could benefit from being able to give a list of individual 
> files to delete: files which may be scattered round the bucket for better 
> read performance. 
> Add some new optional interface for an object store which allows a caller to 
> submit a list of paths to files to delete, where
> the expectation is
> * if a path is a file: delete
> * if a path is a dir, outcome undefined
> For s3 that'd let us build these into DeleteRequest objects, and submit, 
> without any probes first.

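For context, a sketch of how a client such as iceberg or hbase might drive the
proposed API, assuming the BulkDelete/createBulkDelete shape from the PR under
review (including BulkDelete being Closeable); the wrapper class is
illustrative:

    import java.io.IOException;
    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.fs.BulkDelete;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    /** Sketch: page through a scattered file list and bulk delete each page. */
    final class BulkDeleteClientSketch {
        static void deleteAll(FileSystem fs, Path base, List<Path> files)
                throws IOException {
            try (BulkDelete op = fs.createBulkDelete(base)) {
                int pageSize = op.pageSize();
                for (int i = 0; i < files.size(); i += pageSize) {
                    List<Path> page =
                        files.subList(i, Math.min(i + pageSize, files.size()));
                    // entries in the returned list are paths that were not
                    // deleted, mapped to an error string
                    List<Map.Entry<Path, String>> failures = op.bulkDelete(page);
                    if (!failures.isEmpty()) {
                        throw new IOException("failed to delete: " + failures);
                    }
                }
            }
        }
    }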

