[ https://issues.apache.org/jira/browse/HADOOP-18679?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17841304#comment-17841304 ]
ASF GitHub Bot commented on HADOOP-18679:
-----------------------------------------

steveloughran commented on code in PR #6726:
URL: https://github.com/apache/hadoop/pull/6726#discussion_r1581288128


##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@ */
 package org.apache.hadoop.fs;

+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;

+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.util.functional.Tuples;

 import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;

 /**
  * Default implementation of the {@link BulkDelete} interface.
  */
 public class DefaultBulkDeleteOperation implements BulkDelete {

-    private final int pageSize;
+    private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+    /** Default page size for bulk delete. */
+    private static final int DEFAULT_PAGE_SIZE = 1;

+    /** Base path for the bulk delete operation. */
     private final Path basePath;

+    /** Delegate File system make actual delete calls. */
     private final FileSystem fs;

-    public DefaultBulkDeleteOperation(int pageSize,
-                                      Path basePath,
+    public DefaultBulkDeleteOperation(Path basePath,
                                       FileSystem fs) {
-        checkArgument(pageSize == 1, "Page size must be equal to 1");
-        this.pageSize = pageSize;
         this.basePath = requireNonNull(basePath);
         this.fs = fs;
     }

     @Override
     public int pageSize() {
-        return pageSize;
+        return DEFAULT_PAGE_SIZE;
     }

     @Override
     public Path basePath() {
         return basePath;
     }

+    /**
+     * {@inheritDoc}
+     */
     @Override
     public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
             throws IOException, IllegalArgumentException {
-        validateBulkDeletePaths(paths, pageSize, basePath);
+        validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
         List<Map.Entry<Path, String>> result = new ArrayList<>();
-        // this for loop doesn't make sense as pageSize must be 1.
-        for (Path path : paths) {
+        if (!paths.isEmpty()) {
+            // As the page size is always 1, this should be the only one
+            // path in the collection.
+            Path pathToDelete = paths.iterator().next();
             try {
-                fs.delete(path, false);
-                // What to do if this return false?
-                // I think we should add the path to the result list with value "Not Deleted".
-            } catch (IOException e) {
-                result.add(Tuples.pair(path, e.toString()));
+                boolean deleted = fs.delete(pathToDelete, false);
+                if (deleted) {
+                    return result;
+                } else {
+                    try {
+                        FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+                        if (fileStatus.isDirectory()) {
+                            result.add(Tuples.pair(pathToDelete, "Path is a directory"));
+                        }
+                    } catch (FileNotFoundException e) {
+                        // Ignore FNFE and don't add to the result list.
+ LOG.debug("Couldn't delete {} - does not exist: {}", pathToDelete, e.toString()); + } catch (Exception e) { + LOG.debug("Couldn't delete {} - exception occurred: {}", pathToDelete, e.toString()); + result.add(Tuples.pair(pathToDelete, e.toString())); + } + } + } catch (Exception ex) { Review Comment: make this an IOException ########## hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java: ########## @@ -17,61 +17,86 @@ */ package org.apache.hadoop.fs; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.util.functional.Tuples; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths; -import static org.apache.hadoop.util.Preconditions.checkArgument; /** * Default implementation of the {@link BulkDelete} interface. */ public class DefaultBulkDeleteOperation implements BulkDelete { - private final int pageSize; + private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class); + + /** Default page size for bulk delete. */ + private static final int DEFAULT_PAGE_SIZE = 1; + /** Base path for the bulk delete operation. */ private final Path basePath; + /** Delegate File system make actual delete calls. */ private final FileSystem fs; - public DefaultBulkDeleteOperation(int pageSize, - Path basePath, + public DefaultBulkDeleteOperation(Path basePath, FileSystem fs) { - checkArgument(pageSize == 1, "Page size must be equal to 1"); - this.pageSize = pageSize; this.basePath = requireNonNull(basePath); this.fs = fs; } @Override public int pageSize() { - return pageSize; + return DEFAULT_PAGE_SIZE; } @Override public Path basePath() { return basePath; } + /** + * {@inheritDoc} + */ @Override public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths) throws IOException, IllegalArgumentException { - validateBulkDeletePaths(paths, pageSize, basePath); + validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath); List<Map.Entry<Path, String>> result = new ArrayList<>(); - // this for loop doesn't make sense as pageSize must be 1. - for (Path path : paths) { + if (!paths.isEmpty()) { + // As the page size is always 1, this should be the only one + // path in the collection. + Path pathToDelete = paths.iterator().next(); try { - fs.delete(path, false); - // What to do if this return false? - // I think we should add the path to the result list with value "Not Deleted". - } catch (IOException e) { - result.add(Tuples.pair(path, e.toString())); + boolean deleted = fs.delete(pathToDelete, false); + if (deleted) { + return result; + } else { + try { + FileStatus fileStatus = fs.getFileStatus(pathToDelete); + if (fileStatus.isDirectory()) { + result.add(Tuples.pair(pathToDelete, "Path is a directory")); + } + } catch (FileNotFoundException e) { + // Ignore FNFE and don't add to the result list. + LOG.debug("Couldn't delete {} - does not exist: {}", pathToDelete, e.toString()); + } catch (Exception e) { Review Comment: make IOE ########## hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md: ########## @@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome. 
##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########
@@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome.

 ### Availability

 The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext` storage clients
-which MAY support the API; it may still be unsupported by the
-specific instance.
+which is available for all FS via `org.apache.hadoop.fs.DefalutBulkDeleteSource`
+Some FS MAY still decide to not support the API by overwriting the `createBulkDelete()` method
+with an UnsupportedOperationException. While doing so they must also declare the path

Review Comment:
   use MUST in capitals and recommend that implementations do not do this.
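As a rough illustration of the opt-out pattern the added documentation describes: override
createBulkDelete() to throw UnsupportedOperationException and report the fs.capability.bulk.delete
path capability as false. The class name NoBulkDeleteFileSystem is hypothetical, the
createBulkDelete(Path) signature is assumed from the rest of this PR, and this is not code from
the change itself.

import java.io.IOException;

import org.apache.hadoop.fs.BulkDelete;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;

/** Hypothetical filesystem that opts out of the bulk delete API. */
public class NoBulkDeleteFileSystem extends RawLocalFileSystem {

    @Override
    public BulkDelete createBulkDelete(Path path) throws IOException {
        // Opt out of the API...
        throw new UnsupportedOperationException("bulk delete is not supported");
    }

    @Override
    public boolean hasPathCapability(Path path, String capability) throws IOException {
        // ...and declare the matching path capability as false.
        if ("fs.capability.bulk.delete".equals(capability)) {
            return false;
        }
        return super.hasPathCapability(path, capability);
    }
}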
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java:
##########
@@ -768,6 +769,11 @@ private void executeBulkDeleteOnSomeReadOnlyFiles(Configuration assumedRoleConfi
     bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir);
     roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig);
     S3AFileSystem fs = getFileSystem();
+    if (WrappedIO.bulkDeletePageSize(fs, destDir) == 1) {
+      String msg = "Skipping as this test requires more than one paths to be deleted in bulk";

Review Comment:
   nit "path"


##########
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md:
##########

+with an UnsupportedOperationException. While doing so they must also declare the path
+capability `fs.capability.bulk.delete` as false.

Review Comment:
   we're going to have to modify that iceberg PoC to check this aren't we? then fail


##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java:
##########
@@ -85,6 +88,9 @@ public ITestS3AContractBulkDelete(boolean enableMultiObjectDelete) {
   protected Configuration createConfiguration() {
     Configuration conf = super.createConfiguration();
     S3ATestUtils.disableFilesystemCaching(conf);
+    conf = propagateBucketOptions(conf, getTestBucketName(conf));
+    skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,

Review Comment:
   this should actually be guarded with 'if (enableMultiObjectDelete)' so at least the single entry
   delete ops are tested with GCS. This does matter as GCS returns 404 on a DELETE nonexistent-path;
   we need to make sure that doesn't trigger a failure.


> Add API for bulk/paged object deletion
> --------------------------------------
>
>                 Key: HADOOP-18679
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18679
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/s3
>    Affects Versions: 3.3.5
>            Reporter: Steve Loughran
>            Priority: Major
>              Labels: pull-request-available
>
> iceberg and hbase could benefit from being able to give a list of individual
> files to delete - files which may be scattered round the bucket for better
> read performance.
> Add some new optional interface for an object store which allows a caller to
> submit a list of paths to files to delete, where the expectation is:
> * if a path is a file: delete
> * if a path is a dir, outcome undefined
> For s3 that'd let us build these into DeleteRequest objects, and submit,
> without any probes first.
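To make the issue's intended usage, and the capability probe suggested for the Iceberg PoC, more
concrete: a hedged caller-side sketch follows. The class and method names are invented for
illustration; the fs.capability.bulk.delete probe and the createBulkDelete(Path)/pageSize()/
bulkDelete(paths) calls follow the API shown earlier in this review, and it assumes BulkDelete is
closeable. None of this is code from the PR.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.BulkDelete;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Hypothetical caller showing paged use of the bulk delete API. */
public final class BulkDeleteClientSketch {

    public static void deleteFiles(FileSystem fs, Path base, List<Path> files)
            throws Exception {
        if (!fs.hasPathCapability(base, "fs.capability.bulk.delete")) {
            // Store opted out of the API: fall back to one delete() call per file.
            for (Path p : files) {
                fs.delete(p, false);
            }
            return;
        }
        try (BulkDelete bulkDelete = fs.createBulkDelete(base)) {
            int pageSize = bulkDelete.pageSize();
            for (int i = 0; i < files.size(); i += pageSize) {
                List<Path> page = files.subList(i, Math.min(i + pageSize, files.size()));
                // Each returned entry is a path which was not deleted, plus the reason.
                for (Map.Entry<Path, String> failure : bulkDelete.bulkDelete(page)) {
                    System.err.println("Failed to delete " + failure.getKey()
                        + ": " + failure.getValue());
                }
            }
        }
    }
}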