steveloughran commented on code in PR #6726:
URL: https://github.com/apache/hadoop/pull/6726#discussion_r1581288128
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java:
##########
@@ -17,61 +17,86 @@
  */
 package org.apache.hadoop.fs;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.util.functional.Tuples;
 
 import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths;
-import static org.apache.hadoop.util.Preconditions.checkArgument;
 
 /**
  * Default implementation of the {@link BulkDelete} interface.
  */
 public class DefaultBulkDeleteOperation implements BulkDelete {
 
-    private final int pageSize;
+    private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class);
+
+    /** Default page size for bulk delete. */
+    private static final int DEFAULT_PAGE_SIZE = 1;
 
+    /** Base path for the bulk delete operation. */
     private final Path basePath;
 
+    /** Delegate File system make actual delete calls. */
     private final FileSystem fs;
 
-    public DefaultBulkDeleteOperation(int pageSize,
-                                      Path basePath,
+    public DefaultBulkDeleteOperation(Path basePath,
                                       FileSystem fs) {
-        checkArgument(pageSize == 1, "Page size must be equal to 1");
-        this.pageSize = pageSize;
        this.basePath = requireNonNull(basePath);
        this.fs = fs;
     }
 
     @Override
     public int pageSize() {
-        return pageSize;
+        return DEFAULT_PAGE_SIZE;
     }
 
     @Override
     public Path basePath() {
         return basePath;
     }
 
+    /**
+     * {@inheritDoc}
+     */
     @Override
     public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths)
            throws IOException, IllegalArgumentException {
-        validateBulkDeletePaths(paths, pageSize, basePath);
+        validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath);
        List<Map.Entry<Path, String>> result = new ArrayList<>();
-        // this for loop doesn't make sense as pageSize must be 1.
-        for (Path path : paths) {
+        if (!paths.isEmpty()) {
+            // As the page size is always 1, this should be the only one
+            // path in the collection.
+            Path pathToDelete = paths.iterator().next();
             try {
-                fs.delete(path, false);
-                // What to do if this return false?
-                // I think we should add the path to the result list with value "Not Deleted".
-            } catch (IOException e) {
-                result.add(Tuples.pair(path, e.toString()));
+                boolean deleted = fs.delete(pathToDelete, false);
+                if (deleted) {
+                    return result;
+                } else {
+                    try {
+                        FileStatus fileStatus = fs.getFileStatus(pathToDelete);
+                        if (fileStatus.isDirectory()) {
+                            result.add(Tuples.pair(pathToDelete, "Path is a directory"));
+                        }
+                    } catch (FileNotFoundException e) {
+                        // Ignore FNFE and don't add to the result list.
+                        LOG.debug("Couldn't delete {} - does not exist: {}", pathToDelete, e.toString());
+                    } catch (Exception e) {
+                        LOG.debug("Couldn't delete {} - exception occurred: {}", pathToDelete, e.toString());
+                        result.add(Tuples.pair(pathToDelete, e.toString()));
+                    }
+                }
+            } catch (Exception ex) {

Review Comment:
   make this an IOException
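To illustrate the suggestion, a minimal sketch with the catch narrowed (the class and method names here are hypothetical; `FileSystem.delete()`, `Tuples.pair()` and the result-list convention are taken from the hunk above):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.functional.Tuples;

// Hypothetical sketch: catch IOException rather than Exception, so
// genuine I/O failures are reported against the path while runtime
// errors still propagate to the caller.
class NarrowedCatchSketch {

    private final FileSystem fs;

    NarrowedCatchSketch(FileSystem fs) {
        this.fs = fs;
    }

    List<Map.Entry<Path, String>> deleteOne(Path pathToDelete) {
        List<Map.Entry<Path, String>> result = new ArrayList<>();
        try {
            fs.delete(pathToDelete, false);
        } catch (IOException ex) {
            // record the failure instead of swallowing every Throwable
            result.add(Tuples.pair(pathToDelete, ex.toString()));
        }
        return result;
    }
}
```

The same narrowing would apply to the inner catch around `getFileStatus()` flagged in the next comment.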
+ LOG.debug("Couldn't delete {} - does not exist: {}", pathToDelete, e.toString()); + } catch (Exception e) { + LOG.debug("Couldn't delete {} - exception occurred: {}", pathToDelete, e.toString()); + result.add(Tuples.pair(pathToDelete, e.toString())); + } + } + } catch (Exception ex) { Review Comment: make this an IOException ########## hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DefaultBulkDeleteOperation.java: ########## @@ -17,61 +17,86 @@ */ package org.apache.hadoop.fs; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.util.functional.Tuples; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths; -import static org.apache.hadoop.util.Preconditions.checkArgument; /** * Default implementation of the {@link BulkDelete} interface. */ public class DefaultBulkDeleteOperation implements BulkDelete { - private final int pageSize; + private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class); + + /** Default page size for bulk delete. */ + private static final int DEFAULT_PAGE_SIZE = 1; + /** Base path for the bulk delete operation. */ private final Path basePath; + /** Delegate File system make actual delete calls. */ private final FileSystem fs; - public DefaultBulkDeleteOperation(int pageSize, - Path basePath, + public DefaultBulkDeleteOperation(Path basePath, FileSystem fs) { - checkArgument(pageSize == 1, "Page size must be equal to 1"); - this.pageSize = pageSize; this.basePath = requireNonNull(basePath); this.fs = fs; } @Override public int pageSize() { - return pageSize; + return DEFAULT_PAGE_SIZE; } @Override public Path basePath() { return basePath; } + /** + * {@inheritDoc} + */ @Override public List<Map.Entry<Path, String>> bulkDelete(Collection<Path> paths) throws IOException, IllegalArgumentException { - validateBulkDeletePaths(paths, pageSize, basePath); + validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath); List<Map.Entry<Path, String>> result = new ArrayList<>(); - // this for loop doesn't make sense as pageSize must be 1. - for (Path path : paths) { + if (!paths.isEmpty()) { + // As the page size is always 1, this should be the only one + // path in the collection. + Path pathToDelete = paths.iterator().next(); try { - fs.delete(path, false); - // What to do if this return false? - // I think we should add the path to the result list with value "Not Deleted". - } catch (IOException e) { - result.add(Tuples.pair(path, e.toString())); + boolean deleted = fs.delete(pathToDelete, false); + if (deleted) { + return result; + } else { + try { + FileStatus fileStatus = fs.getFileStatus(pathToDelete); + if (fileStatus.isDirectory()) { + result.add(Tuples.pair(pathToDelete, "Path is a directory")); + } + } catch (FileNotFoundException e) { + // Ignore FNFE and don't add to the result list. + LOG.debug("Couldn't delete {} - does not exist: {}", pathToDelete, e.toString()); + } catch (Exception e) { Review Comment: make IOE ########## hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md: ########## @@ -94,8 +94,10 @@ No other restrictions are placed upon the outcome. 
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java:
##########
@@ -85,6 +88,9 @@ public ITestS3AContractBulkDelete(boolean enableMultiObjectDelete) {
     protected Configuration createConfiguration() {
         Configuration conf = super.createConfiguration();
         S3ATestUtils.disableFilesystemCaching(conf);
+        conf = propagateBucketOptions(conf, getTestBucketName(conf));
+        skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,

Review Comment:
   this should actually be guarded with `if (enableMultiObjectDelete)` so at least the single-entry delete ops are tested with GCS. This does matter, as GCS returns 404 on a DELETE of a nonexistent path; we need to make sure that doesn't trigger a failure.
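A rough sketch of the guarded version (the skip message is an assumption, since the original third argument is truncated above; the enclosing test class, its `enableMultiObjectDelete` field and the helper methods come from the hunk):

```java
// Sketch of ITestS3AContractBulkDelete.createConfiguration() with the
// guard applied; the surrounding class and its enableMultiObjectDelete
// field are assumed from the hunk above.
protected Configuration createConfiguration() {
    Configuration conf = super.createConfiguration();
    S3ATestUtils.disableFilesystemCaching(conf);
    conf = propagateBucketOptions(conf, getTestBucketName(conf));
    if (enableMultiObjectDelete) {
        // only demand multi-object delete support when this
        // parameterized run actually uses it; single-entry runs
        // (e.g. against GCS) proceed regardless.
        skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE,
            "bulk delete disabled for this bucket"); // message assumed
    }
    return conf;
}
```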