[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-07-14 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r454200493



##
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##
@@ -4181,79 +4181,126 @@ public LocatedFileStatus next() throws IOException {
 Path path = qualify(f);
 LOG.debug("listFiles({}, {})", path, recursive);
 try {
-  // if a status was given, that is used, otherwise
-  // call getFileStatus, which triggers an existence check
-  final S3AFileStatus fileStatus = status != null
-  ? status
-  : (S3AFileStatus) getFileStatus(path);
-  if (fileStatus.isFile()) {
+  // if a status was given and it is a file.
+  if (status != null && status.isFile()) {
 // simple case: File
 LOG.debug("Path is a file");
 return new Listing.SingleStatusRemoteIterator(
-toLocatedFileStatus(fileStatus));
-  } else {
-// directory: do a bulk operation
-String key = maybeAddTrailingSlash(pathToKey(path));
-String delimiter = recursive ? null : "/";
-LOG.debug("Requesting all entries under {} with delimiter '{}'",
-key, delimiter);
-final RemoteIterator cachedFilesIterator;
-final Set tombstones;
-boolean allowAuthoritative = allowAuthoritative(f);
-if (recursive) {
-  final PathMetadata pm = metadataStore.get(path, true);
-  // shouldn't need to check pm.isDeleted() because that will have
-  // been caught by getFileStatus above.
-  MetadataStoreListFilesIterator metadataStoreListFilesIterator =
-  new MetadataStoreListFilesIterator(metadataStore, pm,
-  allowAuthoritative);
-  tombstones = metadataStoreListFilesIterator.listTombstones();
-  // if all of the below is true
-  //  - authoritative access is allowed for this metadatastore for 
this directory,
-  //  - all the directory listings are authoritative on the client
-  //  - the caller does not force non-authoritative access
-  // return the listing without any further s3 access
-  if (!forceNonAuthoritativeMS &&
-  allowAuthoritative &&
-  metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
-S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
-metadataStoreListFilesIterator, tombstones);
-cachedFilesIterator = listing.createProvidedFileStatusIterator(
-statuses, ACCEPT_ALL, acceptor);
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
-  cachedFilesIterator = metadataStoreListFilesIterator;
-} else {
-  DirListingMetadata meta =
-  S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
-  allowAuthoritative);
-  if (meta != null) {
-tombstones = meta.listTombstones();
-  } else {
-tombstones = null;
-  }
-  cachedFilesIterator = listing.createProvidedFileStatusIterator(
-  S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
-  if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
-// metadata listing is authoritative, so return it directly
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
+toLocatedFileStatus(status));
+  }
+  // Assuming the path to be a directory
+  // do a bulk operation.
+  RemoteIterator listFilesAssumingDir =
+  getListFilesAssumingDir(path,
+  recursive,
+  acceptor,
+  collectTombstones,
+  forceNonAuthoritativeMS);
+  // If there are no list entries present, we
+  // fallback to file existence check as the path
+  // can be a file or empty directory.
+  if (!listFilesAssumingDir.hasNext()) {
+// If file status was already passed, reuse it.
+final S3AFileStatus fileStatus = status != null
+? status
+: (S3AFileStatus) getFileStatus(path);

Review comment:
   This won't work as explained in 
https://github.com/apache/hadoop/pull/2038#discussion_r450676043





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-07-07 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r450676043



##
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##
@@ -4181,79 +4181,114 @@ public LocatedFileStatus next() throws IOException {
 Path path = qualify(f);
 LOG.debug("listFiles({}, {})", path, recursive);
 try {
-  // if a status was given, that is used, otherwise
-  // call getFileStatus, which triggers an existence check
-  final S3AFileStatus fileStatus = status != null
-  ? status
-  : (S3AFileStatus) getFileStatus(path);
-  if (fileStatus.isFile()) {
+  // if a status was given and it is a file.
+  if (status != null && status.isFile()) {
 // simple case: File
 LOG.debug("Path is a file");
 return new Listing.SingleStatusRemoteIterator(
-toLocatedFileStatus(fileStatus));
-  } else {
-// directory: do a bulk operation
-String key = maybeAddTrailingSlash(pathToKey(path));
-String delimiter = recursive ? null : "/";
-LOG.debug("Requesting all entries under {} with delimiter '{}'",
-key, delimiter);
-final RemoteIterator cachedFilesIterator;
-final Set tombstones;
-boolean allowAuthoritative = allowAuthoritative(f);
-if (recursive) {
-  final PathMetadata pm = metadataStore.get(path, true);
-  // shouldn't need to check pm.isDeleted() because that will have
-  // been caught by getFileStatus above.
-  MetadataStoreListFilesIterator metadataStoreListFilesIterator =
-  new MetadataStoreListFilesIterator(metadataStore, pm,
-  allowAuthoritative);
-  tombstones = metadataStoreListFilesIterator.listTombstones();
-  // if all of the below is true
-  //  - authoritative access is allowed for this metadatastore for 
this directory,
-  //  - all the directory listings are authoritative on the client
-  //  - the caller does not force non-authoritative access
-  // return the listing without any further s3 access
-  if (!forceNonAuthoritativeMS &&
-  allowAuthoritative &&
-  metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
-S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
-metadataStoreListFilesIterator, tombstones);
-cachedFilesIterator = listing.createProvidedFileStatusIterator(
-statuses, ACCEPT_ALL, acceptor);
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
-  cachedFilesIterator = metadataStoreListFilesIterator;
-} else {
-  DirListingMetadata meta =
-  S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
-  allowAuthoritative);
-  if (meta != null) {
-tombstones = meta.listTombstones();
-  } else {
-tombstones = null;
-  }
-  cachedFilesIterator = listing.createProvidedFileStatusIterator(
-  S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
-  if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
-// metadata listing is authoritative, so return it directly
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
+toLocatedFileStatus(status));
+  }
+  // Assuming the path to be a directory
+  // do a bulk operation.
+  RemoteIterator listFilesAssumingDir =
+  getListFilesAssumingDir(path,
+  recursive,
+  acceptor,
+  collectTombstones,
+  forceNonAuthoritativeMS);
+  // If there are no list entries present, we
+  // fallback to file existence check as the path
+  // can be a file or empty directory.
+  if (!listFilesAssumingDir.hasNext()) {
+final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);

Review comment:
   If there is an empty directory within a base directory. For example 
directory structure in test 
ITestS3AContractGetFileStatus>AbstractContractGetFileStatusTest.testListFilesEmptyDirectoryRecursive
   There won't be any files thus listing will be empty. 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-06-25 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r445414974



##
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##
@@ -4181,79 +4181,114 @@ public LocatedFileStatus next() throws IOException {
 Path path = qualify(f);
 LOG.debug("listFiles({}, {})", path, recursive);
 try {
-  // if a status was given, that is used, otherwise
-  // call getFileStatus, which triggers an existence check
-  final S3AFileStatus fileStatus = status != null
-  ? status
-  : (S3AFileStatus) getFileStatus(path);
-  if (fileStatus.isFile()) {
+  // if a status was given and it is a file.
+  if (status != null && status.isFile()) {
 // simple case: File
 LOG.debug("Path is a file");
 return new Listing.SingleStatusRemoteIterator(
-toLocatedFileStatus(fileStatus));
-  } else {
-// directory: do a bulk operation
-String key = maybeAddTrailingSlash(pathToKey(path));
-String delimiter = recursive ? null : "/";
-LOG.debug("Requesting all entries under {} with delimiter '{}'",
-key, delimiter);
-final RemoteIterator cachedFilesIterator;
-final Set tombstones;
-boolean allowAuthoritative = allowAuthoritative(f);
-if (recursive) {
-  final PathMetadata pm = metadataStore.get(path, true);
-  // shouldn't need to check pm.isDeleted() because that will have
-  // been caught by getFileStatus above.
-  MetadataStoreListFilesIterator metadataStoreListFilesIterator =
-  new MetadataStoreListFilesIterator(metadataStore, pm,
-  allowAuthoritative);
-  tombstones = metadataStoreListFilesIterator.listTombstones();
-  // if all of the below is true
-  //  - authoritative access is allowed for this metadatastore for 
this directory,
-  //  - all the directory listings are authoritative on the client
-  //  - the caller does not force non-authoritative access
-  // return the listing without any further s3 access
-  if (!forceNonAuthoritativeMS &&
-  allowAuthoritative &&
-  metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
-S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
-metadataStoreListFilesIterator, tombstones);
-cachedFilesIterator = listing.createProvidedFileStatusIterator(
-statuses, ACCEPT_ALL, acceptor);
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
-  cachedFilesIterator = metadataStoreListFilesIterator;
-} else {
-  DirListingMetadata meta =
-  S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
-  allowAuthoritative);
-  if (meta != null) {
-tombstones = meta.listTombstones();
-  } else {
-tombstones = null;
-  }
-  cachedFilesIterator = listing.createProvidedFileStatusIterator(
-  S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
-  if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
-// metadata listing is authoritative, so return it directly
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
+toLocatedFileStatus(status));
+  }
+  // Assuming the path to be a directory
+  // do a bulk operation.
+  RemoteIterator listFilesAssumingDir =
+  getListFilesAssumingDir(path,
+  recursive,
+  acceptor,
+  collectTombstones,
+  forceNonAuthoritativeMS);
+  // If there are no list entries present, we
+  // fallback to file existence check as the path
+  // can be a file or empty directory.
+  if (!listFilesAssumingDir.hasNext()) {
+final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);

Review comment:
   We can't do this as we will need a list for an directory which is empty. 
Else we will get FNFE.   





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-06-12 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r439283239



##
File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
##
@@ -168,6 +168,65 @@ public void testCostOfListLocatedStatusOnNonEmptyDir() 
throws Throwable {
 }
   }
 
+  @Test
+  public void testCostOfListFilesOnFile() throws Throwable {
+describe("Performing listFiles() on a file");
+Path file = path(getMethodName() + ".txt");
+S3AFileSystem fs = getFileSystem();
+touch(fs, file);
+resetMetricDiffs();
+fs.listFiles(file, true);
+if (!fs.hasMetadataStore()) {
+  metadataRequests.assertDiffEquals(1);
+} else {
+  if (fs.allowAuthoritative(file)) {
+listRequests.assertDiffEquals(0);
+  } else {
+listRequests.assertDiffEquals(1);
+  }
+}
+  }
+
+  @Test
+  public void testCostOfListFilesOnEmptyDir() throws Throwable {
+describe("Performing listFiles() on an empty dir");
+Path dir = path(getMethodName());
+S3AFileSystem fs = getFileSystem();
+fs.mkdirs(dir);
+resetMetricDiffs();
+fs.listFiles(dir, true);
+if (!fs.hasMetadataStore()) {
+  verifyOperationCount(2, 1);
+} else {
+  if (fs.allowAuthoritative(dir)) {
+verifyOperationCount(0, 0);
+  } else {
+verifyOperationCount(0, 1);
+  }
+}
+  }
+
+  @Test
+  public void testCostOfListFilesOnNonEmptyDir() throws Throwable {

Review comment:
   Done. There are no cost changes.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-06-12 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r439283351



##
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##
@@ -4181,79 +4181,114 @@ public LocatedFileStatus next() throws IOException {
 Path path = qualify(f);
 LOG.debug("listFiles({}, {})", path, recursive);
 try {
-  // if a status was given, that is used, otherwise
-  // call getFileStatus, which triggers an existence check
-  final S3AFileStatus fileStatus = status != null
-  ? status
-  : (S3AFileStatus) getFileStatus(path);
-  if (fileStatus.isFile()) {
+  // if a status was given and it is a file.
+  if (status != null && status.isFile()) {
 // simple case: File
 LOG.debug("Path is a file");
 return new Listing.SingleStatusRemoteIterator(
-toLocatedFileStatus(fileStatus));
-  } else {
-// directory: do a bulk operation
-String key = maybeAddTrailingSlash(pathToKey(path));
-String delimiter = recursive ? null : "/";
-LOG.debug("Requesting all entries under {} with delimiter '{}'",
-key, delimiter);
-final RemoteIterator cachedFilesIterator;
-final Set tombstones;
-boolean allowAuthoritative = allowAuthoritative(f);
-if (recursive) {
-  final PathMetadata pm = metadataStore.get(path, true);
-  // shouldn't need to check pm.isDeleted() because that will have
-  // been caught by getFileStatus above.
-  MetadataStoreListFilesIterator metadataStoreListFilesIterator =
-  new MetadataStoreListFilesIterator(metadataStore, pm,
-  allowAuthoritative);
-  tombstones = metadataStoreListFilesIterator.listTombstones();
-  // if all of the below is true
-  //  - authoritative access is allowed for this metadatastore for 
this directory,
-  //  - all the directory listings are authoritative on the client
-  //  - the caller does not force non-authoritative access
-  // return the listing without any further s3 access
-  if (!forceNonAuthoritativeMS &&
-  allowAuthoritative &&
-  metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
-S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
-metadataStoreListFilesIterator, tombstones);
-cachedFilesIterator = listing.createProvidedFileStatusIterator(
-statuses, ACCEPT_ALL, acceptor);
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
-  cachedFilesIterator = metadataStoreListFilesIterator;
-} else {
-  DirListingMetadata meta =
-  S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
-  allowAuthoritative);
-  if (meta != null) {
-tombstones = meta.listTombstones();
-  } else {
-tombstones = null;
-  }
-  cachedFilesIterator = listing.createProvidedFileStatusIterator(
-  S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
-  if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
-// metadata listing is authoritative, so return it directly
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
+toLocatedFileStatus(status));
+  }
+  // Assuming the path to be a directory
+  // do a bulk operation.
+  RemoteIterator listFilesAssumingDir =
+  getListFilesAssumingDir(path,
+  recursive,
+  acceptor,
+  collectTombstones,
+  forceNonAuthoritativeMS);
+  // If there are no list entries present, we
+  // fallback to file existence check as the path
+  // can be a file or empty directory.
+  if (!listFilesAssumingDir.hasNext()) {
+final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);

Review comment:
   Done





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-06-11 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r438621464



##
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##
@@ -4181,79 +4181,114 @@ public LocatedFileStatus next() throws IOException {
 Path path = qualify(f);
 LOG.debug("listFiles({}, {})", path, recursive);
 try {
-  // if a status was given, that is used, otherwise
-  // call getFileStatus, which triggers an existence check
-  final S3AFileStatus fileStatus = status != null
-  ? status
-  : (S3AFileStatus) getFileStatus(path);
-  if (fileStatus.isFile()) {
+  // if a status was given and it is a file.
+  if (status != null && status.isFile()) {
 // simple case: File
 LOG.debug("Path is a file");
 return new Listing.SingleStatusRemoteIterator(
-toLocatedFileStatus(fileStatus));
-  } else {
-// directory: do a bulk operation
-String key = maybeAddTrailingSlash(pathToKey(path));
-String delimiter = recursive ? null : "/";
-LOG.debug("Requesting all entries under {} with delimiter '{}'",
-key, delimiter);
-final RemoteIterator cachedFilesIterator;
-final Set tombstones;
-boolean allowAuthoritative = allowAuthoritative(f);
-if (recursive) {
-  final PathMetadata pm = metadataStore.get(path, true);
-  // shouldn't need to check pm.isDeleted() because that will have
-  // been caught by getFileStatus above.
-  MetadataStoreListFilesIterator metadataStoreListFilesIterator =
-  new MetadataStoreListFilesIterator(metadataStore, pm,
-  allowAuthoritative);
-  tombstones = metadataStoreListFilesIterator.listTombstones();
-  // if all of the below is true
-  //  - authoritative access is allowed for this metadatastore for 
this directory,
-  //  - all the directory listings are authoritative on the client
-  //  - the caller does not force non-authoritative access
-  // return the listing without any further s3 access
-  if (!forceNonAuthoritativeMS &&
-  allowAuthoritative &&
-  metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
-S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
-metadataStoreListFilesIterator, tombstones);
-cachedFilesIterator = listing.createProvidedFileStatusIterator(
-statuses, ACCEPT_ALL, acceptor);
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
-  cachedFilesIterator = metadataStoreListFilesIterator;
-} else {
-  DirListingMetadata meta =
-  S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
-  allowAuthoritative);
-  if (meta != null) {
-tombstones = meta.listTombstones();
-  } else {
-tombstones = null;
-  }
-  cachedFilesIterator = listing.createProvidedFileStatusIterator(
-  S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
-  if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
-// metadata listing is authoritative, so return it directly
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
+toLocatedFileStatus(status));
+  }
+  // Assuming the path to be a directory
+  // do a bulk operation.
+  RemoteIterator listFilesAssumingDir =
+  getListFilesAssumingDir(path,
+  recursive,
+  acceptor,
+  collectTombstones,
+  forceNonAuthoritativeMS);
+  // If there are no list entries present, we
+  // fallback to file existence check as the path
+  // can be a file or empty directory.
+  if (!listFilesAssumingDir.hasNext()) {
+final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);
+if (fileStatus.isFile()) {
+  return new Listing.SingleStatusRemoteIterator(
+  toLocatedFileStatus(fileStatus));
 }
-return listing.createTombstoneReconcilingIterator(
-listing.createLocatedFileStatusIterator(
-listing.createFileStatusListingIterator(path,
-createListObjectsRequest(key, delimiter),
-ACCEPT_ALL,
-acceptor,
-cachedFilesIterator)),
-collectTombstones ? tombstones : null);
   }
+  // If we have reached here, it means either there are files
+  // in this directory or it is empty.
+  return listFilesAssumingDir;
 } catch (AmazonClientException e) {
   // TODO S3Guard: retry on file not found exception

Review comment:
   I don't 

[GitHub] [hadoop] mukund-thakur commented on a change in pull request #2038: HADOOP-17022 Tune S3AFileSystem.listFiles() api.

2020-06-10 Thread GitBox


mukund-thakur commented on a change in pull request #2038:
URL: https://github.com/apache/hadoop/pull/2038#discussion_r438192115



##
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##
@@ -4181,79 +4181,114 @@ public LocatedFileStatus next() throws IOException {
 Path path = qualify(f);
 LOG.debug("listFiles({}, {})", path, recursive);
 try {
-  // if a status was given, that is used, otherwise
-  // call getFileStatus, which triggers an existence check
-  final S3AFileStatus fileStatus = status != null
-  ? status
-  : (S3AFileStatus) getFileStatus(path);
-  if (fileStatus.isFile()) {
+  // if a status was given and it is a file.
+  if (status != null && status.isFile()) {
 // simple case: File
 LOG.debug("Path is a file");
 return new Listing.SingleStatusRemoteIterator(
-toLocatedFileStatus(fileStatus));
-  } else {
-// directory: do a bulk operation
-String key = maybeAddTrailingSlash(pathToKey(path));
-String delimiter = recursive ? null : "/";
-LOG.debug("Requesting all entries under {} with delimiter '{}'",
-key, delimiter);
-final RemoteIterator cachedFilesIterator;
-final Set tombstones;
-boolean allowAuthoritative = allowAuthoritative(f);
-if (recursive) {
-  final PathMetadata pm = metadataStore.get(path, true);
-  // shouldn't need to check pm.isDeleted() because that will have
-  // been caught by getFileStatus above.
-  MetadataStoreListFilesIterator metadataStoreListFilesIterator =
-  new MetadataStoreListFilesIterator(metadataStore, pm,
-  allowAuthoritative);
-  tombstones = metadataStoreListFilesIterator.listTombstones();
-  // if all of the below is true
-  //  - authoritative access is allowed for this metadatastore for 
this directory,
-  //  - all the directory listings are authoritative on the client
-  //  - the caller does not force non-authoritative access
-  // return the listing without any further s3 access
-  if (!forceNonAuthoritativeMS &&
-  allowAuthoritative &&
-  metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
-S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
-metadataStoreListFilesIterator, tombstones);
-cachedFilesIterator = listing.createProvidedFileStatusIterator(
-statuses, ACCEPT_ALL, acceptor);
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
-  cachedFilesIterator = metadataStoreListFilesIterator;
-} else {
-  DirListingMetadata meta =
-  S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
-  allowAuthoritative);
-  if (meta != null) {
-tombstones = meta.listTombstones();
-  } else {
-tombstones = null;
-  }
-  cachedFilesIterator = listing.createProvidedFileStatusIterator(
-  S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
-  if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
-// metadata listing is authoritative, so return it directly
-return 
listing.createLocatedFileStatusIterator(cachedFilesIterator);
-  }
+toLocatedFileStatus(status));
+  }
+  // Assuming the path to be a directory
+  // do a bulk operation.
+  RemoteIterator listFilesAssumingDir =
+  getListFilesAssumingDir(path,
+  recursive,
+  acceptor,
+  collectTombstones,
+  forceNonAuthoritativeMS);
+  // If there are no list entries present, we
+  // fallback to file existence check as the path
+  // can be a file or empty directory.
+  if (!listFilesAssumingDir.hasNext()) {
+final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);
+if (fileStatus.isFile()) {
+  return new Listing.SingleStatusRemoteIterator(
+  toLocatedFileStatus(fileStatus));
 }
-return listing.createTombstoneReconcilingIterator(
-listing.createLocatedFileStatusIterator(
-listing.createFileStatusListingIterator(path,
-createListObjectsRequest(key, delimiter),
-ACCEPT_ALL,
-acceptor,
-cachedFilesIterator)),
-collectTombstones ? tombstones : null);
   }
+  // If we have reached here, it means either there are files
+  // in this directory or it is empty.
+  return listFilesAssumingDir;
 } catch (AmazonClientException e) {
   // TODO S3Guard: retry on file not found exception
   throw