Repository: hadoop Updated Branches: refs/heads/HADOOP-13345 6a06ed834 -> 0db7176ba
HADOOP-14488. S3Guard: inconsistency injection not handling deleted paths properly. Contributed by Sean Mackrory and Aaron Fabbri. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0db7176b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0db7176b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0db7176b Branch: refs/heads/HADOOP-13345 Commit: 0db7176baaa2a89c61f6f5043ca15c28c3f89831 Parents: 6a06ed8 Author: Sean Mackrory <[email protected]> Authored: Thu Jun 15 09:26:26 2017 -0600 Committer: Sean Mackrory <[email protected]> Committed: Thu Jun 15 12:57:00 2017 -0600 ---------------------------------------------------------------------- .../fs/s3a/InconsistentAmazonS3Client.java | 117 ++++++++++++++++--- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 3 +- .../fs/s3a/ITestS3GuardListConsistency.java | 84 +++++++++++++ 3 files changed, 184 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/0db7176b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java index 5b62c66..85f4a2f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java @@ -24,14 +24,18 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.DeleteObjectRequest; +import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import com.amazonaws.services.s3.model.DeleteObjectsResult; import com.amazonaws.services.s3.model.ListObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.PutObjectResult; import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -134,10 +138,24 @@ public class InconsistentAmazonS3Client extends AmazonS3Client { } @Override + public DeleteObjectsResult deleteObjects(DeleteObjectsRequest + deleteObjectsRequest) throws AmazonClientException, + AmazonServiceException + { + for (DeleteObjectsRequest.KeyVersion keyVersion : + deleteObjectsRequest.getKeys()) { + registerDeleteObject(keyVersion.getKey(), deleteObjectsRequest + .getBucketName()); + } + return super.deleteObjects(deleteObjectsRequest); + } + + @Override public void deleteObject(DeleteObjectRequest deleteObjectRequest) throws AmazonClientException, AmazonServiceException { - LOG.debug("key {}", deleteObjectRequest.getKey()); - registerDeleteObject(deleteObjectRequest); + String key = deleteObjectRequest.getKey(); + LOG.debug("key {}", key); + registerDeleteObject(key, deleteObjectRequest.getBucketName()); super.deleteObject(deleteObjectRequest); } @@ -161,38 +179,100 @@ public class InconsistentAmazonS3Client extends AmazonS3Client { return listing; } - private boolean addIfNotPresent(List<S3ObjectSummary> list, + private void addSummaryIfNotPresent(List<S3ObjectSummary> list, S3ObjectSummary item) { // Behavior of S3ObjectSummary String key = item.getKey(); for (S3ObjectSummary member : list) { if (member.getKey().equals(key)) { - return false; + return; + } + } + list.add(item); + } + + /** + * Add prefix of child to given list. The added prefix will be equal to + * ancestor plus one directory past ancestor. e.g.: + * if ancestor is "/a/b/c" and child is "/a/b/c/d/e/file" then "a/b/c/d" is + * added to list. + * @param prefixes list to add to + * @param ancestor path we are listing in + * @param child full path to get prefix from + */ + private void addPrefixIfNotPresent(List<String> prefixes, String ancestor, + String child) { + Path prefixCandidate = new Path(child).getParent(); + Path ancestorPath = new Path(ancestor); + Preconditions.checkArgument(child.startsWith(ancestor), "%s does not " + + "start with %s", child, ancestor); + while (!prefixCandidate.isRoot()) { + Path nextParent = prefixCandidate.getParent(); + if (nextParent.equals(ancestorPath)) { + String prefix = prefixCandidate.toString(); + if (!prefixes.contains(prefix)) { + prefixes.add(prefix); + } + return; + } + prefixCandidate = nextParent; + } + } + + /** + * Checks that the parent key is an ancestor of the child key. + * @param parent key that may be the parent. + * @param child key that may be the child. + * @param recursive if false, only return true for direct children. If + * true, any descendant will count. + * @return true if parent is an ancestor of child + */ + private boolean isDescendant(String parent, String child, boolean recursive) { + if (recursive) { + if (!parent.endsWith("/")) { + parent = parent + "/"; } + return child.startsWith(parent); + } else { + Path actualParentPath = new Path(child).getParent(); + Path expectedParentPath = new Path(parent); + return actualParentPath.equals(expectedParentPath); } - return list.add(item); } + /** + * Simulate eventual consistency of delete for this list operation: Any + * recently-deleted keys will be added. + * @param request List request + * @param rawListing listing returned from underlying S3 + * @return listing with recently-deleted items restored + */ private ObjectListing restoreListObjects(ListObjectsRequest request, - ObjectListing rawListing) { + ObjectListing rawListing) { List<S3ObjectSummary> outputList = rawListing.getObjectSummaries(); List<String> outputPrefixes = rawListing.getCommonPrefixes(); + // recursive list has no delimiter, returns everything that matches a + // prefix. + boolean recursiveObjectList = !("/".equals(request.getDelimiter())); + + // Go through all deleted keys for (String key : new HashSet<>(delayedDeletes.keySet())) { Delete delete = delayedDeletes.get(key); if (isKeyDelayed(delete.time(), key)) { - // TODO this works fine for flat directories but: - // if you have a delayed key /a/b/c/d and you are listing /a/b, - // this incorrectly will add /a/b/c/d to the listing for b - if (key.startsWith(request.getPrefix())) { - if (delete.summary == null) { - if (!outputPrefixes.contains(key)) { - outputPrefixes.add(key); - } - } else { - addIfNotPresent(outputList, delete.summary()); + if (isDescendant(request.getPrefix(), key, recursiveObjectList)) { + if (delete.summary() != null) { + addSummaryIfNotPresent(outputList, delete.summary()); + } + } + // Non-recursive list has delimiter: will return rolled-up prefixes for + // all keys that are not direct children + if (!recursiveObjectList) { + if (isDescendant(request.getPrefix(), key, true)) { + addPrefixIfNotPresent(outputPrefixes, request.getPrefix(), key); } } } else { + // Clean up any expired entries delayedDeletes.remove(key); } } @@ -240,12 +320,11 @@ public class InconsistentAmazonS3Client extends AmazonS3Client { } } - private void registerDeleteObject(DeleteObjectRequest req) { - String key = req.getKey(); + private void registerDeleteObject(String key, String bucket) { if (shouldDelay(key)) { // Record summary so we can add it back for some time post-deletion S3ObjectSummary summary = null; - ObjectListing list = listObjects(req.getBucketName(), key); + ObjectListing list = listObjects(bucket, key); for (S3ObjectSummary result : list.getObjectSummaries()) { if (result.getKey().equals(key)) { summary = result; http://git-wip-us.apache.org/repos/asf/hadoop/blob/0db7176b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 4eb94ad..51543f8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1595,7 +1595,8 @@ public class S3AFileSystem extends FileSystem { * @param delimiter any delimiter * @return the request */ - private ListObjectsRequest createListObjectsRequest(String key, + @VisibleForTesting + ListObjectsRequest createListObjectsRequest(String key, String delimiter) { ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(bucket); http://git-wip-us.apache.org/repos/asf/hadoop/blob/0db7176b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index e06afd0..b0da172 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -18,7 +18,11 @@ package org.apache.hadoop.fs.s3a; +import com.amazonaws.services.s3.model.ObjectListing; +import com.amazonaws.services.s3.AmazonS3; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; @@ -34,6 +38,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*; @@ -452,4 +457,83 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { } } + @Test + public void testCommitByRenameOperations() throws Throwable { + S3AFileSystem fs = getFileSystem(); + Assume.assumeTrue(fs.hasMetadataStore()); + Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING); + Path task00 = new Path(work, "task00"); + fs.mkdirs(task00); + String name = "part-00"; + try (FSDataOutputStream out = + fs.create(new Path(task00, name), false)) { + out.writeChars("hello"); + } + for (FileStatus stat : fs.listStatus(task00)) { + fs.rename(stat.getPath(), work); + } + List<FileStatus> files = new ArrayList<>(2); + for (FileStatus stat : fs.listStatus(work)) { + if (stat.isFile()) { + files.add(stat); + } + } + assertFalse("renamed file " + name + " not found in " + work, + files.isEmpty()); + assertEquals("more files found than expected in " + work + + " " + ls(work), 1, files.size()); + FileStatus status = files.get(0); + assertEquals("Wrong filename in " + status, + name, status.getPath().getName()); + } + + @Test + public void testInconsistentS3ClientDeletes() throws Throwable { + S3AFileSystem fs = getFileSystem(); + Path root = path("testInconsistentClient" + DEFAULT_DELAY_KEY_SUBSTRING); + for (int i = 0; i < 3; i++) { + fs.mkdirs(new Path(root, "dir" + i)); + touch(fs, new Path(root, "file" + i)); + for (int j = 0; j < 3; j++) { + touch(fs, new Path(new Path(root, "dir" + i), "file" + i + "-" + j)); + } + } + Thread.sleep(2 * DEFAULT_DELAY_KEY_MSEC); + + AmazonS3 client = fs.getAmazonS3Client(); + String key = fs.pathToKey(root) + "/"; + + ObjectListing preDeleteDelimited = client.listObjects( + fs.createListObjectsRequest(key, "/")); + ObjectListing preDeleteUndelimited = client.listObjects( + fs.createListObjectsRequest(key, null)); + + fs.delete(root, true); + + ObjectListing postDeleteDelimited = client.listObjects( + fs.createListObjectsRequest(key, "/")); + ObjectListing postDeleteUndelimited = client.listObjects( + fs.createListObjectsRequest(key, null)); + + assertEquals("InconsistentAmazonS3Client added back objects incorrectly " + + "in a non-recursive listing", + preDeleteDelimited.getObjectSummaries().size(), + postDeleteDelimited.getObjectSummaries().size() + ); + assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " + + "in a non-recursive listing", + preDeleteDelimited.getCommonPrefixes().size(), + postDeleteDelimited.getCommonPrefixes().size() + ); + assertEquals("InconsistentAmazonS3Client added back objects incorrectly " + + "in a recursive listing", + preDeleteUndelimited.getObjectSummaries().size(), + postDeleteUndelimited.getObjectSummaries().size() + ); + assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " + + "in a recursive listing", + preDeleteUndelimited.getCommonPrefixes().size(), + postDeleteUndelimited.getCommonPrefixes().size() + ); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
