HADOOP-12292. Make use of DeleteObjects optional. (Thomas Demoor via stevel)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/29ae2580
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/29ae2580
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/29ae2580

Branch: refs/heads/yarn-2877
Commit: 29ae25801380b94442253c4202dee782dc4713f5
Parents: fe124da
Author: Steve Loughran <ste...@apache.org>
Authored: Sat Feb 6 15:05:16 2016 +0000
Committer: Steve Loughran <ste...@apache.org>
Committed: Sat Feb 6 15:05:16 2016 +0000

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 ++
 .../src/main/resources/core-default.xml         |  9 ++++
 hadoop-project/pom.xml                          |  3 +-
 .../org/apache/hadoop/fs/s3a/Constants.java     |  3 ++
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 57 +++++++++++++-------
 .../src/site/markdown/tools/hadoop-aws/index.md |  9 ++++
 .../hadoop/fs/s3a/scale/S3AScaleTestBase.java   | 11 +++-
 .../fs/s3a/scale/TestS3ADeleteManyFiles.java    |  2 +-
 8 files changed, 75 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 73eadef..c8effc5 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -265,6 +265,9 @@ Trunk (Unreleased)
     HADOOP-11828. Implement the Hitchhiker erasure coding algorithm.
     (Jack Liuquan via zhz)
 
+    HADOOP-12292. Make use of DeleteObjects optional.
+    (Thomas Demoor via stevel)
+
   BUG FIXES
 
     HADOOP-12617. SPNEGO authentication request to non-default realm gets

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index ceae3ed..34e1236 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -888,6 +888,15 @@ for ldap providers in the same way as above does.
 </property>
 
 <property>
+  <name>fs.s3a.multiobjectdelete.enable</name>
+  <value>true</value>
+  <description>When enabled, multiple single-object delete requests are replaced by
+    a single 'delete multiple objects' request, reducing the number of requests.
+    Beware: legacy S3-compatible object stores might not support this request.
+  </description>
+</property>
+
+<property>
   <name>fs.s3a.acl.default</name>
   <description>Set a canned ACL for newly created and copied objects.
   Value may be private, public-read, public-read-write, authenticated-read, log-delivery-write,
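(Illustration only, not part of the patch: a client working against a legacy S3-compatible endpoint can turn the new batching off through the standard Hadoop Configuration API. A minimal sketch; the bucket URI, path, and class name below are placeholders.)

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class LegacyStoreDelete {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Fall back to one DELETE request per object, for stores that
        // do not implement the S3 'delete multiple objects' call.
        conf.setBoolean("fs.s3a.multiobjectdelete.enable", false);
        FileSystem fs = FileSystem.get(new URI("s3a://example-bucket/"), conf);
        fs.delete(new Path("/tmp/dataset"), true);   // recursive delete
        fs.close();
      }
    }

The same effect can be had by setting the property in core-site.xml, as shown above.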
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-project/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 4e9b502..6078573 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -115,6 +115,7 @@
     <exec-maven-plugin.version>1.3.1</exec-maven-plugin.version>
     <make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
     <native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version>
+    <surefire.fork.timeout>900</surefire.fork.timeout>
   </properties>
 
   <dependencyManagement>
@@ -1129,7 +1130,7 @@
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
           <reuseForks>false</reuseForks>
-          <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
+          <forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds>
           <argLine>${maven-surefire-plugin.argLine}</argLine>
           <environmentVariables>
             <HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index fc99cb9..f10f3db 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -79,6 +79,9 @@ public class Constants {
   public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
   public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;
 
+  // enable multiobject-delete calls?
+  public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable";
+
   // comma separated list of directories
   public static final String BUFFER_DIR = "fs.s3a.buffer.dir";
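(Illustration only, not in the commit: downstream code can read the new flag through this constant, mirroring the lookup S3AFileSystem.initialize() performs in the next file below. The class name here is hypothetical.)

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.s3a.Constants;

    public class MultiDeleteProbe {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Defaults to true unless overridden in core-site.xml
        // or on the command line with -D.
        boolean multiDelete =
            conf.getBoolean(Constants.ENABLE_MULTI_DELETE, true);
        System.out.println("multi-object delete enabled: " + multiDelete);
      }
    }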
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 71771bd..5ea6bec 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -38,6 +38,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain;
 import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3Client;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
+import com.amazonaws.services.s3.model.DeleteObjectRequest;
 import com.amazonaws.services.s3.model.DeleteObjectsRequest;
 import com.amazonaws.services.s3.model.ListObjectsRequest;
 import com.amazonaws.services.s3.model.ObjectListing;
@@ -82,6 +83,7 @@ public class S3AFileSystem extends FileSystem {
   private String bucket;
   private int maxKeys;
   private long partSize;
+  private boolean enableMultiObjectsDelete;
   private TransferManager transfers;
   private ExecutorService threadPoolExecutor;
   private long multiPartThreshold;
@@ -200,6 +202,7 @@ public class S3AFileSystem extends FileSystem {
     partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
     multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD,
         DEFAULT_MIN_MULTIPART_THRESHOLD);
+    enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
 
     if (partSize < 5 * 1024 * 1024) {
       LOG.error(MULTIPART_SIZE + " must be at least 5 MB");
@@ -522,11 +525,7 @@
         copyFile(summary.getKey(), newDstKey);
 
         if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-          DeleteObjectsRequest deleteRequest =
-              new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-          s3.deleteObjects(deleteRequest);
-          statistics.incrementWriteOps(1);
-          keysToDelete.clear();
+          removeKeys(keysToDelete, true);
         }
       }
 
@@ -534,11 +533,8 @@
         objects = s3.listNextBatchOfObjects(objects);
         statistics.incrementReadOps(1);
       } else {
-        if (keysToDelete.size() > 0) {
-          DeleteObjectsRequest deleteRequest =
-              new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-          s3.deleteObjects(deleteRequest);
-          statistics.incrementWriteOps(1);
+        if (!keysToDelete.isEmpty()) {
+          removeKeys(keysToDelete, false);
         }
         break;
       }
@@ -552,6 +548,36 @@
     return true;
   }
 
+  /**
+   * A helper method to delete a list of keys on an S3 back-end.
+   *
+   * @param keysToDelete collection of keys to delete on the S3 back-end
+   * @param clearKeys clears the keysToDelete list after processing
+   *            when set to true
+   */
+  private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
+          boolean clearKeys) {
+    if (enableMultiObjectsDelete) {
+      DeleteObjectsRequest deleteRequest
+          = new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
+      s3.deleteObjects(deleteRequest);
+      statistics.incrementWriteOps(1);
+    } else {
+      int writeops = 0;
+
+      for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
+        s3.deleteObject(
+            new DeleteObjectRequest(bucket, keyVersion.getKey()));
+        writeops++;
+      }
+
+      statistics.incrementWriteOps(writeops);
+    }
+    if (clearKeys) {
+      keysToDelete.clear();
+    }
+  }
+
   /** Delete a file.
    *
    * @param f the path to delete.
@@ -626,11 +652,7 @@
         }
 
         if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-          DeleteObjectsRequest deleteRequest =
-              new DeleteObjectsRequest(bucket).withKeys(keys);
-          s3.deleteObjects(deleteRequest);
-          statistics.incrementWriteOps(1);
-          keys.clear();
+          removeKeys(keys, true);
         }
       }
 
@@ -639,10 +661,7 @@
         statistics.incrementReadOps(1);
       } else {
         if (!keys.isEmpty()) {
-          DeleteObjectsRequest deleteRequest =
-              new DeleteObjectsRequest(bucket).withKeys(keys);
-          s3.deleteObjects(deleteRequest);
-          statistics.incrementWriteOps(1);
+          removeKeys(keys, false);
         }
         break;
       }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 9fb0f36..73775c8 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -267,6 +267,15 @@ If you do any of these: change your credentials immediately!
     </property>
 
     <property>
+      <name>fs.s3a.multiobjectdelete.enable</name>
+      <value>true</value>
+      <description>When enabled, multiple single-object delete requests are replaced by
+        a single 'delete multiple objects' request, reducing the number of requests.
+        Beware: legacy S3-compatible object stores might not support this request.
+      </description>
+    </property>
+
+    <property>
       <name>fs.s3a.acl.default</name>
       <description>Set a canned ACL for newly created and copied objects.
       Value may be private, public-read, public-read-write, authenticated-read, log-delivery-write,
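(A back-of-the-envelope note on what removeKeys() means for request counts; illustration only, not in the commit, and it assumes MAX_ENTRIES_TO_DELETE is the AWS limit of 1000 keys per multi-object delete request.)

    // Requests issued to delete n objects, by mode.
    static long deleteRequests(long n, boolean multiObjectDelete) {
      final int maxEntriesToDelete = 1000;  // assumed batch ceiling
      return multiObjectDelete
          ? (n + maxEntriesToDelete - 1) / maxEntriesToDelete  // one request per batch
          : n;                                                 // one request per key
    }

Deleting 10,000 objects thus costs 10 requests with the option on and 10,000 with it off, which is also what statistics.incrementWriteOps() records in each branch.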
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index e0cbc92..e44a90e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -36,13 +36,21 @@ import static org.junit.Assume.assumeTrue;
 
 /**
  * Base class for scale tests; here is where the common scale configuration
- * keys are defined
+ * keys are defined.
  */
 public class S3AScaleTestBase {
 
   public static final String SCALE_TEST = "scale.test.";
+
+  /**
+   * The number of operations to perform: {@value}
+   */
   public static final String KEY_OPERATION_COUNT =
       SCALE_TEST + "operation.count";
+
+  /**
+   * The default number of operations to perform: {@value}
+   */
   public static final long DEFAULT_OPERATION_COUNT = 2005;
 
   protected S3AFileSystem fs;
@@ -71,6 +79,7 @@ public class S3AScaleTestBase {
   @Before
   public void setUp() throws Exception {
     conf = createConfiguration();
+    LOG.info("Scale test operation count = {}", getOperationCount());
     fs = S3ATestUtils.createTestFileSystem(conf);
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index c913a67..d521ba8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -61,7 +61,7 @@ public class TestS3ADeleteManyFiles extends S3AScaleTestBase {
     // use Executor to speed up file creation
     ExecutorService exec = Executors.newFixedThreadPool(16);
     final ExecutorCompletionService<Boolean> completionService =
-        new ExecutorCompletionService<Boolean>(exec);
+        new ExecutorCompletionService<>(exec);
 
     try {
       final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
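(The hunk above is cut off in this message. For readers unfamiliar with the pattern the test relies on: an ExecutorCompletionService lets it submit file-creation tasks in parallel and drain results in completion order. A generic, self-contained sketch, not the test code itself.)

    import java.util.concurrent.ExecutorCompletionService;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    public class CompletionServiceSketch {
      public static void main(String[] args) throws Exception {
        ExecutorService exec = Executors.newFixedThreadPool(16);
        ExecutorCompletionService<Boolean> completionService =
            new ExecutorCompletionService<>(exec);
        final int tasks = 100;
        for (int i = 0; i < tasks; i++) {
          // In the real test each task would write one small file.
          completionService.submit(() -> Boolean.TRUE);
        }
        for (int i = 0; i < tasks; i++) {
          // take() blocks until the next task finishes;
          // get() rethrows anything the task threw.
          completionService.take().get();
        }
        exec.shutdown();
      }
    }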