This is an automated email from the ASF dual-hosted git repository. stevel pushed a commit to branch feature-HADOOP-18073-s3a-sdk-upgrade in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/feature-HADOOP-18073-s3a-sdk-upgrade by this push: new 238ba6dc6f6 HADOOP-18853. Upgrade AWS SDK version to 2.20.128 (#5960) 238ba6dc6f6 is described below commit 238ba6dc6f60033fda4d63273b56f7d0cd597e81 Author: ahmarsuhail <ahmar.suh...@gmail.com> AuthorDate: Wed Aug 23 12:02:32 2023 +0100 HADOOP-18853. Upgrade AWS SDK version to 2.20.128 (#5960) Upgrades the AWS SDK v2 version to 2.20.128. This * adds multipart COPY/rename in the Java async client * removes the aws-crt JAR dependency Contributed by Ahmar Suhail --- LICENSE-binary | 3 +-- hadoop-project/pom.xml | 12 +++-------- hadoop-tools/hadoop-aws/pom.xml | 4 ---- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 11 +++++++++- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 1 + .../org/apache/hadoop/fs/s3a/S3ClientFactory.java | 24 ++++++++++++++++++++++ 6 files changed, 39 insertions(+), 16 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 851d839cdbd..0c8199da1be 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -363,8 +363,7 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.1 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final -software.amazon.awssdk:bundle:jar:2.19.12 -software.amazon.awssdk.crt:aws-crt:0.21.0 +software.amazon.awssdk:bundle:jar:2.20.128 -------------------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 809084cb3f7..47fa59cc671 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -184,9 +184,8 @@ <surefire.fork.timeout>900</surefire.fork.timeout> <aws-java-sdk.version>1.12.367</aws-java-sdk.version> <hsqldb.version>2.7.1</hsqldb.version> - <aws-java-sdk-v2.version>2.19.12</aws-java-sdk-v2.version> - <aws.evenstream.version>1.0.1</aws.evenstream.version> - <awscrt.version>0.21.0</awscrt.version> + <aws-java-sdk-v2.version>2.20.128</aws-java-sdk-v2.version> + 
<aws.eventstream.version>1.0.1</aws.eventstream.version> <frontend-maven-plugin.version>1.11.2</frontend-maven-plugin.version> <jasmine-maven-plugin.version>2.1</jasmine-maven-plugin.version> <phantomjs-maven-plugin.version>0.7</phantomjs-maven-plugin.version> @@ -1154,12 +1153,7 @@ <dependency> <groupId>software.amazon.eventstream</groupId> <artifactId>eventstream</artifactId> - <version>${aws.evenstream.version}</version> - </dependency> - <dependency> - <groupId>software.amazon.awssdk.crt</groupId> - <artifactId>aws-crt</artifactId> - <version>${awscrt.version}</version> + <version>${aws.eventstream.version}</version> </dependency> <dependency> <groupId>org.apache.mina</groupId> diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 3591ab4ea5a..66154e2840e 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -518,10 +518,6 @@ <artifactId>bundle</artifactId> <scope>compile</scope> </dependency> - <dependency> - <groupId>software.amazon.awssdk.crt</groupId> - <artifactId>aws-crt</artifactId> - </dependency> <dependency> <groupId>software.amazon.eventstream</groupId> <artifactId>eventstream</artifactId> diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 1b2c129a642..98c72d27662 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -37,6 +37,7 @@ import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3BaseClientBuilder; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; import 
software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.commons.lang3.StringUtils; @@ -98,17 +99,25 @@ public class DefaultS3ClientFactory extends Configured Configuration conf = getConf(); String bucket = uri.getHost(); + NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig .createAsyncHttpClientBuilder(conf) .proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket)); + + MultipartConfiguration multipartConfiguration = MultipartConfiguration.builder() + .minimumPartSizeInBytes(parameters.getMinimumPartSize()) + .thresholdInBytes(parameters.getMultiPartThreshold()) + .build(); + return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket) .httpClientBuilder(httpClientBuilder) + .multipartConfiguration(multipartConfiguration) + .multipartEnabled(true) .build(); } @Override public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) { - return S3TransferManager.builder() .s3Client(s3AsyncClient) .build(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 305695f2ef5..43f0b9c98cd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -982,6 +982,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS)) .withExecutionInterceptors(auditManager.createExecutionInterceptors()) .withMinimumPartSize(partSize) + .withMultipartThreshold(multiPartThreshold) .withTransferManagerExecutor(unboundedThreadPool) .withRegion(region); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 
fa2c0769d26..d4504cd08d7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -151,6 +151,11 @@ public interface S3ClientFactory { */ private long minimumPartSize; + /** + * Threshold for multipart operations. + */ + private long multiPartThreshold; + /** * Executor that the transfer manager will use to execute background tasks. */ @@ -337,6 +342,25 @@ public interface S3ClientFactory { return this; } + /** + * Get the threshold for multipart operations. + * @return multipart threshold + */ + public long getMultiPartThreshold() { + return multiPartThreshold; + } + + /** + * Set the threshold for multipart operations. + * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withMultipartThreshold( + final long value) { + multiPartThreshold = value; + return this; + } + /** * Get the executor that the transfer manager will use to execute background tasks. * @return part size --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org