This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch feature-HADOOP-18073-s3a-sdk-upgrade
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/feature-HADOOP-18073-s3a-sdk-upgrade by this push:
     new 238ba6dc6f6 HADOOP-18853. Upgrade AWS SDK version to 2.20.128 (#5960)
238ba6dc6f6 is described below

commit 238ba6dc6f60033fda4d63273b56f7d0cd597e81
Author: ahmarsuhail <ahmar.suh...@gmail.com>
AuthorDate: Wed Aug 23 12:02:32 2023 +0100

    HADOOP-18853. Upgrade AWS SDK version to 2.20.128 (#5960)
    
    
    Upgrades the AWS SDK v2 version to 2.20.128
    
    This
    * adds multipart COPY/rename in the java async client
    * removes the aws-crt JAR dependency
    
    Contributed by Ahmar Suhail
---
 LICENSE-binary                                     |  3 +--
 hadoop-project/pom.xml                             | 12 +++--------
 hadoop-tools/hadoop-aws/pom.xml                    |  4 ----
 .../hadoop/fs/s3a/DefaultS3ClientFactory.java      | 11 +++++++++-
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java    |  1 +
 .../org/apache/hadoop/fs/s3a/S3ClientFactory.java  | 24 ++++++++++++++++++++++
 6 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index 851d839cdbd..0c8199da1be 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -363,8 +363,7 @@ org.objenesis:objenesis:2.6
 org.xerial.snappy:snappy-java:1.1.10.1
 org.yaml:snakeyaml:2.0
 org.wildfly.openssl:wildfly-openssl:1.1.3.Final
-software.amazon.awssdk:bundle:jar:2.19.12
-software.amazon.awssdk.crt:aws-crt:0.21.0
+software.amazon.awssdk:bundle:jar:2.20.128
 
 
 
--------------------------------------------------------------------------------
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 809084cb3f7..47fa59cc671 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -184,9 +184,8 @@
     <surefire.fork.timeout>900</surefire.fork.timeout>
     <aws-java-sdk.version>1.12.367</aws-java-sdk.version>
     <hsqldb.version>2.7.1</hsqldb.version>
-    <aws-java-sdk-v2.version>2.19.12</aws-java-sdk-v2.version>
-    <aws.evenstream.version>1.0.1</aws.evenstream.version>
-    <awscrt.version>0.21.0</awscrt.version>
+    <aws-java-sdk-v2.version>2.20.128</aws-java-sdk-v2.version>
+    <aws.eventstream.version>1.0.1</aws.eventstream.version>
     <frontend-maven-plugin.version>1.11.2</frontend-maven-plugin.version>
     <jasmine-maven-plugin.version>2.1</jasmine-maven-plugin.version>
     <phantomjs-maven-plugin.version>0.7</phantomjs-maven-plugin.version>
@@ -1154,12 +1153,7 @@
       <dependency>
         <groupId>software.amazon.eventstream</groupId>
         <artifactId>eventstream</artifactId>
-        <version>${aws.evenstream.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>software.amazon.awssdk.crt</groupId>
-        <artifactId>aws-crt</artifactId>
-        <version>${awscrt.version}</version>
+        <version>${aws.eventstream.version}</version>
       </dependency>
       <dependency>
         <groupId>org.apache.mina</groupId>
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 3591ab4ea5a..66154e2840e 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -518,10 +518,6 @@
       <artifactId>bundle</artifactId>
       <scope>compile</scope>
     </dependency>
-    <dependency>
-      <groupId>software.amazon.awssdk.crt</groupId>
-      <artifactId>aws-crt</artifactId>
-    </dependency>
     <dependency>
       <groupId>software.amazon.eventstream</groupId>
       <artifactId>eventstream</artifactId>
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
index 1b2c129a642..98c72d27662 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
@@ -37,6 +37,7 @@ import software.amazon.awssdk.services.s3.S3AsyncClient;
 import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
 import software.amazon.awssdk.services.s3.S3Client;
 import software.amazon.awssdk.services.s3.S3Configuration;
+import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration;
 import software.amazon.awssdk.transfer.s3.S3TransferManager;
 
 import org.apache.commons.lang3.StringUtils;
@@ -98,17 +99,25 @@ public class DefaultS3ClientFactory extends Configured
 
     Configuration conf = getConf();
     String bucket = uri.getHost();
+
     NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig
         .createAsyncHttpClientBuilder(conf)
         .proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket));
+
+    MultipartConfiguration multipartConfiguration = MultipartConfiguration.builder()
+        .minimumPartSizeInBytes(parameters.getMinimumPartSize())
+        .thresholdInBytes(parameters.getMultiPartThreshold())
+        .build();
+
     return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket)
         .httpClientBuilder(httpClientBuilder)
+        .multipartConfiguration(multipartConfiguration)
+        .multipartEnabled(true)
         .build();
   }
 
   @Override
   public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) {
-
     return S3TransferManager.builder()
         .s3Client(s3AsyncClient)
         .build();
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 305695f2ef5..43f0b9c98cd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -982,6 +982,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
         .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS))
         .withExecutionInterceptors(auditManager.createExecutionInterceptors())
         .withMinimumPartSize(partSize)
+        .withMultipartThreshold(multiPartThreshold)
         .withTransferManagerExecutor(unboundedThreadPool)
         .withRegion(region);
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
index fa2c0769d26..d4504cd08d7 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
@@ -151,6 +151,11 @@ public interface S3ClientFactory {
      */
     private long minimumPartSize;
 
+    /**
+     * Threshold for multipart operations.
+     */
+    private long multiPartThreshold;
+
     /**
      * Executor that the transfer manager will use to execute background tasks.
      */
@@ -337,6 +342,25 @@ public interface S3ClientFactory {
       return this;
     }
 
+    /**
+     * Get the threshold for multipart operations.
+     * @return multipart threshold
+     */
+    public long getMultiPartThreshold() {
+      return multiPartThreshold;
+    }
+
+    /**
+     * Set the threshold for multipart operations.
+     * @param value new value
+     * @return the builder
+     */
+    public S3ClientCreationParameters withMultipartThreshold(
+        final long value) {
+      multiPartThreshold = value;
+      return this;
+    }
+
     /**
      * Get the executor that the transfer manager will use to execute background tasks.
      * @return part size


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to