This is an automated email from the ASF dual-hosted git repository.

jackye pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 5c7a5ead85 AWS: Support S3 acceleration mode (#5555)
5c7a5ead85 is described below

commit 5c7a5ead85894e7ed523b6d0bd8953b0f7036cdc
Author: price-qian <[email protected]>
AuthorDate: Thu Aug 18 09:13:22 2022 -0700

    AWS: Support S3 acceleration mode (#5555)
---
 .../java/org/apache/iceberg/aws/AwsClientFactories.java | 13 ++++++++++++-
 .../main/java/org/apache/iceberg/aws/AwsProperties.java | 10 ++++++++++
 docs/aws.md                                             | 17 +++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java 
b/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
index fa15410755..9c8f12c6f5 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
@@ -94,6 +94,7 @@ public class AwsClientFactories {
     private String s3SessionToken;
     private Boolean s3PathStyleAccess;
     private Boolean s3UseArnRegionEnabled;
+    private Boolean s3AccelerationEnabled;
     private String dynamoDbEndpoint;
     private String httpClientType;
 
@@ -104,7 +105,12 @@ public class AwsClientFactories {
       return S3Client.builder()
           .httpClientBuilder(configureHttpClientBuilder(httpClientType))
           .applyMutation(builder -> configureEndpoint(builder, s3Endpoint))
-          .serviceConfiguration(s3Configuration(s3PathStyleAccess, 
s3UseArnRegionEnabled))
+          .serviceConfiguration(
+              S3Configuration.builder()
+                  .pathStyleAccessEnabled(s3PathStyleAccess)
+                  .useArnRegionEnabled(s3UseArnRegionEnabled)
+                  .accelerateModeEnabled(s3AccelerationEnabled)
+                  .build())
           .credentialsProvider(
               credentialsProvider(s3AccessKeyId, s3SecretAccessKey, 
s3SessionToken))
           .build();
@@ -150,6 +156,11 @@ public class AwsClientFactories {
               properties,
               AwsProperties.S3_USE_ARN_REGION_ENABLED,
               AwsProperties.S3_USE_ARN_REGION_ENABLED_DEFAULT);
+      this.s3AccelerationEnabled =
+          PropertyUtil.propertyAsBoolean(
+              properties,
+              AwsProperties.S3_ACCELERATION_ENABLED,
+              AwsProperties.S3_ACCELERATION_ENABLED_DEFAULT);
 
       ValidationException.check(
           (s3AccessKeyId == null) == (s3SecretAccessKey == null),
diff --git a/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java 
b/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
index baf1ed1d2b..48bb1c6ead 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
@@ -374,6 +374,16 @@ public class AwsProperties implements Serializable {
 
   public static final boolean S3_DELETE_ENABLED_DEFAULT = true;
 
+  /**
+   * Determines if the S3 client will use Acceleration Mode, defaults to false.
+   *
+   * <p>For more details, see
+   * https://docs.aws.amazon.com/AmazonS3/latest/userguide/transfer-acceleration.html
+   */
+  public static final String S3_ACCELERATION_ENABLED = 
"s3.acceleration-enabled";
+
+  public static final boolean S3_ACCELERATION_ENABLED_DEFAULT = false;
+
   /**
    * Used by {@link S3FileIO}, prefix used for bucket access point 
configuration. To set, we can
    * pass a catalog property.
diff --git a/docs/aws.md b/docs/aws.md
index b34488f225..bd35a0bf0b 100644
--- a/docs/aws.md
+++ b/docs/aws.md
@@ -492,6 +492,23 @@ access-point for all S3 operations.
 
 For more details on using access-points, please refer [Using access points 
with compatible Amazon S3 
operations](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points-usage-examples.html).
 
+### S3 Acceleration
+
+[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be 
used to speed up transfers to and from Amazon S3 by as much as 50-500% for 
long-distance transfer of larger objects.
+
+To use S3 Acceleration, we need to set the `s3.acceleration-enabled` catalog property to `true` to enable `S3FileIO` to make accelerated S3 calls.
+
+For example, to use S3 Acceleration with Spark 3.0, you can start the Spark 
SQL shell with:
+```
+spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+    --conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
+    --conf spark.sql.catalog.my_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog \
+    --conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+    --conf spark.sql.catalog.my_catalog.s3.acceleration-enabled=true
+```
+
+For more details on using S3 Acceleration, please refer to [Configuring fast, 
secure file transfers using Amazon S3 Transfer 
Acceleration](https://docs.aws.amazon.com/AmazonS3/latest/userguide/transfer-acceleration.html).
+
 ## AWS Client Customization
 
 Many organizations have customized their way of configuring AWS clients with 
their own credential provider, access proxy, retry strategy, etc.

Reply via email to