This is an automated email from the ASF dual-hosted git repository.
jackye pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 5c7a5ead85 AWS: Support S3 acceleration mode (#5555)
5c7a5ead85 is described below
commit 5c7a5ead85894e7ed523b6d0bd8953b0f7036cdc
Author: price-qian <[email protected]>
AuthorDate: Thu Aug 18 09:13:22 2022 -0700
AWS: Support S3 acceleration mode (#5555)
---
.../java/org/apache/iceberg/aws/AwsClientFactories.java | 13 ++++++++++++-
.../main/java/org/apache/iceberg/aws/AwsProperties.java | 10 ++++++++++
docs/aws.md | 17 +++++++++++++++++
3 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
b/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
index fa15410755..9c8f12c6f5 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/AwsClientFactories.java
@@ -94,6 +94,7 @@ public class AwsClientFactories {
private String s3SessionToken;
private Boolean s3PathStyleAccess;
private Boolean s3UseArnRegionEnabled;
+ private Boolean s3AccelerationEnabled;
private String dynamoDbEndpoint;
private String httpClientType;
@@ -104,7 +105,12 @@ public class AwsClientFactories {
return S3Client.builder()
.httpClientBuilder(configureHttpClientBuilder(httpClientType))
.applyMutation(builder -> configureEndpoint(builder, s3Endpoint))
- .serviceConfiguration(s3Configuration(s3PathStyleAccess,
s3UseArnRegionEnabled))
+ .serviceConfiguration(
+ S3Configuration.builder()
+ .pathStyleAccessEnabled(s3PathStyleAccess)
+ .useArnRegionEnabled(s3UseArnRegionEnabled)
+ .accelerateModeEnabled(s3AccelerationEnabled)
+ .build())
.credentialsProvider(
credentialsProvider(s3AccessKeyId, s3SecretAccessKey,
s3SessionToken))
.build();
@@ -150,6 +156,11 @@ public class AwsClientFactories {
properties,
AwsProperties.S3_USE_ARN_REGION_ENABLED,
AwsProperties.S3_USE_ARN_REGION_ENABLED_DEFAULT);
+ this.s3AccelerationEnabled =
+ PropertyUtil.propertyAsBoolean(
+ properties,
+ AwsProperties.S3_ACCELERATION_ENABLED,
+ AwsProperties.S3_ACCELERATION_ENABLED_DEFAULT);
ValidationException.check(
(s3AccessKeyId == null) == (s3SecretAccessKey == null),
diff --git a/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
b/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
index baf1ed1d2b..48bb1c6ead 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
@@ -374,6 +374,16 @@ public class AwsProperties implements Serializable {
public static final boolean S3_DELETE_ENABLED_DEFAULT = true;
+ /**
+ * Determines if the S3 client will use Acceleration Mode; defaults to false.
+ *
+ * <p>For more details, see
+ * https://docs.aws.amazon.com/AmazonS3/latest/userguide/transfer-acceleration.html
+ */
+ public static final String S3_ACCELERATION_ENABLED =
"s3.acceleration-enabled";
+
+ public static final boolean S3_ACCELERATION_ENABLED_DEFAULT = false;
+
/**
* Used by {@link S3FileIO}, prefix used for bucket access point
configuration. To set, we can
* pass a catalog property.
diff --git a/docs/aws.md b/docs/aws.md
index b34488f225..bd35a0bf0b 100644
--- a/docs/aws.md
+++ b/docs/aws.md
@@ -492,6 +492,23 @@ access-point for all S3 operations.
For more details on using access-points, please refer [Using access points
with compatible Amazon S3
operations](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points-usage-examples.html).
+### S3 Acceleration
+
+[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be
used to speed up transfers to and from Amazon S3 by as much as 50-500% for
long-distance transfer of larger objects.
+
+To use S3 Acceleration, set the `s3.acceleration-enabled` catalog property to `true` so that `S3FileIO` makes accelerated S3 calls.
+
+For example, to use S3 Acceleration with Spark 3.0, you can start the Spark
SQL shell with:
+```
+spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+ --conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
+ --conf spark.sql.catalog.my_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog \
+ --conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+ --conf spark.sql.catalog.my_catalog.s3.acceleration-enabled=true
+```
+
+For more details on using S3 Acceleration, please refer to [Configuring fast,
secure file transfers using Amazon S3 Transfer
Acceleration](https://docs.aws.amazon.com/AmazonS3/latest/userguide/transfer-acceleration.html).
+
## AWS Client Customization
Many organizations have customized their way of configuring AWS clients with
their own credential provider, access proxy, retry strategy, etc.