This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f6a59edc79a [SPARK-45719][K8S][TESTS] Upgrade AWS SDK to v2 for Kubernetes IT
f6a59edc79a is described below

commit f6a59edc79ab16556093b1f86cbf890717687e1a
Author: Junyu Chen <junyu...@amazon.com>
AuthorDate: Wed Nov 15 14:09:10 2023 -0800

    [SPARK-45719][K8S][TESTS] Upgrade AWS SDK to v2 for Kubernetes IT
    
    ### What changes were proposed in this pull request?
    As Spark moves to 4.0, one of the major improvements is to upgrade the AWS SDK to v2, as tracked in the parent Jira: https://issues.apache.org/jira/browse/SPARK-44124.
    
    Currently, some tests in this module (i.e. DepsTestsSuite) use an S3 client that requires AWS credentials during initialization.
    
    As part of the SDK upgrade, the main purpose of this PR is to upgrade the AWS SDK to v2 for the Kubernetes integration tests module.
    
    ### Why are the changes needed?
    
    With the GA of AWS SDK v2, SDK v1 has entered maintenance mode, where future releases are limited to critical bug fixes and security issues. More details about the SDK maintenance policy can be found here: https://docs.aws.amazon.com/sdkref/latest/guide/maint-policy.html.
    To keep Spark's dependencies up to date, we should upgrade the SDK to v2.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, because this change only impacts the integration test code.
    
    ### How was this patch tested?
    
    The existing integration tests in the k8s integration test module passed.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #43510 from junyuc25/junyuc25/k8s-test.
    
    Authored-by: Junyu Chen <junyu...@amazon.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .github/workflows/build_and_test.yml               |  2 +-
 pom.xml                                            |  1 +
 .../kubernetes/integration-tests/pom.xml           |  6 ++--
 .../k8s/integrationtest/DepsTestsSuite.scala       | 35 +++++++++++++++-------
 4 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 7c23980d281..25af93af280 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1065,7 +1065,7 @@ jobs:
          kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
          kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.1/installer/volcano-development.yaml || true
           eval $(minikube docker-env)
-          build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
+          build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
       - name: Upload Spark on K8S integration tests log files
         if: ${{ !success() }}
         uses: actions/upload-artifact@v3
diff --git a/pom.xml b/pom.xml
index 14754c0bcaa..f4aeb5d935b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -162,6 +162,7 @@
     <aws.kinesis.client.version>1.12.0</aws.kinesis.client.version>
     <!-- Should be consistent with Kinesis client dependency -->
     <aws.java.sdk.version>1.11.655</aws.java.sdk.version>
+    <aws.java.sdk.v2.version>2.20.160</aws.java.sdk.v2.version>
     <!-- the producer is used in tests -->
     <aws.kinesis.producer.version>0.12.8</aws.kinesis.producer.version>
     <gcs-connector.version>hadoop3-2.2.17</gcs-connector.version>
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index c5f55c52d0b..518c5bc2170 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -210,9 +210,9 @@
       </activation>
       <dependencies>
         <dependency>
-          <groupId>com.amazonaws</groupId>
-          <artifactId>aws-java-sdk-bundle</artifactId>
-          <version>1.11.375</version>
+          <groupId>software.amazon.awssdk</groupId>
+          <artifactId>bundle</artifactId>
+          <version>${aws.java.sdk.v2.version}</version>
           <scope>test</scope>
         </dependency>
       </dependencies>
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala
index e4650479b2c..f4e23cf839c 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala
@@ -17,22 +17,25 @@
 package org.apache.spark.deploy.k8s.integrationtest
 
 import java.io.File
-import java.net.URL
+import java.net.{URI, URL}
 import java.nio.file.Files
 
 import scala.jdk.CollectionConverters._
 
-import com.amazonaws.auth.BasicAWSCredentials
-import com.amazonaws.services.s3.AmazonS3Client
 import io.fabric8.kubernetes.api.model._
 import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder
 import org.apache.hadoop.util.VersionInfo
 import org.scalatest.concurrent.{Eventually, PatienceConfiguration}
 import org.scalatest.time.{Minutes, Span}
+import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider}
+import software.amazon.awssdk.core.sync.RequestBody
+import software.amazon.awssdk.regions.Region
+import software.amazon.awssdk.services.s3.S3Client
+import software.amazon.awssdk.services.s3.model.{CreateBucketRequest, PutObjectRequest}
 
 import org.apache.spark.SparkException
 import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, FILE_CONTENTS, HOST_PATH}
-import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, MinikubeTag, SPARK_PI_MAIN_CLASS, TIMEOUT}
+import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite._
 import org.apache.spark.deploy.k8s.integrationtest.Utils.getExamplesJarName
 import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube
 import org.apache.spark.internal.config.{ARCHIVES, PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON}
@@ -45,6 +48,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite =>
   val BUCKET = "spark"
   val ACCESS_KEY = "minio"
   val SECRET_KEY = "miniostorage"
+  val REGION = "us-west-2"
 
   private def getMinioContainer(): Container = {
     val envVars = Map (
@@ -302,10 +306,13 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite =>
   private def getS3Client(
       endPoint: String,
       accessKey: String = ACCESS_KEY,
-      secretKey: String = SECRET_KEY): AmazonS3Client = {
-    val credentials = new BasicAWSCredentials(accessKey, secretKey)
-    val s3client = new AmazonS3Client(credentials)
-    s3client.setEndpoint(endPoint)
+      secretKey: String = SECRET_KEY): S3Client = {
+    val credentials = AwsBasicCredentials.create(accessKey, secretKey)
+    val s3client = S3Client.builder()
+      .credentialsProvider(StaticCredentialsProvider.create(credentials))
+      .endpointOverride(URI.create(endPoint))
+      .region(Region.of(REGION))
+      .build()
     s3client
   }
 
@@ -313,9 +320,13 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite =>
     Eventually.eventually(TIMEOUT, INTERVAL) {
       try {
         val s3client = getS3Client(endPoint, accessKey, secretKey)
-        s3client.createBucket(BUCKET)
+        val createBucketRequest = CreateBucketRequest.builder()
+          .bucket(BUCKET)
+          .build()
+        s3client.createBucket(createBucketRequest)
       } catch {
         case e: Exception =>
+          logError(s"Failed to create bucket $BUCKET", e)
           throw new SparkException(s"Failed to create bucket $BUCKET.", e)
       }
     }
@@ -328,7 +339,11 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite =>
     Eventually.eventually(TIMEOUT, INTERVAL) {
       try {
         val s3client = getS3Client(endPoint)
-        s3client.putObject(BUCKET, objectKey, objectContent)
+        val putObjectRequest = PutObjectRequest.builder()
+          .bucket(BUCKET)
+          .key(objectKey)
+          .build()
+        s3client.putObject(putObjectRequest, RequestBody.fromString(objectContent))
       } catch {
         case e: Exception =>
           throw new SparkException(s"Failed to create object $BUCKET/$objectKey.", e)
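
For reference, the request-object pattern introduced in the hunks above generalizes to the suite's other S3 calls. A self-contained sketch under the same assumptions as the tests (the MinIO-style endpoint, credentials, bucket, and key below are illustrative, not part of this commit):

    import java.net.URI
    import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider}
    import software.amazon.awssdk.core.sync.RequestBody
    import software.amazon.awssdk.regions.Region
    import software.amazon.awssdk.services.s3.S3Client
    import software.amazon.awssdk.services.s3.model.{CreateBucketRequest, GetObjectRequest, PutObjectRequest}

    object S3V2Sketch {
      def main(args: Array[String]): Unit = {
        // Client construction mirrors getS3Client() in DepsTestsSuite.
        val s3 = S3Client.builder()
          .credentialsProvider(StaticCredentialsProvider.create(
            AwsBasicCredentials.create("minio", "miniostorage")))
          .endpointOverride(URI.create("http://localhost:9000"))
          .region(Region.US_WEST_2)
          .build()

        // Every v2 operation takes an immutable request object built via a builder.
        s3.createBucket(CreateBucketRequest.builder().bucket("spark").build())

        // Payloads are passed explicitly as a RequestBody rather than a raw string.
        s3.putObject(
          PutObjectRequest.builder().bucket("spark").key("example.txt").build(),
          RequestBody.fromString("hello"))

        // Read the object back to verify the round trip.
        val text = s3.getObjectAsBytes(
          GetObjectRequest.builder().bucket("spark").key("example.txt").build()).asUtf8String()
        assert(text == "hello")

        s3.close() // v2 clients are AutoCloseable
      }
    }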


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
