This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f6a59edc79a [SPARK-45719][K8S][TESTS] Upgrade AWS SDK to v2 for Kubernetes IT f6a59edc79a is described below commit f6a59edc79ab16556093b1f86cbf890717687e1a Author: Junyu Chen <junyu...@amazon.com> AuthorDate: Wed Nov 15 14:09:10 2023 -0800 [SPARK-45719][K8S][TESTS] Upgrade AWS SDK to v2 for Kubernetes IT ### What changes were proposed in this pull request? As Spark is moving to 4.0, one of the major improvements is to upgrade the AWS SDK to v2, as tracked in this parent Jira: https://issues.apache.org/jira/browse/SPARK-44124. Currently, some tests in this module (i.e. DepsTestsSuite) use an S3 client, which requires AWS credentials during initialization. As part of the SDK upgrade, the main purpose of this PR is to upgrade the AWS SDK to v2 for the Kubernetes integration tests module. ### Why are the changes needed? With the GA of AWS SDK v2, SDK v1 has entered maintenance mode, where its future releases are limited to addressing critical bug fixes and security issues. More details about the SDK maintenance policy can be found here: https://docs.aws.amazon.com/sdkref/latest/guide/maint-policy.html. To keep Spark’s software dependencies up to date, we should consider upgrading the SDK to v2. ### Does this PR introduce _any_ user-facing change? No, because this change only impacts the integration test code. ### How was this patch tested? The existing integration tests in the k8s integration test module passed ### Was this patch authored or co-authored using generative AI tooling? No Closes #43510 from junyuc25/junyuc25/k8s-test. 
Authored-by: Junyu Chen <junyu...@amazon.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .github/workflows/build_and_test.yml | 2 +- pom.xml | 1 + .../kubernetes/integration-tests/pom.xml | 6 ++-- .../k8s/integrationtest/DepsTestsSuite.scala | 35 +++++++++++++++------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 7c23980d281..25af93af280 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1065,7 +1065,7 @@ jobs: kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.1/installer/volcano-development.yaml || true eval $(minikube docker-env) - build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" + build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" - name: Upload Spark on K8S integration tests log files if: ${{ !success() }} uses: actions/upload-artifact@v3 diff --git a/pom.xml b/pom.xml index 14754c0bcaa..f4aeb5d935b 100644 --- a/pom.xml +++ b/pom.xml @@ -162,6 +162,7 @@ <aws.kinesis.client.version>1.12.0</aws.kinesis.client.version> <!-- Should be consistent with Kinesis client dependency --> <aws.java.sdk.version>1.11.655</aws.java.sdk.version> + <aws.java.sdk.v2.version>2.20.160</aws.java.sdk.v2.version> <!-- the producer is used in tests --> 
<aws.kinesis.producer.version>0.12.8</aws.kinesis.producer.version> <gcs-connector.version>hadoop3-2.2.17</gcs-connector.version> diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index c5f55c52d0b..518c5bc2170 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -210,9 +210,9 @@ </activation> <dependencies> <dependency> - <groupId>com.amazonaws</groupId> - <artifactId>aws-java-sdk-bundle</artifactId> - <version>1.11.375</version> + <groupId>software.amazon.awssdk</groupId> + <artifactId>bundle</artifactId> + <version>${aws.java.sdk.v2.version}</version> <scope>test</scope> </dependency> </dependencies> diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index e4650479b2c..f4e23cf839c 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -17,22 +17,25 @@ package org.apache.spark.deploy.k8s.integrationtest import java.io.File -import java.net.URL +import java.net.{URI, URL} import java.nio.file.Files import scala.jdk.CollectionConverters._ -import com.amazonaws.auth.BasicAWSCredentials -import com.amazonaws.services.s3.AmazonS3Client import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder import org.apache.hadoop.util.VersionInfo import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Span} +import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} 
+import software.amazon.awssdk.core.sync.RequestBody +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3Client +import software.amazon.awssdk.services.s3.model.{CreateBucketRequest, PutObjectRequest} import org.apache.spark.SparkException import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, FILE_CONTENTS, HOST_PATH} -import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, MinikubeTag, SPARK_PI_MAIN_CLASS, TIMEOUT} +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite._ import org.apache.spark.deploy.k8s.integrationtest.Utils.getExamplesJarName import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube import org.apache.spark.internal.config.{ARCHIVES, PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} @@ -45,6 +48,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => val BUCKET = "spark" val ACCESS_KEY = "minio" val SECRET_KEY = "miniostorage" + val REGION = "us-west-2" private def getMinioContainer(): Container = { val envVars = Map ( @@ -302,10 +306,13 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => private def getS3Client( endPoint: String, accessKey: String = ACCESS_KEY, - secretKey: String = SECRET_KEY): AmazonS3Client = { - val credentials = new BasicAWSCredentials(accessKey, secretKey) - val s3client = new AmazonS3Client(credentials) - s3client.setEndpoint(endPoint) + secretKey: String = SECRET_KEY): S3Client = { + val credentials = AwsBasicCredentials.create(accessKey, secretKey) + val s3client = S3Client.builder() + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .endpointOverride(URI.create(endPoint)) + .region(Region.of(REGION)) + .build() s3client } @@ -313,9 +320,13 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => Eventually.eventually(TIMEOUT, INTERVAL) { try { val s3client = getS3Client(endPoint, accessKey, secretKey) - s3client.createBucket(BUCKET) + val 
createBucketRequest = CreateBucketRequest.builder() + .bucket(BUCKET) + .build() + s3client.createBucket(createBucketRequest) } catch { case e: Exception => + logError(s"Failed to create bucket $BUCKET", e) throw new SparkException(s"Failed to create bucket $BUCKET.", e) } } @@ -328,7 +339,11 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => Eventually.eventually(TIMEOUT, INTERVAL) { try { val s3client = getS3Client(endPoint) - s3client.putObject(BUCKET, objectKey, objectContent) + val putObjectRequest = PutObjectRequest.builder() + .bucket(BUCKET) + .key(objectKey) + .build() + s3client.putObject(putObjectRequest, RequestBody.fromString(objectContent)) } catch { case e: Exception => throw new SparkException(s"Failed to create object $BUCKET/$objectKey.", e) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org