This is an automated email from the ASF dual-hosted git repository. yikun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark-docker.git
The following commit(s) were added to refs/heads/master by this push: new 3037f75 [SPARK-40783][INFRA] Enable Spark on K8s integration test 3037f75 is described below commit 3037f75a88ca7ea57746c7d1bf49c125a828f56e Author: Yikun Jiang <yikunk...@gmail.com> AuthorDate: Fri Oct 14 11:57:01 2022 +0800 [SPARK-40783][INFRA] Enable Spark on K8s integration test ### What changes were proposed in this pull request? This patch enables the Spark on K8s integration test: - **scala2.12-java11-python3-ubuntu**: Run scala / PySpark basic test - **scala2.12-java11-ubuntu**: Run scala basic test - **scala2.12-java11-r-ubuntu**: Run scala / SparkR basic test - **scala2.12-java11-python3-r-ubuntu**: Run all K8s integration tests Currently, we use the local registry as a bridge between build and test: <img width="646" alt="image" src="https://user-images.githubusercontent.com/1736354/195758243-abfbea7f-05e9-4678-a3a5-cfd38cc1b8f5.png"> - Build: generate the image and push to local registry - Test: load to minikube docker, run K8s test using specific image Because multi-platform images cannot be exported with the `docker` export type, the local registry (push) is used here rather than local build (load). Compared to `ghcr`, it reduces the network transmission and permissions required. Also: - Upgrade `setup-qemu-action` to v2 - Upgrade `setup-buildx-action` to v2 - Remove unused `Image digest` step ### Why are the changes needed? To ensure the quality of official dockerfiles. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Closes #9 from Yikun/enable-k8s-it. 
Authored-by: Yikun Jiang <yikunk...@gmail.com> Signed-off-by: Yikun Jiang <yikunk...@gmail.com> --- .github/workflows/main.yml | 142 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 13 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7972703..b47245b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,6 +41,15 @@ on: jobs: main: runs-on: ubuntu-latest + # Due to the multi-platform images cannot be exported with the `docker` export type, + # https://github.com/docker/buildx/issues/59 + # So, the local registry (push) is used here rather than local build (load): + # https://github.com/docker/build-push-action/blob/master/docs/advanced/local-registry.md + services: + registry: + image: registry:2 + ports: + - 5000:5000 strategy: matrix: spark_version: @@ -55,29 +64,26 @@ jobs: uses: actions/checkout@v2 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to GHCR - uses: docker/login-action@v2 + uses: docker/setup-buildx-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + # This required by local registry + driver-opts: network=host - name: Generate tags run: | TAG=scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-${{ matrix.image_suffix }} REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - TEST_REPO=ghcr.io/$REPO_OWNER/spark-docker + TEST_REPO=localhost:5000/$REPO_OWNER/spark-docker IMAGE_NAME=spark IMAGE_PATH=${{ matrix.spark_version }}/$TAG UNIQUE_IMAGE_TAG=${{ matrix.spark_version }}-$TAG + IMAGE_URL=$TEST_REPO/$IMAGE_NAME:$UNIQUE_IMAGE_TAG - # Unique image tag in each version: scala2.12-java11-python3-ubuntu + # Unique image tag in each version: 3.3.0-scala2.12-java11-python3-ubuntu echo "UNIQUE_IMAGE_TAG=${UNIQUE_IMAGE_TAG}" >> $GITHUB_ENV # 
Test repo: ghcr.io/apache/spark-docker echo "TEST_REPO=${TEST_REPO}" >> $GITHUB_ENV @@ -85,6 +91,8 @@ jobs: echo "IMAGE_NAME=${IMAGE_NAME}" >> $GITHUB_ENV # Image dockerfile path: 3.3.0/scala2.12-java11-python3-ubuntu echo "IMAGE_PATH=${IMAGE_PATH}" >> $GITHUB_ENV + # Image URL: ghcr.io/apache/spark-docker/spark:3.3.0-scala2.12-java11-python3-ubuntu + echo "IMAGE_URL=${IMAGE_URL}" >> $GITHUB_ENV - name: Print Image tags run: | @@ -92,13 +100,121 @@ jobs: echo "TEST_REPO: "${TEST_REPO} echo "IMAGE_NAME: "${IMAGE_NAME} echo "IMAGE_PATH: "${IMAGE_PATH} + echo "IMAGE_URL: "${IMAGE_URL} - name: Build and push test image uses: docker/build-push-action@v2 with: context: ${{ env.IMAGE_PATH }} - tags: ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }} + tags: ${{ env.IMAGE_URL }} platforms: linux/amd64,linux/arm64 + push: true + + - name: Test - Checkout Spark repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + repository: apache/spark + ref: v${{ matrix.spark_version }} + path: ${{ github.workspace }}/spark + + - name: Test - Cherry pick commits + # Apache Spark enable resource limited k8s IT since v3.3.1, cherry-pick patches for old release + # https://github.com/apache/spark/pull/36087#issuecomment-1251756266 + if: matrix.spark_version == '3.3.0' + working-directory: ${{ github.workspace }}/spark + run: | + # SPARK-38802: Add driverRequestCores/executorRequestCores supported + # https://github.com/apache/spark/commit/83963828b54bffe99527a004057272bc584cbc26 + git -c user.name='Apache Spark Test Account' -c user.email='sparktest...@gmail.com' cherry-pick 83963828b54bffe99527a004057272bc584cbc26 + # SPARK-38803: Lower minio cpu to 250m + # https://github.com/apache/spark/commit/5ea2b386eb866e20540660cdb6ed43792cb29969 + git -c user.name='Apache Spark Test Account' -c user.email='sparktest...@gmail.com' cherry-pick 5ea2b386eb866e20540660cdb6ed43792cb29969 + + - name: Test - Install Java ${{ inputs.java }} + uses: actions/setup-java@v3 + 
with: + # This is required after v2, now just keep same distribution with v1 + # https://github.com/actions/setup-java/releases/tag/v2.0.0 + distribution: 'zulu' + java-version: ${{ matrix.java_version }} + + - name: Test - Cache Scala, SBT and Maven + uses: actions/cache@v3 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }} + + - name: Test - Cache Coursier local repository + uses: actions/cache@v3 + with: + path: ~/.cache/coursier + key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-coursier - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} + - name: Test - Start minikube + run: | + # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ + curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 + sudo install minikube-linux-amd64 /usr/local/bin/minikube + # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic + minikube start --cpus 2 --memory 6144 + + - name: Test - Print K8S pods and nodes info + run: | + kubectl get pods -A + kubectl describe node + + - name: Test - Run Spark on K8S integration test (With driver cpu 0.5, executor cpu 0.2 limited) + working-directory: ${{ github.workspace }}/spark + run: | + kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true + minikube image load ${{ env.IMAGE_URL }} + + eval $(minikube docker-env) + OPTS="-Pkubernetes -Pkubernetes-integration-tests " + OPTS+="-Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 " + OPTS+="-Dspark.kubernetes.test.deployMode=minikube " + OPTS+="-Dspark.kubernetes.test.imageRepo=${TEST_REPO} -Dspark.kubernetes.test.imageTag=${UNIQUE_IMAGE_TAG} " + 
OPTS+="-Dspark.kubernetes.test.jvmImage=${IMAGE_NAME} " + OPTS+="-Dspark.kubernetes.test.pythonImage=${IMAGE_NAME} " + OPTS+="-Dspark.kubernetes.test.rImage=${IMAGE_NAME} " + + if echo ${{ matrix.image_suffix }} | grep -q "python3-r-ubuntu"; then + # Prepare test jar for client tests + CONTAINER_TMP_NAME=spark-example-image + docker create -ti --name $CONTAINER_TMP_NAME ${{ env.IMAGE_URL }} bash + docker cp $CONTAINER_TMP_NAME:/opt/spark/examples/jars/spark-examples_${{ matrix.scala_version }}-${{ matrix.spark_version }}.jar . + docker rm -f $CONTAINER_TMP_NAME + # Prepare PV test + PVC_TMP_DIR=$(mktemp -d) + export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR + export PVC_TESTS_VM_PATH=$PVC_TMP_DIR + minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 & + # Run all K8s test for all in one image + build/sbt $OPTS 'kubernetes-integration-tests/testOnly' + else + # Run basic test for Scala/PySpark/SparkR image + build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"' + + # Run basic test for PySpark image + if echo ${{ matrix.image_suffix }} | grep -q "python"; then + build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run PySpark"' + fi + + # Run basic test for SparkR image + if echo ${{ matrix.image_suffix }} | grep -q "r-"; then + OPTS+="-Psparkr -Dtest.include.tags=r " + build/sbt $OPTS 'kubernetes-integration-tests/testOnly' + fi + fi + + - name: Test - Upload Spark on K8S integration tests log files + if: failure() + uses: actions/upload-artifact@v3 + with: + name: spark-on-kubernetes-it-log + path: "**/target/integration-tests.log" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org