This is an automated email from the ASF dual-hosted git repository. changchen pushed a commit to branch feature/41 in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
commit 44fab9302f4f08cfa8ec9e126e13c08d546ee9a2
Author: Chang chen <[email protected]>
AuthorDate: Wed Dec 31 16:41:44 2025 +0800

    [Feat] Support Spark in install script
---
 .github/workflows/util/install-spark-resources.sh |   5 +
 .github/workflows/velox_backend_x86.yml           | 106 ++++++++++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/.github/workflows/util/install-spark-resources.sh b/.github/workflows/util/install-spark-resources.sh
index 4d1dd27a9c..e1fc6a483d 100755
--- a/.github/workflows/util/install-spark-resources.sh
+++ b/.github/workflows/util/install-spark-resources.sh
@@ -119,6 +119,11 @@ case "$1" in
     cd ${INSTALL_DIR} && \
     install_spark "4.0.1" "3" "2.12"
     ;;
+4.1)
+    # Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
+    cd ${INSTALL_DIR} && \
+    install_spark "4.1.0" "3" "2.12"
+    ;;
 *)
     echo "Spark version is expected to be specified."
     exit 1
diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml
index 1f3df3eaa8..dbb9ff9c1f 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -1483,3 +1483,109 @@ jobs:
             **/target/*.log
             **/gluten-ut/**/hs_err_*.log
             **/gluten-ut/**/core.*
+
+  spark-test-spark41:
+    needs: build-native-lib-centos-7
+    runs-on: ubuntu-22.04
+    env:
+      SPARK_TESTING: true
+    container: apache/gluten:centos-8-jdk17
+    steps:
+      - uses: actions/checkout@v2
+      - name: Download All Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: velox-native-lib-centos-7-${{github.sha}}
+          path: ./cpp/build/releases
+      - name: Download Arrow Jars
+        uses: actions/download-artifact@v4
+        with:
+          name: arrow-jars-centos-7-${{github.sha}}
+          path: /root/.m2/repository/org/apache/arrow/
+      - name: Prepare
+        run: |
+          dnf module -y install python39 && \
+          alternatives --set python3 /usr/bin/python3.9 && \
+          pip3 install setuptools==77.0.3 && \
+          pip3 install pyspark==3.5.5 cython && \
+          pip3 install pandas==2.2.3 pyarrow==20.0.0
+      - name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image update
+        run: |
+          rm -rf /opt/shims/spark41
+          bash .github/workflows/util/install-spark-resources.sh 4.1
+          mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12 /opt/shims/spark41/spark_home/assembly/target/scala-2.13
+      - name: Build and Run unit test for Spark 4.1.0 with scala-2.13 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/
+          export SPARK_SCALA_VERSION=2.13
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox \
+          -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
+          -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
+      - name: Upload test report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ github.job }}-report
+          path: '**/surefire-reports/TEST-*.xml'
+      - name: Upload unit tests log files
+        if: ${{ !success() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ github.job }}-test-log
+          path: |
+            **/target/*.log
+            **/gluten-ut/**/hs_err_*.log
+            **/gluten-ut/**/core.*
+
+  spark-test-spark41-slow:
+    needs: build-native-lib-centos-7
+    runs-on: ubuntu-22.04
+    env:
+      SPARK_TESTING: true
+    container: apache/gluten:centos-8-jdk17
+    steps:
+      - uses: actions/checkout@v2
+      - name: Download All Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: velox-native-lib-centos-7-${{github.sha}}
+          path: ./cpp/build/releases
+      - name: Download Arrow Jars
+        uses: actions/download-artifact@v4
+        with:
+          name: arrow-jars-centos-7-${{github.sha}}
+          path: /root/.m2/repository/org/apache/arrow/
+      - name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image update
+        run: |
+          rm -rf /opt/shims/spark41
+          bash .github/workflows/util/install-spark-resources.sh 4.1
+          mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12 /opt/shims/spark41/spark_home/assembly/target/scala-2.13
+      - name: Build and Run unit test for Spark 4.1.0 with scala-2.13 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE/
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-4.1 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \
+          -DargLine="-Dspark.test.home=/opt/shims/spark41/spark_home/" \
+          -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+      - name: Upload test report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ github.job }}-report
+          path: '**/surefire-reports/TEST-*.xml'
+      - name: Upload unit tests log files
+        if: ${{ !success() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ github.job }}-test-log
+          path: |
+            **/target/*.log
+            **/gluten-ut/**/hs_err_*.log
+            **/gluten-ut/**/core.*

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
