This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2be447f89ea8 Revert "[SPARK-48116][INFRA][FOLLOWUP] Simplify the build with fixing the if condition" 2be447f89ea8 is described below commit 2be447f89ea846c10dcd993de74d06f87e61c1f3 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Sat May 4 14:06:33 2024 +0900 Revert "[SPARK-48116][INFRA][FOLLOWUP] Simplify the build with fixing the if condition" This reverts commit 2cb6ea721fe0c649d70f82d28a5058ae93c20831. --- .github/workflows/build_and_test.yml | 139 ++++++++++++++++++++++++++++++++--- 1 file changed, 128 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 885593b4e34b..8568cd539f03 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -79,7 +79,7 @@ jobs: pyspark=true; sparkr=true; pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` pyspark=`./dev/is-changed.py -m $pyspark_modules` - if [ "${{ github.repository }}" != 'apache/spark' ]; then + if [ "${{ github.repository != 'apache/spark' }}" ]; then pandas=$pyspark else pandas=false @@ -355,6 +355,133 @@ jobs: pyspark-mllib, pyspark-ml, pyspark-ml-connect - >- pyspark-connect + env: + MODULES_TO_TEST: ${{ matrix.modules }} + PYTHON_TO_TEST: 'python3.11' + HADOOP_PROFILE: ${{ inputs.hadoop }} + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + SKIP_UNIDOC: true + SKIP_MIMA: true + SKIP_PACKAGING: true + METASPACE_SIZE: 1g + BRANCH: ${{ inputs.branch }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + # In order to fetch changed files + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - 
name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktest...@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktest...@gmail.com' commit -m "Merged commit" --allow-empty + # Cache local repositories. Note that GitHub Actions cache has a 10G limit. + - name: Cache SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + pyspark-coursier- + - name: Free up disk space + shell: 'script -q -e -c "bash {0}"' + run: | + if [ -f ./dev/free_disk_space_container ]; then + ./dev/free_disk_space_container + fi + - name: Install Java ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ matrix.java }} + - name: List Python packages (${{ env.PYTHON_TO_TEST }}) + env: ${{ fromJSON(inputs.envs) }} + shell: 'script -q -e -c "bash {0}"' + run: | + for py in $(echo $PYTHON_TO_TEST | tr "," "\n") + do + echo $py + $py -m pip list + done + - name: Install Conda for pip packaging test + if: contains(matrix.modules, 'pyspark-errors') + run: | + curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + rm miniconda.sh + # Run the tests. 
+ - name: Run tests + env: ${{ fromJSON(inputs.envs) }} + shell: 'script -q -e -c "bash {0}"' + run: | + if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then + export PATH=$PATH:$HOME/miniconda/bin + export SKIP_PACKAGING=false + echo "Python Packaging Tests Enabled!" + fi + if [ ! -z "$PYTHON_TO_TEST" ]; then + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --python-executables "$PYTHON_TO_TEST" + else + # For branch-3.5 and below, it uses the default Python versions. + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" + fi + - name: Upload coverage to Codecov + if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' + uses: codecov/codecov-action@v4 + with: + files: ./python/coverage.xml + flags: unittests + name: PySpark + - name: Upload test results to report + env: ${{ fromJSON(inputs.envs) }} + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + env: ${{ fromJSON(inputs.envs) }} + if: ${{ !success() }} + uses: actions/upload-artifact@v4 + with: + name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} + path: "**/target/unit-tests.log" + + pyspark-pandas: + needs: [precondition, infra-image] + # always run if pyspark-pandas == 'true', even infra-image is skip (such as non-master job) + if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas == 'true' + name: "Build modules: ${{ matrix.modules }}" + runs-on: ubuntu-latest + timeout-minutes: 180 + container: + image: ${{ needs.precondition.outputs.image_url }} + strategy: + fail-fast: false + matrix: + java: + - ${{ inputs.java }} + modules: - >- pyspark-pandas - >- @@ -367,16 +494,6 @@ jobs: pyspark-pandas-connect-part2 - >- pyspark-pandas-connect-part3 - exclude: - # Always run if pyspark-pandas == 
'true', even infra-image is skip (such as non-master job) - # In practice, the build will run in individual PR, but not against the individual commit - # in Apache Spark repository. - - modules: ${{ (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas' }} - - modules: ${{ (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-slow' }} - - modules: ${{ (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part0' }} - - modules: ${{ (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part1' }} - - modules: ${{ (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part2' }} - - modules: ${{ (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part2' }} env: MODULES_TO_TEST: ${{ matrix.modules }} PYTHON_TO_TEST: 'python3.11' --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org