This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f3d9b819f3c0 [SPARK-48187][INFRA] Run `docs` only in PR builders and `build_non_ansi` Daily CI f3d9b819f3c0 is described below commit f3d9b819f3c013cd402ed98d01842173c45a5dd6 Author: Dongjoon Hyun <dh...@apple.com> AuthorDate: Wed May 8 00:02:44 2024 -0700 [SPARK-48187][INFRA] Run `docs` only in PR builders and `build_non_ansi` Daily CI ### What changes were proposed in this pull request? This PR aims to run `docs` (Documentation Generation) step only in PR builders and `build_non_ansi` Daily CI. To do that, this PR spins off `documentation generation` tasks from `lint` job. ### Why are the changes needed? Currently, Apache Spark CI is running `Documentation Generation` always inside `lint` job. We can take advantage of PR Builder and one of Daily CIs. - https://infra.apache.org/github-actions-policy.html ### Does this PR introduce _any_ user-facing change? No because this is an infra update. ### How was this patch tested? Pass the CIs and manual review because PR builders will not be affected by this. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46463 from dongjoon-hyun/SPARK-48187. 
Authored-by: Dongjoon Hyun <dh...@apple.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .github/workflows/build_and_test.yml | 94 ++++++++++++++++++++++++++++++++++-- .github/workflows/build_non_ansi.yml | 1 + 2 files changed, 90 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 00ba16265dce..bb9f2f9a9603 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -85,6 +85,7 @@ jobs: sparkr=`./dev/is-changed.py -m sparkr` buf=true ui=true + docs=true else pandas=false yarn=false @@ -92,6 +93,7 @@ jobs: sparkr=false buf=false ui=false + docs=false fi build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,protobuf,yarn,connect,sql,hive"` precondition=" @@ -103,6 +105,7 @@ jobs: \"tpcds-1g\": \"false\", \"docker-integration-tests\": \"false\", \"lint\" : \"true\", + \"docs\" : \"$docs\", \"yarn\" : \"$yarn\", \"k8s-integration-tests\" : \"$kubernetes\", \"buf\" : \"$buf\", @@ -621,12 +624,12 @@ jobs: - name: Python CodeGen check run: ./dev/connect-check-protos.py - # Static analysis, and documentation build + # Static analysis lint: needs: [precondition, infra-image] # always run if lint == 'true', even infra-image is skip (such as non-master job) if: (!cancelled()) && fromJson(needs.precondition.outputs.required).lint == 'true' - name: Linters, licenses, dependencies and documentation generation + name: Linters, licenses, and dependencies runs-on: ubuntu-latest timeout-minutes: 180 env: @@ -764,7 +767,90 @@ jobs: Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" - name: Install R linter dependencies and SparkR run: ./R/install-dev.sh - # Should delete this 
section after SPARK 3.5 EOL. + - name: R linter + run: ./dev/lint-r + + # Documentation build + docs: + needs: [precondition, infra-image] + # always run if lint == 'true', even infra-image is skip (such as non-master job) + if: (!cancelled()) && fromJson(needs.precondition.outputs.required).docs == 'true' + name: Documentation generation + runs-on: ubuntu-latest + timeout-minutes: 180 + env: + LC_ALL: C.UTF-8 + LANG: C.UTF-8 + NOLINT_ON_COMPILE: false + PYSPARK_DRIVER_PYTHON: python3.9 + PYSPARK_PYTHON: python3.9 + GITHUB_PREV_SHA: ${{ github.event.before }} + container: + image: ${{ needs.precondition.outputs.image_url }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktest...@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktest...@gmail.com' commit -m "Merged commit" --allow-empty + # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
+ - name: Cache SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + docs-coursier- + - name: Cache Maven local repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: docs-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + docs-maven- + - name: Free up disk space + run: | + if [ -f ./dev/free_disk_space_container ]; then + ./dev/free_disk_space_container + fi + - name: Install Java ${{ inputs.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ inputs.java }} + - name: Install Python dependencies for python linter and documentation generation + if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + run: | + # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 + # See 'ipython_genutils' in SPARK-38517 + # See 'docutils<0.18.0' in SPARK-39421 + python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ + ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \ + 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ + 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ + 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' + python3.9 -m pip list - name: Install dependencies for documentation generation for branch-3.4, branch-3.5 if: 
inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: | @@ -785,8 +871,6 @@ jobs: gem install bundler -v 2.4.22 cd docs bundle install - - name: R linter - run: ./dev/lint-r - name: Run documentation build run: | # We need this link because the jekyll build calls `python`. diff --git a/.github/workflows/build_non_ansi.yml b/.github/workflows/build_non_ansi.yml index 902627690320..30ead890728c 100644 --- a/.github/workflows/build_non_ansi.yml +++ b/.github/workflows/build_non_ansi.yml @@ -41,6 +41,7 @@ jobs: jobs: >- { "build": "true", + "docs": "true", "pyspark": "true", "sparkr": "true", "tpcds-1g": "true", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org