This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new 7942701c3 [CORE] Support JDK17 (#5120) 7942701c3 is described below commit 7942701c3b67c72230f34286f837c3a6f13fd002 Author: Xiduo You <ulyssesyo...@gmail.com> AuthorDate: Wed Mar 27 11:10:11 2024 +0800 [CORE] Support JDK17 (#5120) * Support JDK17 * address comment --------- Co-authored-by: Kent Yao <y...@apache.org> --- .github/workflows/velox_docker.yml | 114 ++++++++++++++++++------------------- docs/developers/NewToGluten.md | 12 ++++ docs/get-started/Velox.md | 28 ++++----- pom.xml | 47 ++++++++++----- tools/gluten-it/pom.xml | 23 +++++++- tools/gluten-it/sbin/gluten-it.sh | 21 ++++++- 6 files changed, 152 insertions(+), 93 deletions(-) diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index f2b73e81d..6329750d2 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -73,6 +73,17 @@ jobs: matrix: os: ["ubuntu:20.04", "ubuntu:22.04"] spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"] + java: [ "java-8", "java-17" ] + # Spark supports JDK17 since 3.3 and later, see https://issues.apache.org/jira/browse/SPARK-33772 + exclude: + - spark: spark-3.2 + java: java-17 + - spark: spark-3.4 + java: java-17 + - spark: spark-3.5 + java: java-17 + - os: ubuntu:22.04 + java: java-17 runs-on: ubuntu-20.04 container: ${{ matrix.os }} steps: @@ -84,69 +95,45 @@ jobs: path: ./cpp/build/releases - name: Setup java and maven run: | - apt-get update && \ - apt-get install -y openjdk-8-jdk maven && \ + if [ "${{ matrix.java }}" = "java-17" ]; then + apt-get update && apt-get install -y openjdk-17-jdk maven + else + apt-get update && apt-get install -y openjdk-8-jdk maven + fi apt remove openjdk-11* -y - - name: Build for Spark ${{ matrix.spark }} - run: | - cd $GITHUB_WORKSPACE/ && \ - mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests - - name: Build and run TPCH/DS ${{ matrix.spark }} - run: | - cd $GITHUB_WORKSPACE/tools/gluten-it && \ - mvn clean install -P${{ matrix.spark }} \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 - - - run-tpc-test-centos7: - needs: build-native-lib - strategy: - fail-fast: false - matrix: - spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"] - runs-on: ubuntu-20.04 - container: centos:7 - steps: - - uses: actions/checkout@v2 - - name: Download All Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-native-lib-${{github.sha}} - path: ./cpp/build/releases - - name: Setup java and maven - run: | - yum update -y && yum install -y java-1.8.0-openjdk-devel wget - wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz - tar -xvf apache-maven-3.8.8-bin.tar.gz - mv apache-maven-3.8.8 /usr/lib/maven - - name: Build for Spark ${{ matrix.spark }} + - name: Build and run TPCH/DS run: | cd $GITHUB_WORKSPACE/ - export MAVEN_HOME=/usr/lib/maven - export PATH=${PATH}:${MAVEN_HOME}/bin - mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests - - name: Build and run TPCH/DS ${{ matrix.spark }} - run: | - cd $GITHUB_WORKSPACE/tools/gluten-it - export MAVEN_HOME=/usr/lib/maven - export PATH=${PATH}:${MAVEN_HOME}/bin - mvn clean install -P${{ matrix.spark }} \ + export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64 + echo "JAVA_HOME: $JAVA_HOME" + mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests + cd $GITHUB_WORKSPACE/tools/gluten-it + mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 - run-tpc-test-centos8: + run-tpc-test-centos: needs: build-native-lib strategy: fail-fast: false matrix: + os: ["centos:7", "centos:8"] spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"] + java: ["java-8", "java-17"] + # Spark supports JDK17 since 3.3 and later, see https://issues.apache.org/jira/browse/SPARK-33772 + exclude: + - spark: spark-3.2 + java: java-17 + - spark: spark-3.4 + java: java-17 + - spark: spark-3.5 + java: java-17 + - os: centos:7 + java: java-17 runs-on: ubuntu-20.04 - container: centos:8 + container: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - name: Download All Artifacts @@ -155,32 +142,39 @@ jobs: name: velox-native-lib-${{github.sha}} path: ./cpp/build/releases - name: Update mirror list + if: matrix.os == 'centos:8' run: | sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true - name: Setup java and maven run: | - yum update -y && yum install -y java-1.8.0-openjdk-devel wget + if [ "${{ matrix.java }}" = "java-17" ]; then + yum update -y && yum install -y java-17-openjdk-devel wget + else + yum update -y && yum install -y java-1.8.0-openjdk-devel wget + fi wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz tar -xvf apache-maven-3.8.8-bin.tar.gz mv apache-maven-3.8.8 /usr/lib/maven - - name: Build for Spark ${{ matrix.spark }} - run: | - cd $GITHUB_WORKSPACE/ - export MAVEN_HOME=/usr/lib/maven - export PATH=${PATH}:${MAVEN_HOME}/bin - mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests - - name: Build and run TPCH/DS ${{ matrix.spark }} + - name: Build and run TPCH/DS run: | - cd $GITHUB_WORKSPACE/tools/gluten-it + cd $GITHUB_WORKSPACE/ export MAVEN_HOME=/usr/lib/maven export PATH=${PATH}:${MAVEN_HOME}/bin - mvn clean install -P${{ matrix.spark }} \ + if [ "${{ matrix.java }}" = "java-17" ]; then + export JAVA_HOME=/usr/lib/jvm/java-17-openjdk + else + export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk + fi + echo "JAVA_HOME: $JAVA_HOME" + mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests + cd $GITHUB_WORKSPACE/tools/gluten-it + mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 - + # run-tpc-test-centos8-oom-randomkill: # needs: build-native-lib # strategy: diff --git a/docs/developers/NewToGluten.md b/docs/developers/NewToGluten.md index 04074d4e6..681f5f37e 100644 --- a/docs/developers/NewToGluten.md +++ b/docs/developers/NewToGluten.md @@ -43,6 +43,18 @@ export PATH="$PATH:$JAVA_HOME/bin" > Must set PATH with double quote in ubuntu. +## Openjdk17 + +By defaults, Gluten compiles package using JDK8. Add maven profile `-Pjava-17` changing to use JDK17, and please make sure your JAVA_HOME points to jdk17. + +Apache Spark and Arrow requires setting java args `-Dio.netty.tryReflectionSetAccessible=true`, see [SPARK-29924](https://issues.apache.org/jira/browse/SPARK-29924) and [ARROW-6206](https://issues.apache.org/jira/browse/ARROW-6206). +So please add following configs in `spark-defaults.conf`: + +``` +spark.driver.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true +spark.executor.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true +``` + ## Maven 3.6.3 or above [Maven Dowload Page](https://maven.apache.org/docs/history.html) diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md index e42ac16a2..7c3d77abc 100644 --- a/docs/get-started/Velox.md +++ b/docs/get-started/Velox.md @@ -5,28 +5,22 @@ nav_order: 1 parent: Getting-Started --- # Supported Version -| Type | Version | -|-------|------------------------------| -| Spark | 3.2.2, 3.3.1 | -| OS | Ubuntu20.04/22.04, Centos7/8 | -| jdk | openjdk8 | -| scala | 2.12 -Spark3.4.0 support is still WIP. TPCH/DS can pass, UT is not yet passed. - -There are pending PRs for jdk11 support. - - -Currently, the mvn script can automatically fetch and build all dependency libraries incluing Velox. Our nightly build still use Velox under oap-project. +| Type | Version | +|-------|---------------------------------| +| Spark | 3.2.2, 3.3.1, 3.4.2, 3.5.1(wip) | +| OS | Ubuntu20.04/22.04, Centos7/8 | +| jdk | openjdk8/jdk17 | +| scala | 2.12 | # Prerequisite -Currently, Gluten+Velox backend is only tested on **Ubuntu20.04/Ubuntu22.04/Centos8**. Other kinds of OS support are still in progress. The long term goal is to support several +Currently, Gluten+Velox backend is only tested on **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. Other kinds of OS support are still in progress. The long term goal is to support several common OS and conda env deployment. -Gluten builds with Spark3.2.x and Spark3.3.x now but only fully tested in CI with 3.2.2 and 3.3.1. We will add/update supported/tested versions according to the upstream changes. +Gluten only fully tested in CI with 3.2.2, 3.3.1 and 3.4.2. We will add/update supported/tested versions according to the upstream changes. -we need to set up the `JAVA_HOME` env. Currently, **java 8** is required and the support for java 11/17 is not ready. +We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** and **java 17**. **For x86_64** @@ -63,7 +57,7 @@ It's recommended to use buildbundle-veloxbe.sh to build gluten in one script. ```bash cd /path/to/gluten -## The script builds two jars for spark 3.2.2 and 3.3.1. +## The script builds jars for all spark version ./dev/buildbundle-veloxbe.sh ## After a complete build, if you need to re-build the project and only some gluten code is changed, @@ -84,6 +78,8 @@ cd /path/to/gluten **Build Velox separately** +Gluten still uses Velox under oap-project and does daily update with upstream(meta) Velox. + Scripts under `/path/to/gluten/ep/build-velox/src` provide `get_velox.sh` and `build_velox.sh` to build Velox separately, you could use these scripts with custom repo/branch/location. Velox provides arrow/parquet lib. Gluten cpp module need a required VELOX_HOME parsed by --velox_home, if you specify custom ep location, make sure these variables be passed correctly. diff --git a/pom.xml b/pom.xml index 8975ce211..afe86e56a 100644 --- a/pom.xml +++ b/pom.xml @@ -37,6 +37,9 @@ </modules> <properties> + <java.version>1.8</java.version> + <maven.compiler.source>${java.version}</maven.compiler.source> + <maven.compiler.target>${java.version}</maven.compiler.target> <caffeine.version.java8>2.9.3</caffeine.version.java8> <delta.version>2.0.1</delta.version> <delta.binary.version>20</delta.binary.version> @@ -97,13 +100,31 @@ <!-- plugin version--> <build-helper-maven-plugin.version>3.2.0</build-helper-maven-plugin.version> <scala.compiler.version>4.8.0</scala.compiler.version> - <maven.compiler.plugin>3.8.0</maven.compiler.plugin> + <maven.compiler.plugin>3.12.1</maven.compiler.plugin> <maven.jar.plugin>3.2.2</maven.jar.plugin> <scalastyle.version>1.0.0</scalastyle.version> <scalatest-maven-plugin.version>2.2.0</scalatest-maven-plugin.version> </properties> <profiles> + <profile> + <id>java-8</id> + <activation> + <jdk>1.8</jdk> + </activation> + <properties> + <java.version>1.8</java.version> + </properties> + </profile> + <profile> + <id>java-17</id> + <activation> + <jdk>17</jdk> + </activation> + <properties> + <java.version>17</java.version> + </properties> + </profile> <profile> <id>spark-3.2</id> <activation> @@ -112,8 +133,8 @@ <properties> <sparkbundle.version>3.2</sparkbundle.version> <sparkshim.artifactId>spark-sql-columnar-shims-spark32</sparkshim.artifactId> - <spark.version>3.2.2</spark.version> - <iceberg.version>1.3.1</iceberg.version> + <spark.version>3.2.2</spark.version> + <iceberg.version>1.3.1</iceberg.version> <delta.version>2.0.1</delta.version> <delta.binary.version>20</delta.binary.version> </properties> @@ -123,10 +144,10 @@ <properties> <sparkbundle.version>3.3</sparkbundle.version> <sparkshim.artifactId>spark-sql-columnar-shims-spark33</sparkshim.artifactId> - <spark.version>3.3.1</spark.version> - <!-- keep using iceberg v1.3.1 for parquet compatibilty. --> - <iceberg.version>1.3.1</iceberg.version> - <delta.version>2.2.0</delta.version> + <spark.version>3.3.1</spark.version> + <!-- keep using iceberg v1.3.1 for parquet compatibilty. --> + <iceberg.version>1.3.1</iceberg.version> + <delta.version>2.2.0</delta.version> <delta.binary.version>22</delta.binary.version> </properties> </profile> @@ -135,9 +156,9 @@ <properties> <sparkbundle.version>3.4</sparkbundle.version> <sparkshim.artifactId>spark-sql-columnar-shims-spark34</sparkshim.artifactId> - <spark.version>3.4.2</spark.version> - <iceberg.version>1.4.3</iceberg.version> - <delta.version>2.4.0</delta.version> + <spark.version>3.4.2</spark.version> + <iceberg.version>1.4.3</iceberg.version> + <delta.version>2.4.0</delta.version> <delta.binary.version>24</delta.binary.version> </properties> </profile> @@ -146,8 +167,8 @@ <properties> <sparkbundle.version>3.5</sparkbundle.version> <sparkshim.artifactId>spark-sql-columnar-shims-spark35</sparkshim.artifactId> - <spark.version>3.5.1</spark.version> - <iceberg.version>1.4.3</iceberg.version> + <spark.version>3.5.1</spark.version> + <iceberg.version>1.4.3</iceberg.version> <delta.version>2.4.0</delta.version> <delta.binary.version>24</delta.binary.version> <hadoop.version>3.3.4</hadoop.version> @@ -542,8 +563,6 @@ <artifactId>maven-compiler-plugin</artifactId> <version>${maven.compiler.plugin}</version> <configuration> - <source>1.8</source> - <target>1.8</target> <encoding>UTF-8</encoding> <maxmem>1024m</maxmem> <fork>true</fork> diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml index 7823cd32f..74e1da2f4 100644 --- a/tools/gluten-it/pom.xml +++ b/tools/gluten-it/pom.xml @@ -14,8 +14,9 @@ </modules> <properties> - <maven.compiler.source>8</maven.compiler.source> - <maven.compiler.target>8</maven.compiler.target> + <java.version>1.8</java.version> + <maven.compiler.source>${java.version}</maven.compiler.source> + <maven.compiler.target>${java.version}</maven.compiler.target> <scala.library.version>2.12.15</scala.library.version> <spark.version>3.4.2</spark.version> <scala.binary.version>2.12</scala.binary.version> @@ -89,6 +90,24 @@ </dependencyManagement> <profiles> + <profile> + <id>java-8</id> + <activation> + <jdk>1.8</jdk> + </activation> + <properties> + <java.version>1.8</java.version> + </properties> + </profile> + <profile> + <id>java-17</id> + <activation> + <jdk>17</jdk> + </activation> + <properties> + <java.version>17</java.version> + </properties> + </profile> <profile> <id>spark-3.2</id> <activation> diff --git a/tools/gluten-it/sbin/gluten-it.sh b/tools/gluten-it/sbin/gluten-it.sh index e5f27ed6b..98a240878 100755 --- a/tools/gluten-it/sbin/gluten-it.sh +++ b/tools/gluten-it/sbin/gluten-it.sh @@ -28,4 +28,23 @@ fi JAR_PATH=$LIB_DIR/* -java $GLUTEN_IT_JVM_ARGS -cp $JAR_PATH io.glutenproject.integration.tpc.Tpc $@ +$JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \ + -XX:+IgnoreUnrecognizedVMOptions \ + --add-opens=java.base/java.lang=ALL-UNNAMED \ + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ + --add-opens=java.base/java.io=ALL-UNNAMED \ + --add-opens=java.base/java.net=ALL-UNNAMED \ + --add-opens=java.base/java.nio=ALL-UNNAMED \ + --add-opens=java.base/java.util=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \ + --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \ + --add-opens=java.base/sun.security.action=ALL-UNNAMED \ + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ + -Djdk.reflect.useDirectMethodHandle=false \ + -Dio.netty.tryReflectionSetAccessible=true \ + -cp $JAR_PATH \ + io.glutenproject.integration.tpc.Tpc $@ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org