This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 7942701c3 [CORE] Support JDK17 (#5120)
7942701c3 is described below

commit 7942701c3b67c72230f34286f837c3a6f13fd002
Author: Xiduo You <ulyssesyo...@gmail.com>
AuthorDate: Wed Mar 27 11:10:11 2024 +0800

    [CORE] Support JDK17 (#5120)
    
    * Support JDK17
    
    * address comment
    
    ---------
    
    Co-authored-by: Kent Yao <y...@apache.org>
---
 .github/workflows/velox_docker.yml | 114 ++++++++++++++++++-------------------
 docs/developers/NewToGluten.md     |  12 ++++
 docs/get-started/Velox.md          |  28 ++++-----
 pom.xml                            |  47 ++++++++++-----
 tools/gluten-it/pom.xml            |  23 +++++++-
 tools/gluten-it/sbin/gluten-it.sh  |  21 ++++++-
 6 files changed, 152 insertions(+), 93 deletions(-)

diff --git a/.github/workflows/velox_docker.yml 
b/.github/workflows/velox_docker.yml
index f2b73e81d..6329750d2 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -73,6 +73,17 @@ jobs:
       matrix:
         os: ["ubuntu:20.04", "ubuntu:22.04"]
         spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"]
+        java: [ "java-8", "java-17" ]
+        # Spark supports JDK17 since 3.3, but this matrix only runs JDK17 with spark-3.3; see 
https://issues.apache.org/jira/browse/SPARK-33772
+        exclude:
+          - spark: spark-3.2
+            java: java-17
+          - spark: spark-3.4
+            java: java-17
+          - spark: spark-3.5
+            java: java-17
+          - os: ubuntu:22.04
+            java: java-17
     runs-on: ubuntu-20.04
     container: ${{ matrix.os }}
     steps:
@@ -84,69 +95,45 @@ jobs:
           path: ./cpp/build/releases
       - name: Setup java and maven
         run: |
-          apt-get update && \
-          apt-get install -y openjdk-8-jdk maven && \
+          if [ "${{ matrix.java }}" = "java-17" ]; then
+            apt-get update && apt-get install -y openjdk-17-jdk maven
+          else
+            apt-get update && apt-get install -y openjdk-8-jdk maven
+          fi
           apt remove openjdk-11* -y
-      - name: Build for Spark ${{ matrix.spark }}
-        run: |
-          cd $GITHUB_WORKSPACE/ && \
-          mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
-      - name: Build and run TPCH/DS ${{ matrix.spark }}
-        run: |
-          cd $GITHUB_WORKSPACE/tools/gluten-it && \
-          mvn clean install -P${{ matrix.spark }} \
-          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
-            --local --preset=velox --benchmark-type=h --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
-          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
-            --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
-
-
-  run-tpc-test-centos7:
-    needs: build-native-lib
-    strategy:
-      fail-fast: false
-      matrix:
-        spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"]
-    runs-on: ubuntu-20.04
-    container: centos:7
-    steps:
-      - uses: actions/checkout@v2
-      - name: Download All Artifacts
-        uses: actions/download-artifact@v2
-        with:
-          name: velox-native-lib-${{github.sha}}
-          path: ./cpp/build/releases
-      - name: Setup java and maven
-        run: |
-          yum update -y && yum install -y java-1.8.0-openjdk-devel wget
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-      - name: Build for Spark ${{ matrix.spark }}
+      - name: Build and run TPCH/DS
         run: |
           cd $GITHUB_WORKSPACE/
-          export MAVEN_HOME=/usr/lib/maven
-          export PATH=${PATH}:${MAVEN_HOME}/bin
-          mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
-      - name: Build and run TPCH/DS ${{ matrix.spark }}
-        run: |
-          cd $GITHUB_WORKSPACE/tools/gluten-it 
-          export MAVEN_HOME=/usr/lib/maven
-          export PATH=${PATH}:${MAVEN_HOME}/bin
-          mvn clean install -P${{ matrix.spark }} \
+          export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64
+          echo "JAVA_HOME: $JAVA_HOME"
+          mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} 
-Pbackends-velox -DskipTests
+          cd $GITHUB_WORKSPACE/tools/gluten-it
+          mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} \
           && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox --benchmark-type=h --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
           && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
 
-  run-tpc-test-centos8:
+  run-tpc-test-centos:
     needs: build-native-lib
     strategy:
       fail-fast: false
       matrix:
+        os: ["centos:7", "centos:8"]
         spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"]
+        java: ["java-8", "java-17"]
+        # Spark supports JDK17 since 3.3, but this matrix only runs JDK17 with spark-3.3; see 
https://issues.apache.org/jira/browse/SPARK-33772
+        exclude:
+          - spark: spark-3.2
+            java: java-17
+          - spark: spark-3.4
+            java: java-17
+          - spark: spark-3.5
+            java: java-17
+          - os: centos:7
+            java: java-17
     runs-on: ubuntu-20.04
-    container: centos:8
+    container: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v2
       - name: Download All Artifacts
@@ -155,32 +142,39 @@ jobs:
           name: velox-native-lib-${{github.sha}}
           path: ./cpp/build/releases
       - name: Update mirror list
+        if: matrix.os == 'centos:8'
         run: |
           sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* 
|| true
           sed -i -e 
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" 
/etc/yum.repos.d/CentOS-* || true
       - name: Setup java and maven
         run: |
-          yum update -y && yum install -y java-1.8.0-openjdk-devel wget
+          if [ "${{ matrix.java }}" = "java-17" ]; then
+            yum update -y && yum install -y java-17-openjdk-devel wget
+          else
+            yum update -y && yum install -y java-1.8.0-openjdk-devel wget
+          fi
           wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
-      - name: Build for Spark ${{ matrix.spark }}
-        run: |
-          cd $GITHUB_WORKSPACE/ 
-          export MAVEN_HOME=/usr/lib/maven
-          export PATH=${PATH}:${MAVEN_HOME}/bin
-          mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
-      - name: Build and run TPCH/DS ${{ matrix.spark }}
+      - name: Build and run TPCH/DS
         run: |
-          cd $GITHUB_WORKSPACE/tools/gluten-it
+          cd $GITHUB_WORKSPACE/
           export MAVEN_HOME=/usr/lib/maven
           export PATH=${PATH}:${MAVEN_HOME}/bin
-          mvn clean install -P${{ matrix.spark }} \
+          if [ "${{ matrix.java }}" = "java-17" ]; then
+            export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          else
+            export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
+          fi
+          echo "JAVA_HOME: $JAVA_HOME"
+          mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} 
-Pbackends-velox -DskipTests
+          cd $GITHUB_WORKSPACE/tools/gluten-it 
+          mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} \
           && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox --benchmark-type=h --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
           && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
-     
+
   # run-tpc-test-centos8-oom-randomkill:
   #   needs: build-native-lib
   #   strategy:
diff --git a/docs/developers/NewToGluten.md b/docs/developers/NewToGluten.md
index 04074d4e6..681f5f37e 100644
--- a/docs/developers/NewToGluten.md
+++ b/docs/developers/NewToGluten.md
@@ -43,6 +43,18 @@ export PATH="$PATH:$JAVA_HOME/bin"
 
 > Must set PATH with double quote in ubuntu.
 
+## Openjdk17
+
+By default, Gluten compiles packages using JDK8. Add the Maven profile `-Pjava-17` 
to switch to JDK17, and please make sure your JAVA_HOME points to JDK17.
+
+Apache Spark and Arrow require setting the Java option 
`-Dio.netty.tryReflectionSetAccessible=true`, see 
[SPARK-29924](https://issues.apache.org/jira/browse/SPARK-29924) and 
[ARROW-6206](https://issues.apache.org/jira/browse/ARROW-6206).
+So please add the following configs to `spark-defaults.conf`:
+
+```
+spark.driver.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true
+spark.executor.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true
+```
+
 ## Maven 3.6.3 or above
 
 [Maven Dowload Page](https://maven.apache.org/docs/history.html)
diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md
index e42ac16a2..7c3d77abc 100644
--- a/docs/get-started/Velox.md
+++ b/docs/get-started/Velox.md
@@ -5,28 +5,22 @@ nav_order: 1
 parent: Getting-Started
 ---
 # Supported Version
-| Type  | Version                      |
-|-------|------------------------------|
-| Spark | 3.2.2, 3.3.1                 |
-| OS    | Ubuntu20.04/22.04, Centos7/8 |
-| jdk   | openjdk8                     |
-| scala | 2.12
 
-Spark3.4.0 support is still WIP. TPCH/DS can pass, UT is not yet passed.
-
-There are pending PRs for jdk11 support.
-
-
-Currently, the mvn script can automatically fetch and build all dependency 
libraries incluing Velox. Our nightly build still use Velox under oap-project. 
+| Type  | Version                         |
+|-------|---------------------------------|
+| Spark | 3.2.2, 3.3.1, 3.4.2, 3.5.1(wip) |
+| OS    | Ubuntu20.04/22.04, Centos7/8    |
+| jdk   | openjdk8/jdk17                  |
+| scala | 2.12                            |
 
 # Prerequisite
 
-Currently, Gluten+Velox backend is only tested on 
**Ubuntu20.04/Ubuntu22.04/Centos8**. Other kinds of OS support are still in 
progress. The long term goal is to support several
+Currently, Gluten+Velox backend is only tested on 
**Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. Other kinds of OS support are 
still in progress. The long term goal is to support several
 common OS and conda env deployment.
 
-Gluten builds with Spark3.2.x and Spark3.3.x now but only fully tested in CI 
with 3.2.2 and 3.3.1. We will add/update supported/tested versions according to 
the upstream changes. 
+Gluten is only fully tested in CI with Spark 3.2.2, 3.3.1 and 3.4.2. We will add/update 
supported/tested versions according to the upstream changes. 
 
-we need to set up the `JAVA_HOME` env. Currently, **java 8** is required and 
the support for java 11/17 is not ready.
+We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** 
and **java 17**.
 
 **For x86_64**
 
@@ -63,7 +57,7 @@ It's recommended to use buildbundle-veloxbe.sh to build 
gluten in one script.
 ```bash
 cd /path/to/gluten
 
-## The script builds two jars for spark 3.2.2 and 3.3.1.
+## The script builds jars for all Spark versions.
 ./dev/buildbundle-veloxbe.sh
 
 ## After a complete build, if you need to re-build the project and only some 
gluten code is changed,
@@ -84,6 +78,8 @@ cd /path/to/gluten
 
 **Build Velox separately**
 
+Gluten still uses Velox under oap-project, which is updated daily against 
upstream (Meta) Velox.
+
 Scripts under `/path/to/gluten/ep/build-velox/src` provide `get_velox.sh` and 
`build_velox.sh` to build Velox separately, you could use these scripts with 
custom repo/branch/location.
 
 Velox provides arrow/parquet lib. Gluten cpp module need a required VELOX_HOME 
parsed by --velox_home, if you specify custom ep location, make sure these 
variables be passed correctly.
diff --git a/pom.xml b/pom.xml
index 8975ce211..afe86e56a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -37,6 +37,9 @@
   </modules>
 
   <properties>
+    <java.version>1.8</java.version>
+    <maven.compiler.source>${java.version}</maven.compiler.source>
+    <maven.compiler.target>${java.version}</maven.compiler.target>
     <caffeine.version.java8>2.9.3</caffeine.version.java8>
     <delta.version>2.0.1</delta.version>
     <delta.binary.version>20</delta.binary.version>
@@ -97,13 +100,31 @@
     <!-- plugin version-->
     
<build-helper-maven-plugin.version>3.2.0</build-helper-maven-plugin.version>
     <scala.compiler.version>4.8.0</scala.compiler.version>
-    <maven.compiler.plugin>3.8.0</maven.compiler.plugin>
+    <maven.compiler.plugin>3.12.1</maven.compiler.plugin>
     <maven.jar.plugin>3.2.2</maven.jar.plugin>
     <scalastyle.version>1.0.0</scalastyle.version>
     <scalatest-maven-plugin.version>2.2.0</scalatest-maven-plugin.version>
   </properties>
 
   <profiles>
+    <profile>
+      <id>java-8</id>
+      <activation>
+        <jdk>1.8</jdk>
+      </activation>
+      <properties>
+        <java.version>1.8</java.version>
+      </properties>
+    </profile>
+    <profile>
+      <id>java-17</id>
+      <activation>
+        <jdk>17</jdk>
+      </activation>
+      <properties>
+        <java.version>17</java.version>
+      </properties>
+    </profile>
     <profile>
       <id>spark-3.2</id>
       <activation>
@@ -112,8 +133,8 @@
       <properties>
         <sparkbundle.version>3.2</sparkbundle.version>
         
<sparkshim.artifactId>spark-sql-columnar-shims-spark32</sparkshim.artifactId>
-       <spark.version>3.2.2</spark.version>
-       <iceberg.version>1.3.1</iceberg.version>
+        <spark.version>3.2.2</spark.version>
+        <iceberg.version>1.3.1</iceberg.version>
         <delta.version>2.0.1</delta.version>
         <delta.binary.version>20</delta.binary.version>
       </properties>
@@ -123,10 +144,10 @@
       <properties>
         <sparkbundle.version>3.3</sparkbundle.version>
         
<sparkshim.artifactId>spark-sql-columnar-shims-spark33</sparkshim.artifactId>
-       <spark.version>3.3.1</spark.version>
-       <!-- keep using iceberg v1.3.1 for parquet compatibilty. -->
-       <iceberg.version>1.3.1</iceberg.version>
-       <delta.version>2.2.0</delta.version>
+        <spark.version>3.3.1</spark.version>
+        <!-- keep using iceberg v1.3.1 for parquet compatibilty. -->
+        <iceberg.version>1.3.1</iceberg.version>
+        <delta.version>2.2.0</delta.version>
         <delta.binary.version>22</delta.binary.version>
       </properties>
     </profile>
@@ -135,9 +156,9 @@
       <properties>
         <sparkbundle.version>3.4</sparkbundle.version>
         
<sparkshim.artifactId>spark-sql-columnar-shims-spark34</sparkshim.artifactId>
-       <spark.version>3.4.2</spark.version>
-       <iceberg.version>1.4.3</iceberg.version>
-       <delta.version>2.4.0</delta.version>
+        <spark.version>3.4.2</spark.version>
+        <iceberg.version>1.4.3</iceberg.version>
+        <delta.version>2.4.0</delta.version>
         <delta.binary.version>24</delta.binary.version>
       </properties>
     </profile>
@@ -146,8 +167,8 @@
       <properties>
         <sparkbundle.version>3.5</sparkbundle.version>
         
<sparkshim.artifactId>spark-sql-columnar-shims-spark35</sparkshim.artifactId>
-       <spark.version>3.5.1</spark.version>
-       <iceberg.version>1.4.3</iceberg.version>
+        <spark.version>3.5.1</spark.version>
+        <iceberg.version>1.4.3</iceberg.version>
         <delta.version>2.4.0</delta.version>
         <delta.binary.version>24</delta.binary.version>
         <hadoop.version>3.3.4</hadoop.version>
@@ -542,8 +563,6 @@
           <artifactId>maven-compiler-plugin</artifactId>
           <version>${maven.compiler.plugin}</version>
           <configuration>
-            <source>1.8</source>
-            <target>1.8</target>
             <encoding>UTF-8</encoding>
             <maxmem>1024m</maxmem>
             <fork>true</fork>
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 7823cd32f..74e1da2f4 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -14,8 +14,9 @@
   </modules>
 
   <properties>
-    <maven.compiler.source>8</maven.compiler.source>
-    <maven.compiler.target>8</maven.compiler.target>
+    <java.version>1.8</java.version>
+    <maven.compiler.source>${java.version}</maven.compiler.source>
+    <maven.compiler.target>${java.version}</maven.compiler.target>
     <scala.library.version>2.12.15</scala.library.version>
     <spark.version>3.4.2</spark.version>
     <scala.binary.version>2.12</scala.binary.version>
@@ -89,6 +90,24 @@
   </dependencyManagement>
 
   <profiles>
+    <profile>
+      <id>java-8</id>
+      <activation>
+        <jdk>1.8</jdk>
+      </activation>
+      <properties>
+        <java.version>1.8</java.version>
+      </properties>
+    </profile>
+    <profile>
+      <id>java-17</id>
+      <activation>
+        <jdk>17</jdk>
+      </activation>
+      <properties>
+        <java.version>17</java.version>
+      </properties>
+    </profile>
     <profile>
       <id>spark-3.2</id>
       <activation>
diff --git a/tools/gluten-it/sbin/gluten-it.sh 
b/tools/gluten-it/sbin/gluten-it.sh
index e5f27ed6b..98a240878 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -28,4 +28,23 @@ fi
 
 JAR_PATH=$LIB_DIR/*
 
-java $GLUTEN_IT_JVM_ARGS -cp $JAR_PATH io.glutenproject.integration.tpc.Tpc $@
+$JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
+    -XX:+IgnoreUnrecognizedVMOptions \
+    --add-opens=java.base/java.lang=ALL-UNNAMED \
+    --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
+    --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
+    --add-opens=java.base/java.io=ALL-UNNAMED \
+    --add-opens=java.base/java.net=ALL-UNNAMED \
+    --add-opens=java.base/java.nio=ALL-UNNAMED \
+    --add-opens=java.base/java.util=ALL-UNNAMED \
+    --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
+    --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
+    --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
+    --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
+    --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
+    --add-opens=java.base/sun.security.action=ALL-UNNAMED \
+    --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
+    -Djdk.reflect.useDirectMethodHandle=false \
+    -Dio.netty.tryReflectionSetAccessible=true \
+    -cp $JAR_PATH \
+    io.glutenproject.integration.tpc.Tpc $@


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org

Reply via email to