This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new b2e749d187 [#8431] improvement(bundles): Remove unnecessary relocate 
in fileset bundle jars. (#8432)
b2e749d187 is described below

commit b2e749d1875f65e6269db833da44e80f25bf695c
Author: Mini Yu <[email protected]>
AuthorDate: Tue Sep 9 10:08:49 2025 +0800

    [#8431] improvement(bundles): Remove unnecessary relocate in fileset bundle 
jars. (#8432)
    
    ### What changes were proposed in this pull request?
    
    Remove unnecessary relocate in gcp/adls bundle jars.
    
    ### Why are the changes needed?
    
    To avoid potential problems.
    
    Fix: #8431
    Fix: #8391
    
    ### Does this PR introduce _any_ user-facing change?
    
    N/A.
    
    ### How was this patch tested?
    
    Existing tests.
    
    ---------
    
    Co-authored-by: mchades <[email protected]>
    Co-authored-by: yuhui <[email protected]>
---
 bundles/aliyun/build.gradle.kts       |  2 --
 bundles/azure-bundle/build.gradle.kts |  1 -
 bundles/azure/build.gradle.kts        |  3 ---
 bundles/gcp-bundle/build.gradle.kts   |  1 -
 bundles/gcp/build.gradle.kts          |  5 -----
 docs/fileset-catalog-with-adls.md     | 12 +++++-------
 docs/fileset-catalog-with-gcs.md      |  8 +++-----
 docs/fileset-catalog-with-oss.md      | 16 ++++++++++++----
 docs/fileset-catalog-with-s3.md       |  9 +++++----
 9 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/bundles/aliyun/build.gradle.kts b/bundles/aliyun/build.gradle.kts
index c84dfc8184..76e73c8669 100644
--- a/bundles/aliyun/build.gradle.kts
+++ b/bundles/aliyun/build.gradle.kts
@@ -83,7 +83,6 @@ tasks.withType(ShadowJar::class.java) {
   relocate("com.google", 
"org.apache.gravitino.aliyun.shaded.com.google.common")
   relocate("com.sun.activation", 
"org.apache.gravitino.aliyun.shaded.com.sun.activation")
   relocate("com.sun.istack", 
"org.apache.gravitino.aliyun.shaded.com.sun.istack")
-  relocate("com.sun.jersey", 
"org.apache.gravitino.aliyun.shaded.com.sun.jersey")
   relocate("com.sun.xml", "org.apache.gravitino.aliyun.shaded.com.sun.xml")
   relocate("okhttp3", "org.apache.gravitino.aliyun.shaded.okhttp3")
   relocate("okio", "org.apache.gravitino.aliyun.shaded.okio")
@@ -91,7 +90,6 @@ tasks.withType(ShadowJar::class.java) {
   relocate("org.apache.http", 
"org.apache.gravitino.aliyun.shaded.org.apache.http")
   relocate("org.checkerframework", 
"org.apache.gravitino.aliyun.shaded.org.checkerframework")
   relocate("org.jacoco.agent.rt", 
"org.apache.gravitino.aliyun.shaded.org.jacoco.agent.rt")
-  relocate("org.jdom", "org.apache.gravitino.aliyun.shaded.org.jdom")
 
   mergeServiceFiles()
 }
diff --git a/bundles/azure-bundle/build.gradle.kts 
b/bundles/azure-bundle/build.gradle.kts
index 6ff704ea28..df6be64a83 100644
--- a/bundles/azure-bundle/build.gradle.kts
+++ b/bundles/azure-bundle/build.gradle.kts
@@ -52,7 +52,6 @@ tasks.withType(ShadowJar::class.java) {
   relocate("com.microsoft.aad", 
"org.apache.gravitino.azure.shaded.com.microsoft.aad")
   relocate("com.nimbusds", "org.apache.gravitino.azure.shaded.com.nimbusds")
   relocate("com.sun.jna", "org.apache.gravitino.azure.shaded.com.sun.jna")
-  relocate("com.sun.xml", "org.apache.gravitino.azure.shaded.com.sun.xml")
   relocate("io.netty", "org.apache.gravitino.azure.shaded.io.netty")
   relocate("net.minidev", "org.apache.gravitino.azure.shaded.net.minidev")
   relocate("net.jcip.annotations", 
"org.apache.gravitino.azure.shaded.net.jcip.annotations")
diff --git a/bundles/azure/build.gradle.kts b/bundles/azure/build.gradle.kts
index a11df2ed00..aeb3b7406d 100644
--- a/bundles/azure/build.gradle.kts
+++ b/bundles/azure/build.gradle.kts
@@ -71,15 +71,12 @@ tasks.withType(ShadowJar::class.java) {
   relocate("com.microsoft.aad", 
"org.apache.gravitino.azure.shaded.com.microsoft.aad")
   relocate("com.nimbusds", "org.apache.gravitino.azure.shaded.com.nimbusds")
   relocate("com.sun.jna", "org.apache.gravitino.azure.shaded.com.sun.jna")
-  relocate("com.sun.xml", "org.apache.gravitino.azure.shaded.com.sun.xml")
   relocate("io.netty", "org.apache.gravitino.azure.shaded.io.netty")
   relocate("net.minidev", "org.apache.gravitino.azure.shaded.net.minidev")
   relocate("net.jcip.annotations", 
"org.apache.gravitino.azure.shaded.net.jcip.annotations")
   relocate("org.apache.commons", 
"org.apache.gravitino.azure.shaded.org.apache.commons")
-  relocate("org.apache.httpcomponents", 
"org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
   relocate("org.checkerframework", 
"org.apache.gravitino.azure.shaded.org.checkerframework")
   relocate("org.codehaus.stax2", 
"org.apache.gravitino.azure.shaded.org.codehaus.stax2")
-  relocate("org.eclipse.jetty", 
"org.apache.gravitino.azure.shaded.org.eclipse.jetty")
   relocate("org.objectweb.asm", 
"org.apache.gravitino.azure.shaded.org.objectweb.asm")
   relocate("org.reactivestreams", 
"org.apache.gravitino.azure.shaded.org.reactivestreams")
   relocate("reactor", "org.apache.gravitino.azure.shaded.reactor")
diff --git a/bundles/gcp-bundle/build.gradle.kts 
b/bundles/gcp-bundle/build.gradle.kts
index df3d976b8f..e8df40e4eb 100644
--- a/bundles/gcp-bundle/build.gradle.kts
+++ b/bundles/gcp-bundle/build.gradle.kts
@@ -50,7 +50,6 @@ tasks.withType(ShadowJar::class.java) {
   relocate("com.google.errorprone", 
"org.apache.gravitino.gcp.shaded.com.google.errorprone")
   relocate("com.google.gson", 
"org.apache.gravitino.gcp.shaded.com.google.gson")
   relocate("com.google.j2objc", 
"org.apache.gravitino.gcp.shaded.com.google.j2objc")
-  relocate("com.google.longrunning", 
"org.apache.gravitino.gcp.shaded.com.google.longrunning")
   relocate("com.google.protobuf", 
"org.apache.gravitino.gcp.shaded.com.google.protobuf")
   relocate("com.google.thirdparty", 
"org.apache.gravitino.gcp.shaded.com.google.thirdparty")
   relocate("io.grpc", "org.apache.gravitino.gcp.shaded.io.grpc")
diff --git a/bundles/gcp/build.gradle.kts b/bundles/gcp/build.gradle.kts
index 4be8d0a154..90234f7538 100644
--- a/bundles/gcp/build.gradle.kts
+++ b/bundles/gcp/build.gradle.kts
@@ -61,17 +61,13 @@ tasks.withType(ShadowJar::class.java) {
   }
 
   // Relocate dependencies to avoid conflicts
-  relocate("com.fasterxml", "org.apache.gravitino.gcp.shaded.com.fasterxml")
   relocate("com.google.api", "org.apache.gravitino.gcp.shaded.com.google.api")
   relocate("com.google.auth", 
"org.apache.gravitino.gcp.shaded.com.google.auth")
   relocate("com.google.auto", 
"org.apache.gravitino.gcp.shaded.com.google.auto")
   relocate("com.google.common", 
"org.apache.gravitino.gcp.shaded.com.google.common")
   relocate("com.google.errorprone", 
"org.apache.gravitino.gcp.shaded.com.google.errorprone")
   relocate("com.google.gson", 
"org.apache.gravitino.gcp.shaded.com.google.gson")
-  relocate("com.google.iam", "org.apache.gravitino.gcp.shaded.com.google.iam")
   relocate("com.google.j2objc", 
"org.apache.gravitino.gcp.shaded.com.google.j2objc")
-  relocate("com.google.longrunning", 
"org.apache.gravitino.gcp.shaded.com.google.longrunning")
-  relocate("com.google.protobuf", 
"org.apache.gravitino.gcp.shaded.com.google.protobuf")
   relocate("com.google.thirdparty", 
"org.apache.gravitino.gcp.shaded.com.google.thirdparty")
   relocate("io.grpc", "org.apache.gravitino.gcp.shaded.io.grpc")
   relocate("io.opencensus", "org.apache.gravitino.gcp.shaded.io.opencensus")
@@ -79,7 +75,6 @@ tasks.withType(ShadowJar::class.java) {
   relocate("org.apache.http", 
"org.apache.gravitino.gcp.shaded.org.apache.http")
   relocate("org.apache.httpcomponents", 
"org.apache.gravitino.gcp.shaded.org.apache.httpcomponents")
   relocate("org.checkerframework", 
"org.apache.gravitino.gcp.shaded.org.checkerframework")
-  relocate("org.eclipse.jetty", 
"org.apache.gravitino.gcp.shaded.org.eclipse.jetty")
 
   mergeServiceFiles()
 }
diff --git a/docs/fileset-catalog-with-adls.md 
b/docs/fileset-catalog-with-adls.md
index 276fcbce39..089d2454b0 100644
--- a/docs/fileset-catalog-with-adls.md
+++ b/docs/fileset-catalog-with-adls.md
@@ -306,12 +306,12 @@ Or use the bundle jar with Hadoop environment if there is 
no Hadoop environment:
 
 ### Using Spark to access the fileset
 
-The following code snippet shows how to use **PySpark 3.1.3 with Hadoop 
environment(Hadoop 3.2.0)** and JDK8 to access the fileset:
+The following code snippet shows how to use **PySpark 3.5.0 with Hadoop 
environment(Hadoop 3.3.4)** to access the fileset:
 
 Before running the following code, you need to install required packages:
 
 ```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
 pip install apache-gravitino==${GRAVITINO_VERSION}
 ```
 Then you can run the following code:
@@ -326,10 +326,8 @@ metalake_name = "test"
 catalog_name = "your_adls_catalog"
 schema_name = "your_adls_schema"
 fileset_name = "your_adls_fileset"
-# JDK8
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-azure-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/hadoop-azure-3.2.0.jar,/path/to/azure-storage-7.0.0.jar,/path/to/wildfly-openssl-1.0.4.Final.jar
 --master local[1] pyspark-shell"
-# JDK17
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-azure-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/hadoop-azure-3.2.0.jar,/path/to/azure-storage-7.0.0.jar,/path/to/wildfly-openssl-1.0.4.Final.jar
 --conf 
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" 
--conf 
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
 --master local[1] pyspark-shell"
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf 
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" 
--conf 
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
 to the submit args.
+os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-azure-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/hadoop-azure-3.3.4.jar,/path/to/azure-storage-7.0.1.jar,/path/to/wildfly-openssl-1.0.7.Final.jar
 --master local[1] pyspark-shell"
 spark = SparkSession.builder
     .appName("adls_fileset_test")
     .config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl", 
"org.apache.gravitino.filesystem.hadoop.Gvfs")
@@ -378,7 +376,7 @@ os.environ["PYSPARK_SUBMIT_ARGS"] = (
 
 - 
[`gravitino-azure-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-azure-bundle)
 is the Gravitino ADLS jar with Hadoop environment(3.3.1), `hadoop-azure.jar` 
and all packages needed to access ADLS.
 - 
[`gravitino-azure-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-azure)
 is a condensed version of the Gravitino ADLS bundle jar without Hadoop 
environment and `hadoop-azure.jar`.
-- `hadoop-azure-3.2.0.jar` and `azure-storage-7.0.0.jar` can be found in the 
Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
+- `hadoop-azure-3.3.4.jar` and `azure-storage-7.0.1.jar` can be found in the 
Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
 
 Please choose the correct jar according to your environment.
 
diff --git a/docs/fileset-catalog-with-gcs.md b/docs/fileset-catalog-with-gcs.md
index 52c085e302..57f643a272 100644
--- a/docs/fileset-catalog-with-gcs.md
+++ b/docs/fileset-catalog-with-gcs.md
@@ -297,12 +297,12 @@ Or use the bundle jar with Hadoop environment if there is 
no Hadoop environment:
 
 ### Using Spark to access the fileset
 
-The following code snippet shows how to use **PySpark 3.1.3 with Hadoop 
environment(Hadoop 3.2.0)** and JDK8 to access the fileset:
+The following code snippet shows how to use **PySpark 3.5.0 with Hadoop 
environment(Hadoop 3.3.4)** to access the fileset:
 
 Before running the following code, you need to install required packages:
 
 ```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
 pip install apache-gravitino==${GRAVITINO_VERSION}
 ```
 Then you can run the following code:
@@ -318,10 +318,8 @@ catalog_name = "your_gcs_catalog"
 schema_name = "your_gcs_schema"
 fileset_name = "your_gcs_fileset"
 
-# JDK8
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf 
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" 
--conf 
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
 to the submit args.
 os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-gcp-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/gcs-connector-hadoop3-2.2.22-shaded.jar
 --master local[1] pyspark-shell"
-# JDK17
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-gcp-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/gcs-connector-hadoop3-2.2.22-shaded.jar
 --conf 
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" 
--conf 
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
 --master local[1] pyspark-shell"
 spark = SparkSession.builder
     .appName("gcs_fielset_test")
     .config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl", 
"org.apache.gravitino.filesystem.hadoop.Gvfs")
diff --git a/docs/fileset-catalog-with-oss.md b/docs/fileset-catalog-with-oss.md
index 5666898cd7..43dcbef922 100644
--- a/docs/fileset-catalog-with-oss.md
+++ b/docs/fileset-catalog-with-oss.md
@@ -312,12 +312,12 @@ Or use the bundle jar with Hadoop environment if there is 
no Hadoop environment:
 
 ### Using Spark to access the fileset
 
-The following code snippet shows how to use **PySpark 3.1.3 with Hadoop 
environment(Hadoop 3.2.0)** to access the fileset:
+The following code snippet shows how to use **PySpark 3.5.0 with Hadoop 
environment(Hadoop 3.3.4)** to access the fileset:
 
 Before running the following code, you need to install required packages:
 
 ```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
 pip install apache-gravitino==${GRAVITINO_VERSION}
 ```
 Then you can run the following code:
@@ -333,7 +333,15 @@ catalog_name = "your_oss_catalog"
 schema_name = "your_oss_schema"
 fileset_name = "your_oss_fileset"
 
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-aliyun-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/aliyun-sdk-oss-2.8.3.jar,/path/to/hadoop-aliyun-3.2.0.jar,/path/to/jdom-1.1.jar
 --master local[1] pyspark-shell"
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf 
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" 
--conf 
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
 to the submit args.
+os.environ["PYSPARK_SUBMIT_ARGS"] = (
+    "--jars /path/to/gravitino-aliyun-{gravitino-version}.jar,"
+    "/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,"
+    "/path/to/aliyun-sdk-oss-3.13.0.jar,"
+    "/path/to/hadoop-aliyun-3.3.4.jar,"
+    "/path/to/jdom2-2.0.6 "
+    "--master local[1] pyspark-shell"
+)
 spark = SparkSession.builder
     .appName("oss_fileset_test")
     .config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl", 
"org.apache.gravitino.filesystem.hadoop.Gvfs")
@@ -368,7 +376,7 @@ os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-aliyun-bundle-{gr
 
 - 
[`gravitino-aliyun-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aliyun-bundle)
 is the Gravitino Aliyun jar with Hadoop environment(3.3.1) and `hadoop-oss` 
jar.
 - 
[`gravitino-aliyun-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aliyun)
 is a condensed version of the Gravitino Aliyun bundle jar without Hadoop 
environment and `hadoop-aliyun` jar.
--`hadoop-aliyun-3.2.0.jar` and `aliyun-sdk-oss-2.8.3.jar` can be found in the 
Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
+-`hadoop-aliyun-3.3.4.jar`, `jdom2-2.0.6.jar`, and `aliyun-sdk-oss-3.13.0.jar` 
can be found in the Hadoop distribution in the 
`${HADOOP_HOME}/share/hadoop/tools/lib` directory.
 
 Please choose the correct jar according to your environment.
 
diff --git a/docs/fileset-catalog-with-s3.md b/docs/fileset-catalog-with-s3.md
index 59931ba2d8..774fe9ab54 100644
--- a/docs/fileset-catalog-with-s3.md
+++ b/docs/fileset-catalog-with-s3.md
@@ -315,12 +315,12 @@ Or use the bundle jar with Hadoop environment if there is 
no Hadoop environment:
 
 ### Using Spark to access the fileset
 
-The following Python code demonstrates how to use **PySpark 3.1.3 with Hadoop 
environment(Hadoop 3.2.0)** to access the fileset:
+The following Python code demonstrates how to use **PySpark 3.5.0 with Hadoop 
environment(Hadoop 3.3.4)** to access the fileset:
 
 Before running the following code, you need to install required packages:
 
 ```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
 pip install apache-gravitino==${GRAVITINO_VERSION}
 ```
 Then you can run the following code:
@@ -336,7 +336,8 @@ catalog_name = "your_s3_catalog"
 schema_name = "your_s3_schema"
 fileset_name = "your_s3_fileset"
 
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-aws-${gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-${gravitino-version}-SNAPSHOT.jar,/path/to/hadoop-aws-3.2.0.jar,/path/to/aws-java-sdk-bundle-1.11.375.jar
 --master local[1] pyspark-shell"
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf 
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" 
--conf 
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
 to the submit args.
+os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-aws-${gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-${gravitino-version}-SNAPSHOT.jar,/path/to/hadoop-aws-3.3.4.jar,/path/to/aws-java-sdk-bundle-1.12.262.jar
 --master local[1] pyspark-shell"
 spark = SparkSession.builder
     .appName("s3_fileset_test")
     .config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl", 
"org.apache.gravitino.filesystem.hadoop.Gvfs")
@@ -370,7 +371,7 @@ os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars 
/path/to/gravitino-aws-bundle-${grav
 
 - 
[`gravitino-aws-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aws-bundle)
 is the Gravitino AWS jar with Hadoop environment(3.3.1) and `hadoop-aws` jar.
 - 
[`gravitino-aws-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aws)
 is a condensed version of the Gravitino AWS bundle jar without Hadoop 
environment and `hadoop-aws` jar.
-- `hadoop-aws-3.2.0.jar` and `aws-java-sdk-bundle-1.11.375.jar` can be found 
in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` 
directory. 
+- `hadoop-aws-3.3.4.jar` and `aws-java-sdk-bundle-1.12.262.jar` can be found 
in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` 
directory.
 
 Please choose the correct jar according to your environment.
 

Reply via email to