This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new b2e749d187 [#8431] improvement(bundles): Remove unnecessary relocate in fileset bundle jars. (#8432)
b2e749d187 is described below
commit b2e749d1875f65e6269db833da44e80f25bf695c
Author: Mini Yu <[email protected]>
AuthorDate: Tue Sep 9 10:08:49 2025 +0800
[#8431] improvement(bundles): Remove unnecessary relocate in fileset bundle jars. (#8432)
### What changes were proposed in this pull request?
Remove unnecessary `relocate` entries from the fileset bundle jars (the aliyun, azure, and gcp bundle build scripts).
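For context, the relocation mechanism involved is the Shadow plugin's `relocate` directive used in the bundle build scripts. The sketch below (Gradle Kotlin DSL) is illustrative only: it mirrors the shape of `bundles/aliyun/build.gradle.kts`, keeps two entries that remain after this change, and simply omits the dropped ones (for example `com.sun.jersey` and `org.jdom`); the import path is an assumption based on the common Shadow plugin coordinates and may differ by plugin version.

```kotlin
// Sketch of the ShadowJar relocation pattern used by the fileset bundle modules.
// The import assumes the com.github.johnrengelman.shadow plugin; adjust if the
// project pins a Shadow plugin with different coordinates.
import com.github.johnrengelman.shadow.tasks.ShadowJar

tasks.withType(ShadowJar::class.java) {
  // relocate(from, to) rewrites the "from" package prefix to "to" inside the
  // shaded jar, so bundled third-party classes cannot clash with the same
  // library already present on the user's classpath.
  relocate("okhttp3", "org.apache.gravitino.aliyun.shaded.okhttp3")
  relocate("okio", "org.apache.gravitino.aliyun.shaded.okio")

  // Entries removed by this patch (e.g. com.sun.jersey, org.jdom) are no longer
  // listed: relocating packages the bundle does not actually ship adds no
  // isolation and can rewrite references that should resolve to external classes.
  mergeServiceFiles()
}
```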
### Why are the changes needed?
To avoid potential problems.
Fix: #8431
Fix: #8391
### Does this PR introduce _any_ user-facing change?
N/A.
### How was this patch tested?
Existing tests.
---------
Co-authored-by: mchades <[email protected]>
Co-authored-by: yuhui <[email protected]>
---
bundles/aliyun/build.gradle.kts | 2 --
bundles/azure-bundle/build.gradle.kts | 1 -
bundles/azure/build.gradle.kts | 3 ---
bundles/gcp-bundle/build.gradle.kts | 1 -
bundles/gcp/build.gradle.kts | 5 -----
docs/fileset-catalog-with-adls.md | 12 +++++-------
docs/fileset-catalog-with-gcs.md | 8 +++-----
docs/fileset-catalog-with-oss.md | 16 ++++++++++++----
docs/fileset-catalog-with-s3.md | 9 +++++----
9 files changed, 25 insertions(+), 32 deletions(-)
diff --git a/bundles/aliyun/build.gradle.kts b/bundles/aliyun/build.gradle.kts
index c84dfc8184..76e73c8669 100644
--- a/bundles/aliyun/build.gradle.kts
+++ b/bundles/aliyun/build.gradle.kts
@@ -83,7 +83,6 @@ tasks.withType(ShadowJar::class.java) {
relocate("com.google",
"org.apache.gravitino.aliyun.shaded.com.google.common")
relocate("com.sun.activation",
"org.apache.gravitino.aliyun.shaded.com.sun.activation")
relocate("com.sun.istack",
"org.apache.gravitino.aliyun.shaded.com.sun.istack")
- relocate("com.sun.jersey",
"org.apache.gravitino.aliyun.shaded.com.sun.jersey")
relocate("com.sun.xml", "org.apache.gravitino.aliyun.shaded.com.sun.xml")
relocate("okhttp3", "org.apache.gravitino.aliyun.shaded.okhttp3")
relocate("okio", "org.apache.gravitino.aliyun.shaded.okio")
@@ -91,7 +90,6 @@ tasks.withType(ShadowJar::class.java) {
relocate("org.apache.http",
"org.apache.gravitino.aliyun.shaded.org.apache.http")
relocate("org.checkerframework",
"org.apache.gravitino.aliyun.shaded.org.checkerframework")
relocate("org.jacoco.agent.rt",
"org.apache.gravitino.aliyun.shaded.org.jacoco.agent.rt")
- relocate("org.jdom", "org.apache.gravitino.aliyun.shaded.org.jdom")
mergeServiceFiles()
}
diff --git a/bundles/azure-bundle/build.gradle.kts b/bundles/azure-bundle/build.gradle.kts
index 6ff704ea28..df6be64a83 100644
--- a/bundles/azure-bundle/build.gradle.kts
+++ b/bundles/azure-bundle/build.gradle.kts
@@ -52,7 +52,6 @@ tasks.withType(ShadowJar::class.java) {
relocate("com.microsoft.aad",
"org.apache.gravitino.azure.shaded.com.microsoft.aad")
relocate("com.nimbusds", "org.apache.gravitino.azure.shaded.com.nimbusds")
relocate("com.sun.jna", "org.apache.gravitino.azure.shaded.com.sun.jna")
- relocate("com.sun.xml", "org.apache.gravitino.azure.shaded.com.sun.xml")
relocate("io.netty", "org.apache.gravitino.azure.shaded.io.netty")
relocate("net.minidev", "org.apache.gravitino.azure.shaded.net.minidev")
relocate("net.jcip.annotations",
"org.apache.gravitino.azure.shaded.net.jcip.annotations")
diff --git a/bundles/azure/build.gradle.kts b/bundles/azure/build.gradle.kts
index a11df2ed00..aeb3b7406d 100644
--- a/bundles/azure/build.gradle.kts
+++ b/bundles/azure/build.gradle.kts
@@ -71,15 +71,12 @@ tasks.withType(ShadowJar::class.java) {
relocate("com.microsoft.aad",
"org.apache.gravitino.azure.shaded.com.microsoft.aad")
relocate("com.nimbusds", "org.apache.gravitino.azure.shaded.com.nimbusds")
relocate("com.sun.jna", "org.apache.gravitino.azure.shaded.com.sun.jna")
- relocate("com.sun.xml", "org.apache.gravitino.azure.shaded.com.sun.xml")
relocate("io.netty", "org.apache.gravitino.azure.shaded.io.netty")
relocate("net.minidev", "org.apache.gravitino.azure.shaded.net.minidev")
relocate("net.jcip.annotations",
"org.apache.gravitino.azure.shaded.net.jcip.annotations")
relocate("org.apache.commons",
"org.apache.gravitino.azure.shaded.org.apache.commons")
- relocate("org.apache.httpcomponents",
"org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
relocate("org.checkerframework",
"org.apache.gravitino.azure.shaded.org.checkerframework")
relocate("org.codehaus.stax2",
"org.apache.gravitino.azure.shaded.org.codehaus.stax2")
- relocate("org.eclipse.jetty",
"org.apache.gravitino.azure.shaded.org.eclipse.jetty")
relocate("org.objectweb.asm",
"org.apache.gravitino.azure.shaded.org.objectweb.asm")
relocate("org.reactivestreams",
"org.apache.gravitino.azure.shaded.org.reactivestreams")
relocate("reactor", "org.apache.gravitino.azure.shaded.reactor")
diff --git a/bundles/gcp-bundle/build.gradle.kts b/bundles/gcp-bundle/build.gradle.kts
index df3d976b8f..e8df40e4eb 100644
--- a/bundles/gcp-bundle/build.gradle.kts
+++ b/bundles/gcp-bundle/build.gradle.kts
@@ -50,7 +50,6 @@ tasks.withType(ShadowJar::class.java) {
relocate("com.google.errorprone",
"org.apache.gravitino.gcp.shaded.com.google.errorprone")
relocate("com.google.gson",
"org.apache.gravitino.gcp.shaded.com.google.gson")
relocate("com.google.j2objc",
"org.apache.gravitino.gcp.shaded.com.google.j2objc")
- relocate("com.google.longrunning",
"org.apache.gravitino.gcp.shaded.com.google.longrunning")
relocate("com.google.protobuf",
"org.apache.gravitino.gcp.shaded.com.google.protobuf")
relocate("com.google.thirdparty",
"org.apache.gravitino.gcp.shaded.com.google.thirdparty")
relocate("io.grpc", "org.apache.gravitino.gcp.shaded.io.grpc")
diff --git a/bundles/gcp/build.gradle.kts b/bundles/gcp/build.gradle.kts
index 4be8d0a154..90234f7538 100644
--- a/bundles/gcp/build.gradle.kts
+++ b/bundles/gcp/build.gradle.kts
@@ -61,17 +61,13 @@ tasks.withType(ShadowJar::class.java) {
}
// Relocate dependencies to avoid conflicts
- relocate("com.fasterxml", "org.apache.gravitino.gcp.shaded.com.fasterxml")
relocate("com.google.api", "org.apache.gravitino.gcp.shaded.com.google.api")
relocate("com.google.auth",
"org.apache.gravitino.gcp.shaded.com.google.auth")
relocate("com.google.auto",
"org.apache.gravitino.gcp.shaded.com.google.auto")
relocate("com.google.common",
"org.apache.gravitino.gcp.shaded.com.google.common")
relocate("com.google.errorprone",
"org.apache.gravitino.gcp.shaded.com.google.errorprone")
relocate("com.google.gson",
"org.apache.gravitino.gcp.shaded.com.google.gson")
- relocate("com.google.iam", "org.apache.gravitino.gcp.shaded.com.google.iam")
relocate("com.google.j2objc",
"org.apache.gravitino.gcp.shaded.com.google.j2objc")
- relocate("com.google.longrunning",
"org.apache.gravitino.gcp.shaded.com.google.longrunning")
- relocate("com.google.protobuf",
"org.apache.gravitino.gcp.shaded.com.google.protobuf")
relocate("com.google.thirdparty",
"org.apache.gravitino.gcp.shaded.com.google.thirdparty")
relocate("io.grpc", "org.apache.gravitino.gcp.shaded.io.grpc")
relocate("io.opencensus", "org.apache.gravitino.gcp.shaded.io.opencensus")
@@ -79,7 +75,6 @@ tasks.withType(ShadowJar::class.java) {
relocate("org.apache.http",
"org.apache.gravitino.gcp.shaded.org.apache.http")
relocate("org.apache.httpcomponents",
"org.apache.gravitino.gcp.shaded.org.apache.httpcomponents")
relocate("org.checkerframework",
"org.apache.gravitino.gcp.shaded.org.checkerframework")
- relocate("org.eclipse.jetty",
"org.apache.gravitino.gcp.shaded.org.eclipse.jetty")
mergeServiceFiles()
}
diff --git a/docs/fileset-catalog-with-adls.md b/docs/fileset-catalog-with-adls.md
index 276fcbce39..089d2454b0 100644
--- a/docs/fileset-catalog-with-adls.md
+++ b/docs/fileset-catalog-with-adls.md
@@ -306,12 +306,12 @@ Or use the bundle jar with Hadoop environment if there is no Hadoop environment:
### Using Spark to access the fileset
-The following code snippet shows how to use **PySpark 3.1.3 with Hadoop environment(Hadoop 3.2.0)** and JDK8 to access the fileset:
+The following code snippet shows how to use **PySpark 3.5.0 with Hadoop environment(Hadoop 3.3.4)** to access the fileset:
Before running the following code, you need to install required packages:
```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
pip install apache-gravitino==${GRAVITINO_VERSION}
```
Then you can run the following code:
@@ -326,10 +326,8 @@ metalake_name = "test"
catalog_name = "your_adls_catalog"
schema_name = "your_adls_schema"
fileset_name = "your_adls_fileset"
-# JDK8
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars
/path/to/gravitino-azure-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/hadoop-azure-3.2.0.jar,/path/to/azure-storage-7.0.0.jar,/path/to/wildfly-openssl-1.0.4.Final.jar
--master local[1] pyspark-shell"
-# JDK17
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars
/path/to/gravitino-azure-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/hadoop-azure-3.2.0.jar,/path/to/azure-storage-7.0.0.jar,/path/to/wildfly-openssl-1.0.4.Final.jar
--conf
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
--conf
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
--master local[1] pyspark-shell"
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
--conf
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
to the submit args.
+os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars
/path/to/gravitino-azure-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/hadoop-azure-3.3.4.jar,/path/to/azure-storage-7.0.1.jar,/path/to/wildfly-openssl-1.0.7.Final.jar
--master local[1] pyspark-shell"
spark = SparkSession.builder
.appName("adls_fileset_test")
.config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.Gvfs")
@@ -378,7 +376,7 @@ os.environ["PYSPARK_SUBMIT_ARGS"] = (
- [`gravitino-azure-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-azure-bundle) is the Gravitino ADLS jar with Hadoop environment(3.3.1), `hadoop-azure.jar` and all packages needed to access ADLS.
- [`gravitino-azure-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-azure) is a condensed version of the Gravitino ADLS bundle jar without Hadoop environment and `hadoop-azure.jar`.
-- `hadoop-azure-3.2.0.jar` and `azure-storage-7.0.0.jar` can be found in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
+- `hadoop-azure-3.3.4.jar` and `azure-storage-7.0.1.jar` can be found in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
Please choose the correct jar according to your environment.
diff --git a/docs/fileset-catalog-with-gcs.md b/docs/fileset-catalog-with-gcs.md
index 52c085e302..57f643a272 100644
--- a/docs/fileset-catalog-with-gcs.md
+++ b/docs/fileset-catalog-with-gcs.md
@@ -297,12 +297,12 @@ Or use the bundle jar with Hadoop environment if there is no Hadoop environment:
### Using Spark to access the fileset
-The following code snippet shows how to use **PySpark 3.1.3 with Hadoop environment(Hadoop 3.2.0)** and JDK8 to access the fileset:
+The following code snippet shows how to use **PySpark 3.5.0 with Hadoop environment(Hadoop 3.3.4)** to access the fileset:
Before running the following code, you need to install required packages:
```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
pip install apache-gravitino==${GRAVITINO_VERSION}
```
Then you can run the following code:
@@ -318,10 +318,8 @@ catalog_name = "your_gcs_catalog"
schema_name = "your_gcs_schema"
fileset_name = "your_gcs_fileset"
-# JDK8
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf \"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" --conf \"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"' to the submit args.
os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars /path/to/gravitino-gcp-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/gcs-connector-hadoop3-2.2.22-shaded.jar --master local[1] pyspark-shell"
-# JDK17
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars /path/to/gravitino-gcp-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/gcs-connector-hadoop3-2.2.22-shaded.jar --conf \"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" --conf \"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" --master local[1] pyspark-shell"
spark = SparkSession.builder
.appName("gcs_fielset_test")
.config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.Gvfs")
diff --git a/docs/fileset-catalog-with-oss.md b/docs/fileset-catalog-with-oss.md
index 5666898cd7..43dcbef922 100644
--- a/docs/fileset-catalog-with-oss.md
+++ b/docs/fileset-catalog-with-oss.md
@@ -312,12 +312,12 @@ Or use the bundle jar with Hadoop environment if there is no Hadoop environment:
### Using Spark to access the fileset
-The following code snippet shows how to use **PySpark 3.1.3 with Hadoop environment(Hadoop 3.2.0)** to access the fileset:
+The following code snippet shows how to use **PySpark 3.5.0 with Hadoop environment(Hadoop 3.3.4)** to access the fileset:
Before running the following code, you need to install required packages:
```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
pip install apache-gravitino==${GRAVITINO_VERSION}
```
Then you can run the following code:
@@ -333,7 +333,15 @@ catalog_name = "your_oss_catalog"
schema_name = "your_oss_schema"
fileset_name = "your_oss_fileset"
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars
/path/to/gravitino-aliyun-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,/path/to/aliyun-sdk-oss-2.8.3.jar,/path/to/hadoop-aliyun-3.2.0.jar,/path/to/jdom-1.1.jar
--master local[1] pyspark-shell"
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
--conf
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
to the submit args.
+os.environ["PYSPARK_SUBMIT_ARGS"] = (
+ "--jars /path/to/gravitino-aliyun-{gravitino-version}.jar,"
+ "/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar,"
+ "/path/to/aliyun-sdk-oss-3.13.0.jar,"
+ "/path/to/hadoop-aliyun-3.3.4.jar,"
+ "/path/to/jdom2-2.0.6 "
+ "--master local[1] pyspark-shell"
+)
spark = SparkSession.builder
.appName("oss_fileset_test")
.config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.Gvfs")
@@ -368,7 +376,7 @@ os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars /path/to/gravitino-aliyun-bundle-{gr
- [`gravitino-aliyun-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aliyun-bundle) is the Gravitino Aliyun jar with Hadoop environment(3.3.1) and `hadoop-oss` jar.
- [`gravitino-aliyun-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aliyun) is a condensed version of the Gravitino Aliyun bundle jar without Hadoop environment and `hadoop-aliyun` jar.
--`hadoop-aliyun-3.2.0.jar` and `aliyun-sdk-oss-2.8.3.jar` can be found in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
+-`hadoop-aliyun-3.3.4.jar`, `jdom2-2.0.6.jar`, and `aliyun-sdk-oss-3.13.0.jar` can be found in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
Please choose the correct jar according to your environment.
diff --git a/docs/fileset-catalog-with-s3.md b/docs/fileset-catalog-with-s3.md
index 59931ba2d8..774fe9ab54 100644
--- a/docs/fileset-catalog-with-s3.md
+++ b/docs/fileset-catalog-with-s3.md
@@ -315,12 +315,12 @@ Or use the bundle jar with Hadoop environment if there is no Hadoop environment:
### Using Spark to access the fileset
-The following Python code demonstrates how to use **PySpark 3.1.3 with Hadoop environment(Hadoop 3.2.0)** to access the fileset:
+The following Python code demonstrates how to use **PySpark 3.5.0 with Hadoop environment(Hadoop 3.3.4)** to access the fileset:
Before running the following code, you need to install required packages:
```bash
-pip install pyspark==3.1.3
+pip install pyspark==3.5.0
pip install apache-gravitino==${GRAVITINO_VERSION}
```
Then you can run the following code:
@@ -336,7 +336,8 @@ catalog_name = "your_s3_catalog"
schema_name = "your_s3_schema"
fileset_name = "your_s3_fileset"
-os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars
/path/to/gravitino-aws-${gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-${gravitino-version}-SNAPSHOT.jar,/path/to/hadoop-aws-3.2.0.jar,/path/to/aws-java-sdk-bundle-1.11.375.jar
--master local[1] pyspark-shell"
+# JDK8 as follows, JDK17 will be slightly different, you need to add '--conf
\"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"
--conf
\"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\"'
to the submit args.
+os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars
/path/to/gravitino-aws-${gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-${gravitino-version}-SNAPSHOT.jar,/path/to/hadoop-aws-3.3.4.jar,/path/to/aws-java-sdk-bundle-1.12.262.jar
--master local[1] pyspark-shell"
spark = SparkSession.builder
.appName("s3_fileset_test")
.config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.Gvfs")
@@ -370,7 +371,7 @@ os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars /path/to/gravitino-aws-bundle-${grav
- [`gravitino-aws-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aws-bundle) is the Gravitino AWS jar with Hadoop environment(3.3.1) and `hadoop-aws` jar.
- [`gravitino-aws-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-aws) is a condensed version of the Gravitino AWS bundle jar without Hadoop environment and `hadoop-aws` jar.
-- `hadoop-aws-3.2.0.jar` and `aws-java-sdk-bundle-1.11.375.jar` can be found in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
+- `hadoop-aws-3.3.4.jar` and `aws-java-sdk-bundle-1.12.262.jar` can be found in the Hadoop distribution in the `${HADOOP_HOME}/share/hadoop/tools/lib` directory.
Please choose the correct jar according to your environment.