This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 09ecb0c18c89 [HUDI-9246] Confine protobuf shading to the hudi-io package (#13065)
09ecb0c18c89 is described below
commit 09ecb0c18c897fced60f1383cdf6316bae0c9914
Author: Tim Brown <[email protected]>
AuthorDate: Tue Jul 29 07:34:17 2025 -0400
[HUDI-9246] Confine protobuf shading to the hudi-io package (#13065)
---
hudi-client/hudi-client-common/pom.xml | 1 +
hudi-client/hudi-spark-client/pom.xml | 1 +
.../apache/spark/HoodieSparkKryoRegistrar.scala | 12 ++++++++-
hudi-common/pom.xml | 1 +
hudi-hadoop-common/pom.xml | 1 +
hudi-io/pom.xml | 30 ++++++++++++++++++++++
hudi-spark-datasource/hudi-spark/pom.xml | 1 +
hudi-sync/hudi-hive-sync/pom.xml | 1 +
hudi-utilities/pom.xml | 1 +
packaging/hudi-integ-test-bundle/pom.xml | 1 +
packaging/hudi-utilities-bundle/pom.xml | 4 ---
packaging/hudi-utilities-slim-bundle/pom.xml | 4 ---
pom.xml | 6 -----
13 files changed, 49 insertions(+), 15 deletions(-)
diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml
index 68c0fabc7b8e..98fef5b79fa6 100644
--- a/hudi-client/hudi-client-common/pom.xml
+++ b/hudi-client/hudi-client-common/pom.xml
@@ -47,6 +47,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml
index fb65be6af2c6..587cf8a9c505 100644
--- a/hudi-client/hudi-spark-client/pom.xml
+++ b/hudi-client/hudi-spark-client/pom.xml
@@ -59,6 +59,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<dependency>
diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/HoodieSparkKryoRegistrar.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/HoodieSparkKryoRegistrar.scala
index 46c898b13db7..fcfe78b0bf0c 100644
--- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/HoodieSparkKryoRegistrar.scala
+++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/HoodieSparkKryoRegistrar.scala
@@ -32,6 +32,7 @@ import com.esotericsoftware.kryo.serializers.JavaSerializer
import com.google.protobuf.Message
import com.twitter.chill.protobuf.ProtobufSerializer
import org.apache.spark.serializer.KryoRegistrator
+import org.slf4j.LoggerFactory
/**
* NOTE: PLEASE READ CAREFULLY BEFORE CHANGING
@@ -50,6 +51,8 @@ import org.apache.spark.serializer.KryoRegistrator
* </ol>
*/
class HoodieSparkKryoRegistrar extends HoodieCommonKryoRegistrar with KryoRegistrator {
+ private val log = LoggerFactory.getLogger(classOf[HoodieSparkKryoRegistrar])
+
override def registerClasses(kryo: Kryo): Unit = {
///////////////////////////////////////////////////////////////////////////
@@ -74,7 +77,14 @@ class HoodieSparkKryoRegistrar extends HoodieCommonKryoRegistrar with KryoRegist
// So we replace it with [[HadoopStorageConfiguration]] for Spark.
kryo.register(classOf[HadoopStorageConfiguration], new JavaSerializer())
// NOTE: Protobuf objects are not serializable by default using kryo, need to register them explicitly.
- kryo.addDefaultSerializer(classOf[Message], new ProtobufSerializer())
+ // Only initialize this serializer if Protobuf is on the classpath.
+ try {
+ if (Class.forName(classOf[Message].getName, false, getClass.getClassLoader) != null) {
+ kryo.addDefaultSerializer(classOf[Message], new ProtobufSerializer())
+ }
+ } catch {
+ case _: ClassNotFoundException | _: NoClassDefFoundError => log.warn("Protobuf classes not found on the classpath, skipping Protobuf serializer registration.")
+ }
}
/**
diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index 6c3484b57703..a2d75a27ad47 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -107,6 +107,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<dependency>
diff --git a/hudi-hadoop-common/pom.xml b/hudi-hadoop-common/pom.xml
index d84ef2cd7f97..cf66da7a694e 100644
--- a/hudi-hadoop-common/pom.xml
+++ b/hudi-hadoop-common/pom.xml
@@ -77,6 +77,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<!-- Hadoop -->
diff --git a/hudi-io/pom.xml b/hudi-io/pom.xml
index 59c79935dc92..94b05512575d 100644
--- a/hudi-io/pom.xml
+++ b/hudi-io/pom.xml
@@ -96,6 +96,36 @@
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>${maven-shade-plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <createDependencyReducedPom>false</createDependencyReducedPom>
+ <shadedArtifactAttached>true</shadedArtifactAttached>
+ <shadedClassifierName>shaded</shadedClassifierName>
+ <artifactSet>
+ <includes>
+ <!-- native HFile reader uses protobuf -->
+ <include>com.google.protobuf:protobuf-java</include>
+ </includes>
+ </artifactSet>
+ <relocations>
+ <relocation>
+ <pattern>com.google.protobuf.</pattern>
+ <shadedPattern>org.apache.hudi.com.google.protobuf.</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml
index bbe5888fdd03..045dadb473c8 100644
--- a/hudi-spark-datasource/hudi-spark/pom.xml
+++ b/hudi-spark-datasource/hudi-spark/pom.xml
@@ -205,6 +205,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml
index ddf220094728..7b37d1200472 100644
--- a/hudi-sync/hudi-hive-sync/pom.xml
+++ b/hudi-sync/hudi-hive-sync/pom.xml
@@ -53,6 +53,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index 9546940eaa6d..d20007c11a7f 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -172,6 +172,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
+ <classifier>shaded</classifier>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml
index 38556013cdd0..a2e6792a033e 100644
--- a/packaging/hudi-integ-test-bundle/pom.xml
+++ b/packaging/hudi-integ-test-bundle/pom.xml
@@ -121,6 +121,7 @@
<include>org.apache.parquet:parquet-avro</include>
<include>com.twitter:parquet-avro</include>
<include>com.twitter.common:objectsize</include>
+ <include>com.google.protobuf:protobuf-java</include>
<include>io.confluent:kafka-avro-serializer</include>
<include>io.confluent:common-config</include>
diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index 67f5cb49476e..e4e57cbedfc3 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -233,10 +233,6 @@
<pattern>org.apache.httpcomponents.</pattern>
<shadedPattern>org.apache.hudi.aws.org.apache.httpcomponents.</shadedPattern>
</relocation>
- <relocation>
- <pattern>com.google.protobuf.</pattern>
- <shadedPattern>org.apache.hudi.com.google.protobuf.</shadedPattern>
- </relocation>
<relocation>
<pattern>com.uber.m3.</pattern>
<shadedPattern>org.apache.hudi.com.uber.m3.</shadedPattern>
diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml
index 2bf20fa3d496..feed197ef2de 100644
--- a/packaging/hudi-utilities-slim-bundle/pom.xml
+++ b/packaging/hudi-utilities-slim-bundle/pom.xml
@@ -162,10 +162,6 @@
<pattern>org.openjdk.jol.</pattern>
<shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern>
</relocation>
- <relocation>
- <pattern>com.google.protobuf.</pattern>
- <shadedPattern>org.apache.hudi.com.google.protobuf.</shadedPattern>
- </relocation>
<relocation>
<pattern>com.uber.m3.</pattern>
<shadedPattern>org.apache.hudi.com.uber.m3.</shadedPattern>
diff --git a/pom.xml b/pom.xml
index 94f6f92fcc8c..4d09ce229fe4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -464,8 +464,6 @@
<!-- afterburner module for jackson performance -->
<include>com.fasterxml.jackson.module:jackson-module-afterburner</include>
<include>com.fasterxml.jackson.module:jackson-module-scala_${scala.binary.version}</include>
- <!-- native HFile reader uses protobuf -->
- <include>com.google.protobuf:protobuf-java</include>
</includes>
</artifactSet>
<relocations>
@@ -483,10 +481,6 @@
<shadedPattern>org.apache.hudi.com.fasterxml.jackson.module
</shadedPattern>
</relocation>
- <relocation>
- <pattern>com.google.protobuf.</pattern>
- <shadedPattern>org.apache.hudi.com.google.protobuf.</shadedPattern>
- </relocation>
</relocations>
</configuration>
</plugin>