This is an automated email from the ASF dual-hosted git repository.

codope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 31e13db1f0 [HUDI-4023] Decouple hudi-spark from hudi-utilities-slim-bundle (#5641)
31e13db1f0 is described below

commit 31e13db1f0e12e107cc02c60dec3e52a8914a5b2
Author: Sagar Sumit <sagarsumi...@gmail.com>
AuthorDate: Thu May 26 11:28:49 2022 +0530

    [HUDI-4023] Decouple hudi-spark from hudi-utilities-slim-bundle (#5641)
---
 .../hudi/utilities/deltastreamer/DeltaSync.java    |   2 -
 packaging/hudi-utilities-slim-bundle/README.md     |  89 ++++++++++++-
 packaging/hudi-utilities-slim-bundle/pom.xml       | 143 +++------------------
 pom.xml                                            |   7 +
 4 files changed, 109 insertions(+), 132 deletions(-)

diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
index a4a7e10abc..0ae72f94b8 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
@@ -605,8 +605,6 @@ public class DeltaSync implements Serializable {
     long totalErrorRecords = writeStatusRDD.mapToDouble(WriteStatus::getTotalErrorRecords).sum().longValue();
     long totalRecords = writeStatusRDD.mapToDouble(WriteStatus::getTotalRecords).sum().longValue();
     boolean hasErrors = totalErrorRecords > 0;
-    long hiveSyncTimeMs = 0;
-    long metaSyncTimeMs = 0;
     if (!hasErrors || cfg.commitOnErrors) {
       HashMap<String, String> checkpointCommitMetadata = new HashMap<>();
       if (checkpointStr != null) {
diff --git a/packaging/hudi-utilities-slim-bundle/README.md b/packaging/hudi-utilities-slim-bundle/README.md
index 58353c403d..60ee739153 100644
--- a/packaging/hudi-utilities-slim-bundle/README.md
+++ b/packaging/hudi-utilities-slim-bundle/README.md
@@ -17,6 +17,89 @@
 
 # Usage of hudi-utilities-slim-bundle
 
-Starting from versions 0.11, Hudi provides hudi-utilities-slim-bundle which excludes hudi-spark-datasource modules.
-This new bundle is intended to be used with Hudi Spark bundle together, if using hudi-utilities-bundle solely
-introduces problems for a specific Spark version.
\ No newline at end of file
+Starting from version 0.11, Hudi provides hudi-utilities-slim-bundle, which excludes the hudi-spark-datasource modules. This new bundle is intended to be used together with the Hudi Spark bundle, in case
+using hudi-utilities-bundle alone introduces problems for a specific Spark version.
+
+## Example with Spark 2.4.7
+
+* Build Hudi: `mvn clean install -DskipTests`
+* Run deltastreamer
+
+```
+bin/spark-submit \
+  --driver-memory 4g --executor-memory 2g --num-executors 3 --executor-cores 1 \
+  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+  --conf spark.sql.catalogImplementation=hive \
+  --conf spark.driver.maxResultSize=1g \
+  --conf spark.ui.port=6679 \
+  --packages org.apache.spark:spark-avro_2.11:2.4.7 \
+  --jars /path/to/hudi/packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-0.12.0-SNAPSHOT.jar \
+  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer `ls /path/to/hudi/packaging/hudi-utilities-slim-bundle/target/hudi-utilities-slim-bundle_2.11-0.12.0-SNAPSHOT.jar` \
+  --props `ls /path/to/hudi/dfs-source.properties` \
+  --source-class org.apache.hudi.utilities.sources.ParquetDFSSource  \
+  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
+  --source-ordering-field tpep_dropoff_datetime   \
+  --table-type COPY_ON_WRITE \
+  --target-base-path file:\/\/\/tmp/hudi-ny-taxi-spark24/   \
+  --target-table ny_hudi_tbl  \
+  --op UPSERT  \
+  --continuous \
+  --source-limit 5000000 \
+  --min-sync-interval-seconds 60
+```
+
+## Example with Spark 3.1.2
+
+* Build Hudi: `mvn clean install -DskipTests -Dspark3.1 -Dscala-2.12`
+* Run deltastreamer
+
+```
+bin/spark-submit \
+  --driver-memory 4g --executor-memory 2g --num-executors 3 --executor-cores 1 \
+  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+  --conf spark.sql.catalogImplementation=hive \
+  --conf spark.driver.maxResultSize=1g \
+  --conf spark.ui.port=6679 \
+  --packages org.apache.spark:spark-avro_2.12:3.1.2 \
+  --jars /path/to/hudi/packaging/hudi-spark-bundle/target/hudi-spark3.1-bundle_2.12-0.12.0-SNAPSHOT.jar \
+  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer `ls /path/to/hudi/packaging/hudi-utilities-slim-bundle/target/hudi-utilities-slim-bundle_2.12-0.12.0-SNAPSHOT.jar` \
+  --props `ls /path/to/hudi/dfs-source.properties` \
+  --source-class org.apache.hudi.utilities.sources.ParquetDFSSource  \
+  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
+  --source-ordering-field tpep_dropoff_datetime   \
+  --table-type COPY_ON_WRITE \
+  --target-base-path file:\/\/\/tmp/hudi-ny-taxi-spark31/   \
+  --target-table ny_hudi_tbl  \
+  --op UPSERT  \
+  --continuous \
+  --source-limit 5000000 \
+  --min-sync-interval-seconds 60
+```
+
+## Example with Spark 3.2.0
+
+* Build Hudi: `mvn clean install -DskipTests -Dspark3.2 -Dscala-2.12`
+* Run deltastreamer
+
+```
+bin/spark-submit \
+  --driver-memory 4g --executor-memory 2g --num-executors 3 --executor-cores 1 \
+  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+  --conf spark.sql.catalogImplementation=hive \
+  --conf spark.driver.maxResultSize=1g \
+  --conf spark.ui.port=6679 \
+  --packages org.apache.spark:spark-avro_2.12:3.2.0 \
+  --jars /path/to/hudi/packaging/hudi-spark-bundle/target/hudi-spark3.2-bundle_2.12-0.12.0-SNAPSHOT.jar \
+  --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer `ls /path/to/hudi/packaging/hudi-utilities-slim-bundle/target/hudi-utilities-slim-bundle_2.12-0.12.0-SNAPSHOT.jar` \
+  --props `ls /path/to/hudi/dfs-source.properties` \
+  --source-class org.apache.hudi.utilities.sources.ParquetDFSSource  \
+  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
+  --source-ordering-field tpep_dropoff_datetime   \
+  --table-type COPY_ON_WRITE \
+  --target-base-path file:\/\/\/tmp/hudi-ny-taxi-spark32/   \
+  --target-table ny_hudi_tbl  \
+  --op UPSERT  \
+  --continuous \
+  --source-limit 5000000 \
+  --min-sync-interval-seconds 60
+```
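The three runs above differ only in the Maven build profile, the spark-avro package, and the bundle jar names. A quick sanity check for any of them is to look for Hudi's metadata directory under the target base path; the snippet below is a hedged sketch (not part of this commit), using the Spark 3.1 example's output path.

```
# Sketch, not part of the commit: confirm the deltastreamer run produced a
# Hudi table. Path matches the Spark 3.1 example; use the spark24/spark32
# paths for the other runs.
ls /tmp/hudi-ny-taxi-spark31/.hoodie/
# Expect hoodie.properties plus <instant>.commit files once the first commit lands.
```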
diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml
index 60f0af9d64..993e2ad7fd 100644
--- a/packaging/hudi-utilities-slim-bundle/pom.xml
+++ b/packaging/hudi-utilities-slim-bundle/pom.xml
@@ -77,7 +77,7 @@
                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer">
                 </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
-                    <addHeader>true</addHeader>
+                  <addHeader>true</addHeader>
                 </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
                   <resource>META-INF/LICENSE</resource>
@@ -92,10 +92,7 @@
                 <includes>
                   <include>org.apache.hudi:hudi-common</include>
                   <include>org.apache.hudi:hudi-client-common</include>
-                  <include>org.apache.hudi:hudi-spark-client</include>
                   <include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
-                  <include>org.apache.hudi:hudi-hive-sync</include>
-                  <include>org.apache.hudi:hudi-sync-common</include>
                   <include>org.apache.hudi:hudi-hadoop-mr</include>
                   <include>org.apache.hudi:hudi-timeline-service</include>
                   <include>org.apache.hudi:hudi-aws</include>
@@ -136,13 +133,6 @@
                   <include>org.apache.kafka:kafka_${scala.binary.version}</include>
                   <include>com.101tec:zkclient</include>
                   <include>org.apache.kafka:kafka-clients</include>
-
-                  <include>org.apache.hive:hive-common</include>
-                  <include>org.apache.hive:hive-service</include>
-                  <include>org.apache.hive:hive-service-rpc</include>
-                  <include>org.apache.hive:hive-metastore</include>
-                  <include>org.apache.hive:hive-jdbc</include>
-
                   <include>org.apache.hbase:hbase-client</include>
                   <include>org.apache.hbase:hbase-common</include>
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
@@ -178,10 +168,6 @@
                  <pattern>com.beust.jcommander.</pattern>
                  <shadedPattern>org.apache.hudi.com.beust.jcommander.</shadedPattern>
                 </relocation>
-                <relocation>
-                  <pattern>org.apache.hive.jdbc.</pattern>
-                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hive.jdbc.</shadedPattern>
-                </relocation>
                 <relocation>
                  <pattern>org.apache.commons.io.</pattern>
                  <shadedPattern>org.apache.hudi.org.apache.commons.io.</shadedPattern>
@@ -205,10 +191,6 @@
                  <pattern>org.apache.hadoop.hive.metastore.</pattern>
                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.metastore.</shadedPattern>
                 </relocation>
-                <relocation>
-                  <pattern>org.apache.hive.common.</pattern>
-                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hive.common.</shadedPattern>
-                </relocation>
                 <relocation>
                  <pattern>org.apache.hadoop.hive.common.</pattern>
                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.common.</shadedPattern>
@@ -217,10 +199,6 @@
                  <pattern>org.apache.hadoop.hive.conf.</pattern>
                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.conf.</shadedPattern>
                 </relocation>
-                <relocation>
-                  <pattern>org.apache.hive.service.</pattern>
-                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hive.service.</shadedPattern>
-                </relocation>
                 <relocation>
                  <pattern>org.apache.hadoop.hive.service.</pattern>
                  <shadedPattern>${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.service.</shadedPattern>
@@ -344,116 +322,27 @@
     </dependency>
     <dependency>
       <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-client-common</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-spark-client</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-hive-sync</artifactId>
+      <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
       <exclusions>
         <exclusion>
-          <groupId>javax.servlet</groupId>
-          <artifactId>servlet-api</artifactId>
+          <groupId>org.apache.hudi</groupId>
+          <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hudi</groupId>
+          <artifactId>hudi-spark_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hudi</groupId>
+          <artifactId>${hudi.spark.module}_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hudi</groupId>
+          <artifactId>${hudi.spark.common.module}</artifactId>
         </exclusion>
       </exclusions>
     </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-spark_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>${hudi.spark.module}_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>${hudi.spark.common.module}</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <!-- Hive -->
-    <dependency>
-      <groupId>${hive.groupid}</groupId>
-      <artifactId>hive-service</artifactId>
-      <version>${hive.version}</version>
-      <scope>${utilities.bundle.hive.scope}</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>${hive.groupid}</groupId>
-      <artifactId>hive-service-rpc</artifactId>
-      <version>${hive.version}</version>
-      <scope>${utilities.bundle.hive.scope}</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>${hive.groupid}</groupId>
-      <artifactId>hive-jdbc</artifactId>
-      <version>${hive.version}</version>
-      <scope>${utilities.bundle.hive.scope}</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>${hive.groupid}</groupId>
-      <artifactId>hive-metastore</artifactId>
-      <version>${hive.version}</version>
-      <scope>${utilities.bundle.hive.scope}</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>${hive.groupid}</groupId>
-      <artifactId>hive-common</artifactId>
-      <version>${hive.version}</version>
-      <scope>${utilities.bundle.hive.scope}</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.htrace</groupId>
-      <artifactId>htrace-core</artifactId>
-      <version>${htrace.version}</version>
-      <scope>compile</scope>
-    </dependency>
-
-    <!-- zookeeper -->
-    <dependency>
-      <groupId>org.apache.curator</groupId>
-      <artifactId>curator-framework</artifactId>
-      <version>${zk-curator.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.curator</groupId>
-      <artifactId>curator-client</artifactId>
-      <version>${zk-curator.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.curator</groupId>
-      <artifactId>curator-recipes</artifactId>
-      <version>${zk-curator.version}</version>
-    </dependency>
   </dependencies>
 
   <profiles>
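Net effect of the pom.xml changes above: the slim bundle now depends only on hudi-utilities, with every hudi-spark* artifact excluded, and no longer declares or shades the Hive, HTrace, and Curator dependencies itself. A hedged way to confirm the decoupling after a build is sketched below; the jar name assumes the Scala 2.12 build from the README examples, and HoodieSparkSqlWriter is used only as a representative hudi-spark class.

```
# Sketch, not part of the commit: check that no hudi-spark classes are
# shaded into the slim bundle after `mvn clean install -DskipTests`.
jar tf packaging/hudi-utilities-slim-bundle/target/hudi-utilities-slim-bundle_2.12-0.12.0-SNAPSHOT.jar \
  | grep 'org/apache/hudi/HoodieSparkSqlWriter' \
  && echo "hudi-spark classes still present" \
  || echo "decoupled: no hudi-spark classes bundled"
```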
diff --git a/pom.xml b/pom.xml
index d898d34d35..1188ec620a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,6 +99,7 @@
     <pulsar.version>2.8.1</pulsar.version>
     <confluent.version>5.3.4</confluent.version>
     <glassfish.version>2.17</glassfish.version>
+    <glassfish.el.version>3.0.1-b12</glassfish.el.version>
     <parquet.version>1.10.1</parquet.version>
     <junit.jupiter.version>5.7.0-M1</junit.jupiter.version>
     <junit.vintage.version>5.7.0-M1</junit.vintage.version>
@@ -556,6 +557,12 @@
         <artifactId>jersey-container-servlet-core</artifactId>
         <version>${glassfish.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.glassfish</groupId>
+        <artifactId>javax.el</artifactId>
+        <version>${glassfish.el.version}</version>
+        <scope>provided</scope>
+      </dependency>
 
       <!-- Avro -->
       <dependency>
