[ https://issues.apache.org/jira/browse/HUDI-3700?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Ethan Guo updated HUDI-3700: ---------------------------- Description: When we build hudi-utilities-bundle, the Spark profile can affect the bundle jar. This causes incompatibility between hudi-utilities-bundle and some Spark versions. When the hudi-utilities-bundle is built with the Spark version that is going to be used for the ingestion, there is no error. For example: When running deltastreamer with hudi-utilities-bundle_2.12-0.10.1.jar using Spark 3.1.2, the ingestion job throws java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3Adapter. {code:java} /Users/ethan/Work/lib/spark-3.1.2-bin-hadoop3.2/bin/spark-submit \ --master local[6] \ --driver-memory 6g --executor-memory 2g --num-executors 6 --executor-cores 1 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.sql.catalogImplementation=hive \ --conf spark.driver.maxResultSize=1g \ --conf spark.speculation=true \ --conf spark.speculation.multiplier=1.0 \ --conf spark.speculation.quantile=0.5 \ --conf spark.ui.port=6679 \ --conf spark.eventLog.enabled=true \ --conf spark.eventLog.dir=/Users/ethan/Work/data/hudi/spark-logs \ --packages org.apache.spark:spark-avro_2.12:3.1.2 \ --jars /Users/ethan/Work/repo/hudi-benchmarks/target/hudi-benchmarks-0.1-SNAPSHOT.jar \ --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ /Users/ethan/Work/lib/hudi_releases/0.10.1/hudi-utilities-bundle_2.12-0.10.1.jar \ --props /Users/ethan/Work/scripts/hbase-upgrade-testing/hudi_0_10_1_cow/ds_cow_before.properties \ --source-class BenchmarkDataSource \ --source-ordering-field ts \ --target-base-path file:/Users/ethan/Work/scripts/hbase-upgrade-testing/hudi_0_10_1_cow/test_table \ --target-table test_table \ --table-type COPY_ON_WRITE \ --op INSERT >> ds_before.log 2>&1 {code} {code:java} Exception in thread "main" java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3Adapter at java.net.URLClassLoader.findClass(URLClassLoader.java:382) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) at org.apache.hudi.SparkAdapterSupport.sparkAdapter(SparkAdapterSupport.scala:35) at org.apache.hudi.SparkAdapterSupport.sparkAdapter$(SparkAdapterSupport.scala:29) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter$lzycompute(HoodieSparkUtils.scala:48) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter(HoodieSparkUtils.scala:48) at org.apache.hudi.HoodieSparkUtils$.createRddInternal(HoodieSparkUtils.scala:144) at org.apache.hudi.HoodieSparkUtils$.createRdd(HoodieSparkUtils.scala:136) at org.apache.hudi.HoodieSparkUtils.createRdd(HoodieSparkUtils.scala) at org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.lambda$fetchNewDataInAvroFormat$1(SourceFormatAdapter.java:79) at org.apache.hudi.common.util.Option.map(Option.java:107) at org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:70) at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:425) at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:290) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:193) at org.apache.hudi.common.util.Option.ifPresent(Option.java:96) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:191) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:514) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) {code} For latest master: When running deltastreamer using Spark 3.1.2, the ingestion job throws different exceptions using different Spark profiles. {code:java} java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3_2Adapter at java.net.URLClassLoader.findClass(URLClassLoader.java:382) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) at org.apache.hudi.SparkAdapterSupport.sparkAdapter(SparkAdapterSupport.scala:37) at org.apache.hudi.SparkAdapterSupport.sparkAdapter$(SparkAdapterSupport.scala:29) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter$lzycompute(HoodieSparkUtils.scala:44) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter(HoodieSparkUtils.scala:44) at org.apache.hudi.AvroConversionUtils$.$anonfun$createInternalRowToAvroConverter$1(AvroConversionUtils.scala:79) at org.apache.hudi.HoodieSparkUtils$.$anonfun$createRdd$5(HoodieSparkUtils.scala:161) at scala.collection.Iterator$$anon$10.next(Iterator.scala:461) at scala.collection.Iterator$SliceIterator.next(Iterator.scala:273) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62) at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49) at scala.collection.TraversableOnce.to(TraversableOnce.scala:366) at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364) at scala.collection.AbstractIterator.to(Iterator.scala:1431) at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358) at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1431) at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345) at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339) at scala.collection.AbstractIterator.toArray(Iterator.scala:1431) at org.apache.spark.rdd.RDD.$anonfun$take$2(RDD.scala:1449) at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2254) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {code} {code:java} java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: org/apache/parquet/schema/LogicalTypeAnnotation$LogicalTypeAnnotationVisitor at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:121) at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490) at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349) at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1440) at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1350) at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1414) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1237) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384) at org.apache.spark.rdd.RDD.iterator(RDD.scala:335) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {code} was: When we build hudi-utilities-bundle, the Spark profile can affect the bundle jar. This causes incompatibility between hudi-utilities-bundle and some Spark versions. For example: When running deltastreamer with hudi-utilities-bundle_2.12-0.10.1.jar using Spark 3.1.2, the ingestion job throws java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3Adapter. {code:java} /Users/ethan/Work/lib/spark-3.1.2-bin-hadoop3.2/bin/spark-submit \ --master local[6] \ --driver-memory 6g --executor-memory 2g --num-executors 6 --executor-cores 1 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.sql.catalogImplementation=hive \ --conf spark.driver.maxResultSize=1g \ --conf spark.speculation=true \ --conf spark.speculation.multiplier=1.0 \ --conf spark.speculation.quantile=0.5 \ --conf spark.ui.port=6679 \ --conf spark.eventLog.enabled=true \ --conf spark.eventLog.dir=/Users/ethan/Work/data/hudi/spark-logs \ --packages org.apache.spark:spark-avro_2.12:3.1.2 \ --jars /Users/ethan/Work/repo/hudi-benchmarks/target/hudi-benchmarks-0.1-SNAPSHOT.jar \ --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ /Users/ethan/Work/lib/hudi_releases/0.10.1/hudi-utilities-bundle_2.12-0.10.1.jar \ --props /Users/ethan/Work/scripts/hbase-upgrade-testing/hudi_0_10_1_cow/ds_cow_before.properties \ --source-class BenchmarkDataSource \ --source-ordering-field ts \ --target-base-path file:/Users/ethan/Work/scripts/hbase-upgrade-testing/hudi_0_10_1_cow/test_table \ --target-table test_table \ --table-type COPY_ON_WRITE \ --op INSERT >> ds_before.log 2>&1 {code} {code:java} Exception in thread "main" java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3Adapter at java.net.URLClassLoader.findClass(URLClassLoader.java:382) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) at org.apache.hudi.SparkAdapterSupport.sparkAdapter(SparkAdapterSupport.scala:35) at org.apache.hudi.SparkAdapterSupport.sparkAdapter$(SparkAdapterSupport.scala:29) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter$lzycompute(HoodieSparkUtils.scala:48) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter(HoodieSparkUtils.scala:48) at org.apache.hudi.HoodieSparkUtils$.createRddInternal(HoodieSparkUtils.scala:144) at org.apache.hudi.HoodieSparkUtils$.createRdd(HoodieSparkUtils.scala:136) at org.apache.hudi.HoodieSparkUtils.createRdd(HoodieSparkUtils.scala) at org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.lambda$fetchNewDataInAvroFormat$1(SourceFormatAdapter.java:79) at org.apache.hudi.common.util.Option.map(Option.java:107) at org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:70) at org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:425) at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:290) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:193) at org.apache.hudi.common.util.Option.ifPresent(Option.java:96) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:191) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:514) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) {code} For latest master: When running deltastreamer using Spark 3.1.2, the ingestion job throws different exceptions using different Spark profiles. {code:java} java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3_2Adapter at java.net.URLClassLoader.findClass(URLClassLoader.java:382) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) at org.apache.hudi.SparkAdapterSupport.sparkAdapter(SparkAdapterSupport.scala:37) at org.apache.hudi.SparkAdapterSupport.sparkAdapter$(SparkAdapterSupport.scala:29) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter$lzycompute(HoodieSparkUtils.scala:44) at org.apache.hudi.HoodieSparkUtils$.sparkAdapter(HoodieSparkUtils.scala:44) at org.apache.hudi.AvroConversionUtils$.$anonfun$createInternalRowToAvroConverter$1(AvroConversionUtils.scala:79) at org.apache.hudi.HoodieSparkUtils$.$anonfun$createRdd$5(HoodieSparkUtils.scala:161) at scala.collection.Iterator$$anon$10.next(Iterator.scala:461) at scala.collection.Iterator$SliceIterator.next(Iterator.scala:273) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62) at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49) at scala.collection.TraversableOnce.to(TraversableOnce.scala:366) at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364) at scala.collection.AbstractIterator.to(Iterator.scala:1431) at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358) at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1431) at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345) at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339) at scala.collection.AbstractIterator.toArray(Iterator.scala:1431) at org.apache.spark.rdd.RDD.$anonfun$take$2(RDD.scala:1449) at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2254) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {code} {code:java} java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: org/apache/parquet/schema/LogicalTypeAnnotation$LogicalTypeAnnotationVisitor at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:121) at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490) at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349) at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1440) at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1350) at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1414) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1237) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384) at org.apache.spark.rdd.RDD.iterator(RDD.scala:335) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {code} > Revisit hudi-utilities-bundle build wrt Spark versions > ------------------------------------------------------ > > Key: HUDI-3700 > URL: https://issues.apache.org/jira/browse/HUDI-3700 > Project: Apache Hudi > Issue Type: Task > Components: spark > Reporter: Ethan Guo > Priority: Blocker > Fix For: 0.11.0 > > > When we build hudi-utilities-bundle, the Spark profile can affect the bundle > jar. This causes incompatibility between hudi-utilities-bundle and some > Spark versions. When the hudi-utilities-bundle is built with the Spark > version that is going to be used for the ingestion, there is no error. > > For example: > When running deltastreamer with hudi-utilities-bundle_2.12-0.10.1.jar using > Spark 3.1.2, the ingestion job throws java.lang.ClassNotFoundException: > org.apache.spark.sql.adapter.Spark3Adapter. > {code:java} > /Users/ethan/Work/lib/spark-3.1.2-bin-hadoop3.2/bin/spark-submit \ > --master local[6] \ > --driver-memory 6g --executor-memory 2g --num-executors 6 > --executor-cores 1 \ > --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ > --conf spark.sql.catalogImplementation=hive \ > --conf spark.driver.maxResultSize=1g \ > --conf spark.speculation=true \ > --conf spark.speculation.multiplier=1.0 \ > --conf spark.speculation.quantile=0.5 \ > --conf spark.ui.port=6679 \ > --conf spark.eventLog.enabled=true \ > --conf spark.eventLog.dir=/Users/ethan/Work/data/hudi/spark-logs \ > --packages org.apache.spark:spark-avro_2.12:3.1.2 \ > --jars > /Users/ethan/Work/repo/hudi-benchmarks/target/hudi-benchmarks-0.1-SNAPSHOT.jar > \ > --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \ > > /Users/ethan/Work/lib/hudi_releases/0.10.1/hudi-utilities-bundle_2.12-0.10.1.jar > \ > --props > /Users/ethan/Work/scripts/hbase-upgrade-testing/hudi_0_10_1_cow/ds_cow_before.properties > \ > --source-class BenchmarkDataSource \ > --source-ordering-field ts \ > --target-base-path > file:/Users/ethan/Work/scripts/hbase-upgrade-testing/hudi_0_10_1_cow/test_table > \ > --target-table test_table \ > --table-type COPY_ON_WRITE \ > --op INSERT >> ds_before.log 2>&1 {code} > > {code:java} > Exception in thread "main" java.lang.ClassNotFoundException: > org.apache.spark.sql.adapter.Spark3Adapter > at java.net.URLClassLoader.findClass(URLClassLoader.java:382) > at java.lang.ClassLoader.loadClass(ClassLoader.java:418) > at java.lang.ClassLoader.loadClass(ClassLoader.java:351) > at > org.apache.hudi.SparkAdapterSupport.sparkAdapter(SparkAdapterSupport.scala:35) > at > org.apache.hudi.SparkAdapterSupport.sparkAdapter$(SparkAdapterSupport.scala:29) > at > org.apache.hudi.HoodieSparkUtils$.sparkAdapter$lzycompute(HoodieSparkUtils.scala:48) > at > org.apache.hudi.HoodieSparkUtils$.sparkAdapter(HoodieSparkUtils.scala:48) > at > org.apache.hudi.HoodieSparkUtils$.createRddInternal(HoodieSparkUtils.scala:144) > at org.apache.hudi.HoodieSparkUtils$.createRdd(HoodieSparkUtils.scala:136) > at org.apache.hudi.HoodieSparkUtils.createRdd(HoodieSparkUtils.scala) > at > org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.lambda$fetchNewDataInAvroFormat$1(SourceFormatAdapter.java:79) > at org.apache.hudi.common.util.Option.map(Option.java:107) > at > org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter.fetchNewDataInAvroFormat(SourceFormatAdapter.java:70) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.readFromSource(DeltaSync.java:425) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:290) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$2(HoodieDeltaStreamer.java:193) > at org.apache.hudi.common.util.Option.ifPresent(Option.java:96) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:191) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:514) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951) > at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) {code} > For latest master: > When running deltastreamer using Spark 3.1.2, the ingestion job throws > different exceptions using different Spark profiles. > {code:java} > java.lang.ClassNotFoundException: org.apache.spark.sql.adapter.Spark3_2Adapter > at java.net.URLClassLoader.findClass(URLClassLoader.java:382) > at java.lang.ClassLoader.loadClass(ClassLoader.java:418) > at java.lang.ClassLoader.loadClass(ClassLoader.java:351) > at > org.apache.hudi.SparkAdapterSupport.sparkAdapter(SparkAdapterSupport.scala:37) > at > org.apache.hudi.SparkAdapterSupport.sparkAdapter$(SparkAdapterSupport.scala:29) > at > org.apache.hudi.HoodieSparkUtils$.sparkAdapter$lzycompute(HoodieSparkUtils.scala:44) > at > org.apache.hudi.HoodieSparkUtils$.sparkAdapter(HoodieSparkUtils.scala:44) > at > org.apache.hudi.AvroConversionUtils$.$anonfun$createInternalRowToAvroConverter$1(AvroConversionUtils.scala:79) > at > org.apache.hudi.HoodieSparkUtils$.$anonfun$createRdd$5(HoodieSparkUtils.scala:161) > at scala.collection.Iterator$$anon$10.next(Iterator.scala:461) > at scala.collection.Iterator$SliceIterator.next(Iterator.scala:273) > at scala.collection.Iterator.foreach(Iterator.scala:943) > at scala.collection.Iterator.foreach$(Iterator.scala:943) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) > at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62) > at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53) > at > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105) > at > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49) > at scala.collection.TraversableOnce.to(TraversableOnce.scala:366) > at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364) > at scala.collection.AbstractIterator.to(Iterator.scala:1431) > at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358) > at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358) > at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1431) > at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345) > at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339) > at scala.collection.AbstractIterator.toArray(Iterator.scala:1431) > at org.apache.spark.rdd.RDD.$anonfun$take$2(RDD.scala:1449) > at > org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2254) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:131) > at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) {code} > {code:java} > java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: > org.apache.hudi.exception.HoodieException: > java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: > org/apache/parquet/schema/LogicalTypeAnnotation$LogicalTypeAnnotationVisitor > at > org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:121) > at > scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44) > at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484) at > scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490) at > org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) > at > org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349) > at > org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1440) > at > org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1350) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1414) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1237) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384) at > org.apache.spark.rdd.RDD.iterator(RDD.scala:335) at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at > org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at > org.apache.spark.scheduler.Task.run(Task.scala:131) at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > > {code} > -- This message was sent by Atlassian Jira (v8.20.1#820001)