[ https://issues.apache.org/jira/browse/HUDI-7236?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jonathan Vexler updated HUDI-7236:
----------------------------------
    Status: In Progress  (was: Open)

> MIT fails when attempting to change partition
> ---------------------------------------------
>
>                 Key: HUDI-7236
>                 URL: https://issues.apache.org/jira/browse/HUDI-7236
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark-sql
>            Reporter: Jonathan Vexler
>            Assignee: Jonathan Vexler
>            Priority: Blocker
>              Labels: pull-request-available
>             Fix For: 0.14.1
>
>
> With the following reproduction:
> {code:java}
> DROP TABLE IF EXISTS datalake_person_spark;
> DROP TABLE IF EXISTS datalake_partial_update_partition_change_hudi;
>
> CREATE TABLE datalake_person_spark
> USING PARQUET
> PARTITIONED BY (inc_day)
> AS SELECT 1 as id, 2 as version, 'str_2' as name,
>    CAST('2023-01-01 12:12:12' as TIMESTAMP) as birthDate,
>    '2023-10-02' as inc_day;
>
> CREATE TABLE datalake_partial_update_partition_change_hudi (
>   id INT,
>   version INT,
>   name STRING,
>   birthDate TIMESTAMP,
>   inc_day STRING
> ) USING hudi
> PARTITIONED BY (inc_day)
> TBLPROPERTIES (
>   'primaryKey' = 'id',
>   'type' = 'cow'
> );
>
> INSERT INTO datalake_partial_update_partition_change_hudi
> SELECT 1 as id, 1 as version, 'str_1' as name,
>        CAST('2023-01-01 11:11:11' AS TIMESTAMP) as birthDate,
>        CAST('2023-10-01' AS DATE) as inc_day;
>
> select * from datalake_partial_update_partition_change_hudi;
>
> set hoodie.index.type=GLOBAL_SIMPLE;
> set hoodie.simple.index.update.partition.path=true;
>
> MERGE INTO datalake_partial_update_partition_change_hudi t
> USING (select * from datalake_person_spark) as s
> ON t.id = s.id
> WHEN MATCHED THEN UPDATE SET *;
> {code}
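> Because the global index is configured with {{hoodie.simple.index.update.partition.path=true}}, the matched record has to move from inc_day=2023-10-01 to inc_day=2023-10-02, and the write dies with a NullPointerException (full trace below) thrown from HoodieAvroUtils.getActualSchemaFromUnion while HoodieIndexUtils.mergeIncomingWithExistingRecord prepends meta fields to the merged record.
> For context only, here is a minimal, self-contained sketch of the kind of union-branch resolution involved. This is not Hudi's implementation, and the class and method names (UnionResolveSketch, actualBranch) are hypothetical; it only illustrates why resolving a nullable Avro field against a UNION schema needs an explicit null guard before anything dereferences the value:
> {code:java}
> import org.apache.avro.Schema;
> import org.apache.avro.SchemaBuilder;
>
> // Hypothetical sketch (not Hudi code): pick the concrete branch of an Avro
> // UNION schema for a given datum. A resolver that inspects the datum first,
> // e.g. via value.getClass(), throws an NPE for a null datum, which is the
> // general failure mode seen in the trace below.
> public class UnionResolveSketch {
>   static Schema actualBranch(Schema union, Object value) {
>     for (Schema branch : union.getTypes()) {
>       if (value == null && branch.getType() == Schema.Type.NULL) {
>         return branch; // null guard: a null datum maps to the NULL branch
>       }
>       if (value instanceof CharSequence && branch.getType() == Schema.Type.STRING) {
>         return branch; // a string datum maps to the STRING branch
>       }
>     }
>     throw new IllegalArgumentException("no matching union branch for " + value);
>   }
>
>   public static void main(String[] args) {
>     // A nullable string field, like "name" in the tables above.
>     Schema union = SchemaBuilder.unionOf().nullType().and().stringType().endUnion();
>     System.out.println(actualBranch(union, "str_1")); // prints "string"
>     System.out.println(actualBranch(union, null));    // prints "null"
>   }
> }
> {code}
> Whether the null here comes from the incoming row, the existing record, or a schema mismatch between the two tables is what this ticket needs to pin down.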
> We get this exception:
> {code:java}
> Failed to upsert for commit time 20231215212435126
> org.apache.hudi.exception.HoodieUpsertException: Failed to upsert for commit time 20231215212435126
>   at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:70)
>   at org.apache.hudi.table.action.commit.SparkUpsertCommitActionExecutor.execute(SparkUpsertCommitActionExecutor.java:44)
>   at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.upsert(HoodieSparkCopyOnWriteTable.java:114)
>   at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.upsert(HoodieSparkCopyOnWriteTable.java:103)
>   at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:142)
>   at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:224)
>   at org.apache.hudi.HoodieSparkSqlWriterInternal.liftedTree1$1(HoodieSparkSqlWriter.scala:514)
>   at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:512)
>   at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:203)
>   at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:120)
>   at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.executeUpsert(MergeIntoHoodieTableCommand.scala:469)
>   at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.run(MergeIntoHoodieTableCommand.scala:283)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
>   at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>   at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
>   at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
>   at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
>   at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
>   at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
>   at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
>   at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:622)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
>   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:617)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$13(TestMergeIntoTable.scala:321)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$13$adapted(TestMergeIntoTable.scala:265)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.withTempDir(HoodieSparkSqlTestBase.scala:77)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$12(TestMergeIntoTable.scala:265)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$withRecordType$3(HoodieSparkSqlTestBase.scala:216)
>   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.withSQLConf(HoodieSparkSqlTestBase.scala:188)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$withRecordType$1(HoodieSparkSqlTestBase.scala:215)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$withRecordType$1$adapted(HoodieSparkSqlTestBase.scala:207)
>   at scala.collection.immutable.List.foreach(List.scala:431)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.withRecordType(HoodieSparkSqlTestBase.scala:207)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$11(TestMergeIntoTable.scala:265)
>   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$test$1(HoodieSparkSqlTestBase.scala:85)
>   at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
>   at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
>   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
>   at org.scalatest.Transformer.apply(Transformer.scala:22)
>   at org.scalatest.Transformer.apply(Transformer.scala:20)
>   at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189)
>   at org.scalatest.TestSuite.withFixture(TestSuite.scala:196)
>   at org.scalatest.TestSuite.withFixture$(TestSuite.scala:195)
>   at org.scalatest.funsuite.AnyFunSuite.withFixture(AnyFunSuite.scala:1562)
>   at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187)
>   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199)
>   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181)
>   at org.scalatest.funsuite.AnyFunSuite.runTest(AnyFunSuite.scala:1562)
>   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232)
>   at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
>   at scala.collection.immutable.List.foreach(List.scala:431)
>   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
>   at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
>   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231)
>   at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562)
>   at org.scalatest.Suite.run(Suite.scala:1112)
>   at org.scalatest.Suite.run$(Suite.scala:1094)
>   at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562)
>   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236)
>   at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
>   at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236)
>   at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.org$scalatest$BeforeAndAfterAll$$super$run(HoodieSparkSqlTestBase.scala:44)
>   at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
>   at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
>   at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.run(HoodieSparkSqlTestBase.scala:44)
>   at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
>   at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1314)
>   at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1308)
>   at scala.collection.immutable.List.foreach(List.scala:431)
>   at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1308)
>   at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
>   at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
>   at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1474)
>   at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
>   at org.scalatest.tools.Runner$.run(Runner.scala:798)
>   at org.scalatest.tools.Runner.run(Runner.scala)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:43)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:26)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 37.0 failed 1 times, most recent failure: Lost task 1.0 in stage 37.0 (TID 168) (jonathans-mbp executor driver): java.lang.NullPointerException
>   at org.apache.hudi.avro.HoodieAvroUtils.getActualSchemaFromUnion(HoodieAvroUtils.java:1182)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:874)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:845)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:829)
>   at org.apache.hudi.common.model.HoodieAvroRecord.prependMetaFields(HoodieAvroRecord.java:132)
>   at org.apache.hudi.index.HoodieIndexUtils.mergeIncomingWithExistingRecord(HoodieIndexUtils.java:259)
>   at org.apache.hudi.index.HoodieIndexUtils.lambda$mergeForPartitionUpdatesIfNeeded$8d2a44d8$1(HoodieIndexUtils.java:316)
>   at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:137)
>   at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>   at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>   at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
>   at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
>   at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1535)
>   at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1445)
>   at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1509)
>   at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1332)
>   at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
>   at org.apache.spark.scheduler.Task.run(Task.scala:136)
>   at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:750)
>
> Driver stacktrace:
>   at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
>   at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>   at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>   at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>   at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
>   at scala.Option.foreach(Option.scala:407)
>   at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
>   at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
>   at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
>   at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
>   at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>   at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
>   at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1021)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>   at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
>   at org.apache.spark.rdd.RDD.collect(RDD.scala:1020)
>   at org.apache.spark.rdd.PairRDDFunctions.$anonfun$countByKey$1(PairRDDFunctions.scala:367)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>   at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
>   at org.apache.spark.rdd.PairRDDFunctions.countByKey(PairRDDFunctions.scala:367)
>   at org.apache.spark.api.java.JavaPairRDD.countByKey(JavaPairRDD.scala:314)
>   at org.apache.hudi.data.HoodieJavaPairRDD.countByKey(HoodieJavaPairRDD.java:105)
>   at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.buildProfile(BaseSparkCommitActionExecutor.java:196)
>   at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:170)
>   at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:82)
>   at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:63)
>   ... 104 more
> Caused by: java.lang.NullPointerException
>   at org.apache.hudi.avro.HoodieAvroUtils.getActualSchemaFromUnion(HoodieAvroUtils.java:1182)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:874)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:845)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:829)
>   at org.apache.hudi.common.model.HoodieAvroRecord.prependMetaFields(HoodieAvroRecord.java:132)
>   at org.apache.hudi.index.HoodieIndexUtils.mergeIncomingWithExistingRecord(HoodieIndexUtils.java:259)
>   at org.apache.hudi.index.HoodieIndexUtils.lambda$mergeForPartitionUpdatesIfNeeded$8d2a44d8$1(HoodieIndexUtils.java:316)
>   at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:137)
>   at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>   at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>   at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
>   at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
>   at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1535)
>   at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1445)
>   at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1509)
>   at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1332)
>   at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
>   at org.apache.spark.scheduler.Task.run(Task.scala:136)
>   at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:750)
> {code}


--
This message was sent by Atlassian Jira
(v8.20.10#820010)