[ https://issues.apache.org/jira/browse/HUDI-7236?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jonathan Vexler updated HUDI-7236:
----------------------------------
    Status: In Progress  (was: Open)

> MIT fails when attempting to change partition
> ---------------------------------------------
>
>                 Key: HUDI-7236
>                 URL: https://issues.apache.org/jira/browse/HUDI-7236
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark-sql
>            Reporter: Jonathan Vexler
>            Assignee: Jonathan Vexler
>            Priority: Blocker
>              Labels: pull-request-available
>             Fix For: 0.14.1
>
>
> With the following SQL:
> {code:java}
> DROP TABLE IF EXISTS datalake_person_spark;
> DROP TABLE IF EXISTS datalake_partial_update_partition_change_hudi;
>
> CREATE TABLE datalake_person_spark
> USING PARQUET
> PARTITIONED BY (inc_day)
> AS SELECT 1 as id, 2 as version, 'str_2' as name,
>           CAST('2023-01-01 12:12:12' as TIMESTAMP) as birthDate,
>           '2023-10-02' as inc_day;
>
> CREATE TABLE datalake_partial_update_partition_change_hudi (
>     id INT,
>     version INT,
>     name STRING,
>     birthDate TIMESTAMP,
>     inc_day STRING
> )
> USING hudi
> PARTITIONED BY (inc_day)
> TBLPROPERTIES (
>     'primaryKey' = 'id',
>     'type' = 'cow'
> );
>
> INSERT INTO datalake_partial_update_partition_change_hudi
> SELECT 1 as id, 1 as version, 'str_1' as name,
>        CAST('2023-01-01 11:11:11' AS TIMESTAMP) as birthDate,
>        CAST('2023-10-01' AS DATE) as inc_day;
>
> select * from datalake_partial_update_partition_change_hudi;
>
> set hoodie.index.type=GLOBAL_SIMPLE;
> set hoodie.simple.index.update.partition.path=true;
>
> MERGE INTO datalake_partial_update_partition_change_hudi t
> USING (select * from datalake_person_spark) as s
> ON t.id = s.id
> WHEN MATCHED THEN UPDATE SET *;
> {code}
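> The source row carries inc_day = '2023-10-02' while the existing Hudi record for id = 1 was written to partition inc_day = '2023-10-01'. With hoodie.index.type=GLOBAL_SIMPLE and hoodie.simple.index.update.partition.path=true, the global index is supposed to route the matched update to the new partition (delete from the old partition, insert into the new one), so the MERGE should move the record rather than fail.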
> Instead, we get the following exception:
> {code:java}
> Failed to upsert for commit time 20231215212435126
> org.apache.hudi.exception.HoodieUpsertException: Failed to upsert for commit time 20231215212435126
>   at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:70)
>   at org.apache.hudi.table.action.commit.SparkUpsertCommitActionExecutor.execute(SparkUpsertCommitActionExecutor.java:44)
>   at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.upsert(HoodieSparkCopyOnWriteTable.java:114)
>   at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.upsert(HoodieSparkCopyOnWriteTable.java:103)
>   at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:142)
>   at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:224)
>   at org.apache.hudi.HoodieSparkSqlWriterInternal.liftedTree1$1(HoodieSparkSqlWriter.scala:514)
>   at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:512)
>   at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:203)
>   at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:120)
>   at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.executeUpsert(MergeIntoHoodieTableCommand.scala:469)
>   at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.run(MergeIntoHoodieTableCommand.scala:283)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
>   at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>   at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
>   at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
>   at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
>   at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
>   at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
>   at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
>   at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:622)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
>   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:617)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$13(TestMergeIntoTable.scala:321)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$13$adapted(TestMergeIntoTable.scala:265)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.withTempDir(HoodieSparkSqlTestBase.scala:77)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$12(TestMergeIntoTable.scala:265)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$withRecordType$3(HoodieSparkSqlTestBase.scala:216)
>   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.withSQLConf(HoodieSparkSqlTestBase.scala:188)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$withRecordType$1(HoodieSparkSqlTestBase.scala:215)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$withRecordType$1$adapted(HoodieSparkSqlTestBase.scala:207)
>   at scala.collection.immutable.List.foreach(List.scala:431)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.withRecordType(HoodieSparkSqlTestBase.scala:207)
>   at org.apache.spark.sql.hudi.TestMergeIntoTable.$anonfun$new$11(TestMergeIntoTable.scala:265)
>   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.$anonfun$test$1(HoodieSparkSqlTestBase.scala:85)
>   at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
>   at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
>   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
>   at org.scalatest.Transformer.apply(Transformer.scala:22)
>   at org.scalatest.Transformer.apply(Transformer.scala:20)
>   at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189)
>   at org.scalatest.TestSuite.withFixture(TestSuite.scala:196)
>   at org.scalatest.TestSuite.withFixture$(TestSuite.scala:195)
>   at org.scalatest.funsuite.AnyFunSuite.withFixture(AnyFunSuite.scala:1562)
>   at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187)
>   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199)
>   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181)
>   at org.scalatest.funsuite.AnyFunSuite.runTest(AnyFunSuite.scala:1562)
>   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232)
>   at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
>   at scala.collection.immutable.List.foreach(List.scala:431)
>   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
>   at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
>   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232)
>   at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231)
>   at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562)
>   at org.scalatest.Suite.run(Suite.scala:1112)
>   at org.scalatest.Suite.run$(Suite.scala:1094)
>   at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562)
>   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236)
>   at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
>   at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236)
>   at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.org$scalatest$BeforeAndAfterAll$$super$run(HoodieSparkSqlTestBase.scala:44)
>   at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
>   at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
>   at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
>   at org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.run(HoodieSparkSqlTestBase.scala:44)
>   at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
>   at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1314)
>   at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1308)
>   at scala.collection.immutable.List.foreach(List.scala:431)
>   at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1308)
>   at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
>   at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
>   at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1474)
>   at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
>   at org.scalatest.tools.Runner$.run(Runner.scala:798)
>   at org.scalatest.tools.Runner.run(Runner.scala)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:43)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:26)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 37.0 failed 1 times, most recent failure: Lost task 1.0 in stage 37.0 (TID 168) (jonathans-mbp executor driver): java.lang.NullPointerException
>   at org.apache.hudi.avro.HoodieAvroUtils.getActualSchemaFromUnion(HoodieAvroUtils.java:1182)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:874)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:845)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:829)
>   at org.apache.hudi.common.model.HoodieAvroRecord.prependMetaFields(HoodieAvroRecord.java:132)
>   at org.apache.hudi.index.HoodieIndexUtils.mergeIncomingWithExistingRecord(HoodieIndexUtils.java:259)
>   at org.apache.hudi.index.HoodieIndexUtils.lambda$mergeForPartitionUpdatesIfNeeded$8d2a44d8$1(HoodieIndexUtils.java:316)
>   at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:137)
>   at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>   at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>   at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
>   at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
>   at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1535)
>   at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1445)
>   at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1509)
>   at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1332)
>   at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
>   at org.apache.spark.scheduler.Task.run(Task.scala:136)
>   at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:750)
> Driver stacktrace:
>   at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
>   at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>   at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>   at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>   at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
>   at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
>   at scala.Option.foreach(Option.scala:407)
>   at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
>   at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
>   at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
>   at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
>   at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>   at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
>   at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1021)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>   at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
>   at org.apache.spark.rdd.RDD.collect(RDD.scala:1020)
>   at org.apache.spark.rdd.PairRDDFunctions.$anonfun$countByKey$1(PairRDDFunctions.scala:367)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>   at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
>   at org.apache.spark.rdd.PairRDDFunctions.countByKey(PairRDDFunctions.scala:367)
>   at org.apache.spark.api.java.JavaPairRDD.countByKey(JavaPairRDD.scala:314)
>   at org.apache.hudi.data.HoodieJavaPairRDD.countByKey(HoodieJavaPairRDD.java:105)
>   at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.buildProfile(BaseSparkCommitActionExecutor.java:196)
>   at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:170)
>   at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:82)
>   at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:63)
>   ... 104 more
> Caused by: java.lang.NullPointerException
>   at org.apache.hudi.avro.HoodieAvroUtils.getActualSchemaFromUnion(HoodieAvroUtils.java:1182)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:874)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:845)
>   at org.apache.hudi.avro.HoodieAvroUtils.rewriteRecordWithNewSchema(HoodieAvroUtils.java:829)
>   at org.apache.hudi.common.model.HoodieAvroRecord.prependMetaFields(HoodieAvroRecord.java:132)
>   at org.apache.hudi.index.HoodieIndexUtils.mergeIncomingWithExistingRecord(HoodieIndexUtils.java:259)
>   at org.apache.hudi.index.HoodieIndexUtils.lambda$mergeForPartitionUpdatesIfNeeded$8d2a44d8$1(HoodieIndexUtils.java:316)
>   at org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:137)
>   at org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>   at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>   at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
>   at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
>   at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1535)
>   at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1445)
>   at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1509)
>   at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1332)
>   at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>   at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
>   at org.apache.spark.scheduler.Task.run(Task.scala:136)
>   at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:750) {code}
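>
> The NPE fires in HoodieAvroUtils.getActualSchemaFromUnion while mergeForPartitionUpdatesIfNeeded prepends meta fields to the record merged from the old and new partitions. Below is a minimal standalone sketch of one plausible failure shape (illustrative only, not Hudi's actual code path; the class name and schemas are made up): rewriting a record against a target schema that contains a field the source record's schema lacks yields a null field lookup, and the next dereference throws.
> {code:java}
> // Illustrative sketch only -- not Hudi code. Mirrors the symptom in the
> // trace above under the stated assumption about the mechanism.
> import org.apache.avro.Schema;
> import org.apache.avro.SchemaBuilder;
> import org.apache.avro.generic.GenericData;
> import org.apache.avro.generic.GenericRecord;
>
> public class RewriteNpeSketch {
>   public static void main(String[] args) {
>     // Source schema: the bare record, e.g. the incoming MERGE payload.
>     Schema source = SchemaBuilder.record("rec").fields()
>         .requiredInt("id")
>         .endRecord();
>     // Target schema: the same record with a meta field prepended.
>     Schema target = SchemaBuilder.record("rec").fields()
>         .requiredString("_hoodie_commit_time")
>         .requiredInt("id")
>         .endRecord();
>
>     GenericRecord incoming = new GenericData.Record(source);
>     incoming.put("id", 1);
>
>     for (Schema.Field field : target.getFields()) {
>       // getField returns null when the source schema has no such field;
>       // dereferencing the result without a null check throws an NPE.
>       Schema.Field oldField = source.getField(field.name());
>       System.out.println(oldField.schema()); // NPE for "_hoodie_commit_time"
>     }
>   }
> }
> {code}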



--
This message was sent by Atlassian Jira
(v8.20.10#820010)
