[ https://issues.apache.org/jira/browse/HUDI-2986?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Raymond Xu updated HUDI-2986: ----------------------------- Fix Version/s: 0.10.1 > Deltastreamer continuous mode run into Too many open files exception > -------------------------------------------------------------------- > > Key: HUDI-2986 > URL: https://issues.apache.org/jira/browse/HUDI-2986 > Project: Apache Hudi > Issue Type: Bug > Components: DeltaStreamer, Writer Core > Reporter: Raymond Xu > Assignee: Raymond Xu > Priority: Blocker > Labels: core-flow-ds, sev:critical > Fix For: 0.11.0, 0.10.1 > > > Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: > Task 6 in stage 35202.0 failed 4 times, most recent failure: Lost task 6.3 in > stage 35202.0 (TID 1172485, ip-10-211-53-165.infra.usw2.zdsys.com, executor > 1): java.io.FileNotFoundException: > /mnt/yarn/usercache/hadoop/appcache/application_1638666447607_0001/blockmgr-3725bb05-2c9a-4073-80f6-4eaa335321c9/34/temp_shuffle_8f675a83-21ac-4908-b8da-1c8e25a59b8e > (Too many open files) > at java.io.FileOutputStream.open0(Native Method) > at java.io.FileOutputStream.open(FileOutputStream.java:270) > at java.io.FileOutputStream.<init>(FileOutputStream.java:213) > at > org.apache.spark.storage.DiskBlockObjectWriter.initialize(DiskBlockObjectWriter.scala:106) > at > org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:119) > at > org.apache.spark.storage.DiskBlockObjectWriter.write(DiskBlockObjectWriter.scala:251) > at > org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:157) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:95) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2136) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2124) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2123) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2123) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:994) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:994) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:994) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2384) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2333) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2322) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:805) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2097) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2194) > at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1143) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:385) > at org.apache.spark.rdd.RDD.fold(RDD.scala:1137) > at > org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:35) > at > org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:35) > at > org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:35) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:385) > at > org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:34) > at org.apache.spark.api.java.JavaDoubleRDD.sum(JavaDoubleRDD.scala:165) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.writeToSink(DeltaSync.java:447) > at > org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:281) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:587) > ... 4 more > Caused by: java.io.FileNotFoundException: > /mnt/yarn/usercache/hadoop/appcache/application_1638666447607_0001/blockmgr-3725bb05-2c9a-4073-80f6-4eaa335321c9/34/temp_shuffle_8f675a83-21ac-4908-b8da-1c8e25a59b8e > (Too many open files) > at java.io.FileOutputStream.open0(Native Method) > at java.io.FileOutputStream.open(FileOutputStream.java:270) > at java.io.FileOutputStream.<init>(FileOutputStream.java:213) > at > org.apache.spark.storage.DiskBlockObjectWriter.initialize(DiskBlockObjectWriter.scala:106) > at > org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:119) > at > org.apache.spark.storage.DiskBlockObjectWriter.write(DiskBlockObjectWriter.scala:251) > at > org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:157) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:95) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) > at org.apache.spark.scheduler.Task.run(Task.scala:123) > at > org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) > ... 3 more -- This message was sent by Atlassian Jira (v8.20.1#820001)