[ https://issues.apache.org/jira/browse/HUDI-2986?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

sivabalan narayanan updated HUDI-2986:
--------------------------------------
    Labels: core-flow-ds sev:critical  (was: sev:critical)

> Deltastreamer continuous mode runs into Too many open files exception
> --------------------------------------------------------------------
>
>                 Key: HUDI-2986
>                 URL: https://issues.apache.org/jira/browse/HUDI-2986
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: DeltaStreamer, Writer Core
>            Reporter: Raymond Xu
>            Priority: Blocker
>              Labels: core-flow-ds, sev:critical
>
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 35202.0 failed 4 times, most recent failure: Lost task 6.3 in stage 35202.0 (TID 1172485, ip-10-211-53-165.infra.usw2.zdsys.com, executor 1): java.io.FileNotFoundException: /mnt/yarn/usercache/hadoop/appcache/application_1638666447607_0001/blockmgr-3725bb05-2c9a-4073-80f6-4eaa335321c9/34/temp_shuffle_8f675a83-21ac-4908-b8da-1c8e25a59b8e (Too many open files)
>       at java.io.FileOutputStream.open0(Native Method)
>       at java.io.FileOutputStream.open(FileOutputStream.java:270)
>       at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
>       at org.apache.spark.storage.DiskBlockObjectWriter.initialize(DiskBlockObjectWriter.scala:106)
>       at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:119)
>       at org.apache.spark.storage.DiskBlockObjectWriter.write(DiskBlockObjectWriter.scala:251)
>       at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:157)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:95)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>       at org.apache.spark.scheduler.Task.run(Task.scala:123)
>       at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> Driver stacktrace:
>       at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2136)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2124)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2123)
>       at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>       at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2123)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:994)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:994)
>       at scala.Option.foreach(Option.scala:257)
>       at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:994)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2384)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2333)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2322)
>       at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>       at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:805)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:2097)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:2194)
>       at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1143)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>       at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>       at org.apache.spark.rdd.RDD.fold(RDD.scala:1137)
>       at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:35)
>       at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:35)
>       at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:35)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>       at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>       at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:34)
>       at org.apache.spark.api.java.JavaDoubleRDD.sum(JavaDoubleRDD.scala:165)
>       at org.apache.hudi.utilities.deltastreamer.DeltaSync.writeToSink(DeltaSync.java:447)
>       at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:281)
>       at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:587)
>       ... 4 more
> Caused by: java.io.FileNotFoundException: /mnt/yarn/usercache/hadoop/appcache/application_1638666447607_0001/blockmgr-3725bb05-2c9a-4073-80f6-4eaa335321c9/34/temp_shuffle_8f675a83-21ac-4908-b8da-1c8e25a59b8e (Too many open files)
>       at java.io.FileOutputStream.open0(Native Method)
>       at java.io.FileOutputStream.open(FileOutputStream.java:270)
>       at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
>       at org.apache.spark.storage.DiskBlockObjectWriter.initialize(DiskBlockObjectWriter.scala:106)
>       at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:119)
>       at org.apache.spark.storage.DiskBlockObjectWriter.write(DiskBlockObjectWriter.scala:251)
>       at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:157)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:95)
>       at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>       at org.apache.spark.scheduler.Task.run(Task.scala:123)
>       at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>       ... 3 more
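Editor's note: the failing frame is org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter, which opens one temporary shuffle file per reduce partition for each map task, so a long-running continuous-mode DeltaStreamer can exhaust the executor's open-file limit. The ticket does not propose a fix; as a hedged sketch only, one common mitigation is to steer Spark onto the sort-based shuffle writer and keep shuffle partition counts modest. The class name and the concrete values below are illustrative assumptions, not part of HUDI-2986:

    // Sketch only: a SparkConf that avoids BypassMergeSortShuffleWriter by lowering
    // spark.shuffle.sort.bypassMergeThreshold, so each shuffle map task keeps far
    // fewer temp_shuffle_* files open at once. Values are assumptions for illustration.
    import org.apache.spark.SparkConf;

    public class ShuffleFileHandleTuning {
        public static SparkConf tunedConf() {
            return new SparkConf()
                    .setAppName("hoodie-deltastreamer-continuous")
                    // Bypass-merge shuffle is chosen only when the reduce partition count
                    // is at or below this threshold; 1 forces the sort-based writer.
                    .set("spark.shuffle.sort.bypassMergeThreshold", "1")
                    // Fewer shuffle partitions also means fewer shuffle files per task.
                    .set("spark.sql.shuffle.partitions", "200");
        }
    }

Raising the OS-level open-file limit (ulimit -n) on the YARN nodes is the other frequently used workaround; either option would need to be verified against this workload.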



--
This message was sent by Atlassian Jira
(v8.20.1#820001)
