[ https://issues.apache.org/jira/browse/SPARK-2491?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Guoqiang Li updated SPARK-2491: ------------------------------- Component/s: YARN > When an OOM is thrown, the executor does not stop properly. > ---------------------------------------------------------- > > Key: SPARK-2491 > URL: https://issues.apache.org/jira/browse/SPARK-2491 > Project: Spark > Issue Type: Bug > Components: YARN > Reporter: Guoqiang Li > > The executor log: > {code} > # > # java.lang.OutOfMemoryError: Java heap space > # -XX:OnOutOfMemoryError="kill %p" > # Executing /bin/sh -c "kill 44942"... > 14/07/15 10:38:29 ERROR CoarseGrainedExecutorBackend: RECEIVED SIGNAL 15: > SIGTERM > 14/07/15 10:38:29 ERROR ExecutorUncaughtExceptionHandler: Uncaught exception > in thread Thread[Connection manager future execution context-6,5,main] > java.lang.OutOfMemoryError: Java heap space > at java.nio.HeapByteBuffer.<init>(HeapByteBuffer.java:57) > at java.nio.ByteBuffer.allocate(ByteBuffer.java:331) > at org.apache.spark.storage.BlockMessage.set(BlockMessage.scala:94) > at > org.apache.spark.storage.BlockMessage$.fromByteBuffer(BlockMessage.scala:176) > at > org.apache.spark.storage.BlockMessageArray.set(BlockMessageArray.scala:63) > at > org.apache.spark.storage.BlockMessageArray$.fromBufferMessage(BlockMessageArray.scala:109) > at > org.apache.spark.storage.BlockFetcherIterator$BasicBlockFetcherIterator$$anonfun$sendRequest$1.applyOrElse(BlockFetcherIterator.scala:125) > at > org.apache.spark.storage.BlockFetcherIterator$BasicBlockFetcherIterator$$anonfun$sendRequest$1.applyOrElse(BlockFetcherIterator.scala:122) > at > scala.concurrent.Future$$anonfun$onSuccess$1.apply(Future.scala:117) > at > scala.concurrent.Future$$anonfun$onSuccess$1.apply(Future.scala:115) > at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:32) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:744) > 14/07/15 10:38:29 
WARN HadoopRDD: Exception in RecordReader.close() > java.io.IOException: Filesystem closed > at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:703) > at > org.apache.hadoop.hdfs.DFSInputStream.close(DFSInputStream.java:619) > at java.io.FilterInputStream.close(FilterInputStream.java:181) > at org.apache.hadoop.util.LineReader.close(LineReader.java:150) > at > org.apache.hadoop.mapred.LineRecordReader.close(LineRecordReader.java:243) > at org.apache.spark.rdd.HadoopRDD$$anon$1.close(HadoopRDD.scala:226) > at > org.apache.spark.util.NextIterator.closeIfNeeded(NextIterator.scala:63) > at > org.apache.spark.rdd.HadoopRDD$$anon$1$$anonfun$1.apply$mcV$sp(HadoopRDD.scala:197) > at > org.apache.spark.TaskContext$$anonfun$executeOnCompleteCallbacks$1.apply(TaskContext.scala:63) > at > org.apache.spark.TaskContext$$anonfun$executeOnCompleteCallbacks$1.apply(TaskContext.scala:63) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.TaskContext.executeOnCompleteCallbacks(TaskContext.scala:63) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:156) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:97) > at org.apache.spark.scheduler.Task.run(Task.scala:51) > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:744) > ----------------- > 14/07/15 10:38:30 INFO Executor: Running task ID 969 > 14/07/15 10:38:30 INFO BlockManager: Found block broadcast_0 locally > 14/07/15 10:38:30 INFO HadoopRDD: Input split: > hdfs://10dian72.domain.test:8020/input/lbs/recommend/toona/rating/20140712/part-00007:0+68016537 > 14/07/15 10:38:30 ERROR Executor: Exception in task 
ID 969 > java.io.FileNotFoundException: > /yarn/nm/usercache/spark/appcache/application_1404728465401_0070/spark-local-20140715103235-ffda/2e/merged_shuffle_4_85_0 > (No such file or directory) > at java.io.FileOutputStream.open(Native Method) > at java.io.FileOutputStream.<init>(FileOutputStream.java:221) > at > org.apache.spark.storage.DiskBlockObjectWriter.open(BlockObjectWriter.scala:116) > at > org.apache.spark.storage.DiskBlockObjectWriter.write(BlockObjectWriter.scala:177) > at > org.apache.spark.shuffle.hash.HashShuffleWriter$$anonfun$write$1.apply(HashShuffleWriter.scala:59) > at > org.apache.spark.shuffle.hash.HashShuffleWriter$$anonfun$write$1.apply(HashShuffleWriter.scala:57) > at scala.collection.Iterator$class.foreach(Iterator.scala:727) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > at > org.apache.spark.shuffle.hash.HashShuffleWriter.write(HashShuffleWriter.scala:57) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:147) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:97) > at org.apache.spark.scheduler.Task.run(Task.scala:51) > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:744) > 14/07/15 10:38:30 INFO Executor: java.io.FileNotFoundException > (java.io.FileNotFoundException: > /yarn/nm/usercache/spark/appcache/application_1404728465401_0070/spark-local-20140715103235-ffda/2e/merged_shuffle_4_85_0 > (No such file or directory)} > {code} -- This message was sent by Atlassian JIRA (v6.2#6252)