[ https://issues.apache.org/jira/browse/SPARK-2506?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14063171#comment-14063171 ]
uncleGen commented on SPARK-2506: --------------------------------- Here is a simple PR to fix this problem: https://github.com/apache/spark/pull/1429 > In yarn-cluster mode, ApplicationMaster does not clean up correctly at the > end of the job if users call sc.stop manually > ------------------------------------------------------------------------------------------------------------------------ > > Key: SPARK-2506 > URL: https://issues.apache.org/jira/browse/SPARK-2506 > Project: Spark > Issue Type: Bug > Components: Block Manager, Spark Core, YARN > Affects Versions: 1.0.1 > Reporter: uncleGen > Priority: Minor > > When I call sc.stop manually, some strange ERRORs appear: > 1. in driver log: > INFO [Thread-116] YarnAllocationHandler: Completed container > container_1400565786114_79510_01_000041 (state: COMPLETE, exit status: 0) > WARN [Thread-4] BlockManagerMaster: Error sending message to > BlockManagerMaster in 3 attempts > akka.pattern.AskTimeoutException: > Recipient[Actor[akka://spark/user/BlockManagerMaster#1994513092]] had already > been terminated. 
> at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:134) > at > org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:236) > at > org.apache.spark.storage.BlockManagerMaster.tell(BlockManagerMaster.scala:216) > at > org.apache.spark.storage.BlockManagerMaster.stop(BlockManagerMaster.scala:208) > at org.apache.spark.SparkEnv.stop(SparkEnv.scala:86) > at org.apache.spark.SparkContext.stop(SparkContext.scala:993) > at TestWeibo$.main(TestWeibo.scala:46) > at TestWeibo.main(TestWeibo.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at > org.apache.spark.deploy.yarn.ApplicationMaster$$anon$1.run(ApplicationMaster.scala:192) > INFO [Thread-116] ApplicationMaster: Allocating 1 containers to make up for > (potentially) lost containers > INFO [Thread-116] YarnAllocationHandler: Will Allocate 1 executor containers, > each with 9600 memory > 2: in executor log: > WARN [Connection manager future execution context-13] BlockManagerMaster: > Error sending message to BlockManagerMaster in 1 attempts > java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] > at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) > at > scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) > at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) > at > scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) > at scala.concurrent.Await$.result(package.scala:107) > at > org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:237) > at > org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:51) > at > 
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:113) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(BlockManager.scala:158) > at org.apache.spark.util.Utils$.tryOrExit(Utils.scala:790) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:158) > at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) > at > akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) > at > java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) > at java.lang.Thread.run(Thread.java:662) > WARN [Connection manager future execution context-13] BlockManagerMaster: > Error sending message to BlockManagerMaster in 2 attempts > java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] > at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) > at > scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) > at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) > at > scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) > at scala.concurrent.Await$.result(package.scala:107) > at > org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:237) > at > org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:51) > at > org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:113) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(BlockManager.scala:158) > at org.apache.spark.util.Utils$.tryOrExit(Utils.scala:790) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:158) > at 
akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) > at > akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) > at > java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) > at java.lang.Thread.run(Thread.java:662) > WARN [Connection manager future execution context-13] BlockManagerMaster: > Error sending message to BlockManagerMaster in 3 attempts > java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] > at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) > at > scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) > at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) > at > scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) > at scala.concurrent.Await$.result(package.scala:107) > at > org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:237) > at > org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:51) > at > org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:113) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(BlockManager.scala:158) > at org.apache.spark.util.Utils$.tryOrExit(Utils.scala:790) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:158) > at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) > at > akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) > at > java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) > at java.lang.Thread.run(Thread.java:662) > ERROR [Connection manager future execution context-13] > 
ExecutorUncaughtExceptionHandler: Uncaught exception in thread > Thread[Connection manager future execution context-13,5,main] > org.apache.spark.SparkException: Error sending message to BlockManagerMaster > [message = HeartBeat(BlockManagerId(3, r64a13037.cm10.tbsite.net, 56614, 0))] > at > org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:251) > at > org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:51) > at > org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:113) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(BlockManager.scala:158) > at org.apache.spark.util.Utils$.tryOrExit(Utils.scala:790) > at > org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:158) > at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) > at > akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) > at > java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) > at java.lang.Thread.run(Thread.java:662) -- This message was sent by Atlassian JIRA (v6.2#6252)