[ 
https://issues.apache.org/jira/browse/SPARK-4759?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14235651#comment-14235651
 ] 

Davis Shepherd commented on SPARK-4759:
---------------------------------------

{{"sparkDriver-akka.actor.default-dispatcher-18" daemon prio=5 tid=7fc3853ef800 
nid=0x119e87000 waiting on condition [119e86000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c33229d0> (a 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinPool)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:1594)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1478)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)

"sparkDriver-akka.actor.default-dispatcher-17" daemon prio=5 tid=7fc38c007000 
nid=0x119d84000 waiting on condition [119d83000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c33229d0> (a 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinPool)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:1594)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1478)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)

"sparkDriver-akka.actor.default-dispatcher-14" daemon prio=5 tid=7fc38b003800 
nid=0x119b96000 waiting on condition [119b95000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c33229d0> (a 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinPool)
        at 
scala.concurrent.forkjoin.ForkJoinPool.idleAwaitWork(ForkJoinPool.java:1626)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:1579)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1478)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)

"sparkDriver-akka.actor.default-dispatcher-15" daemon prio=5 tid=7fc38b003000 
nid=0x119a93000 waiting on condition [119a92000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c33229d0> (a 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinPool)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:1594)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1478)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)

"sparkDriver-akka.actor.default-dispatcher-16" daemon prio=5 tid=7fc38c006000 
nid=0x11971d000 waiting on condition [11971c000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c33229d0> (a 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinPool)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:1594)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1478)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)

"sparkDriver-akka.actor.default-dispatcher-13" daemon prio=5 tid=7fc38681f000 
nid=0x1193d1000 waiting on condition [1193d0000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c33229d0> (a 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinPool)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:1594)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1478)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)

"task-result-getter-3" daemon prio=5 tid=7fc3871a7800 nid=0x1192ce000 waiting 
on condition [1192cd000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbeaace0> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:156)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
        at 
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:957)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"task-result-getter-2" daemon prio=5 tid=7fc3871a6800 nid=0x1191cb000 waiting 
on condition [1191ca000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbeaace0> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:156)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
        at 
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:957)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"task-result-getter-1" daemon prio=5 tid=7fc3871a6000 nid=0x1190c8000 waiting 
on condition [1190c7000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbeaace0> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:156)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
        at 
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:957)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"task-result-getter-0" daemon prio=5 tid=7fc38d164800 nid=0x118fc5000 waiting 
on condition [118fc4000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbeaace0> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:156)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
        at 
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:957)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-8" daemon prio=5 tid=7fc385409800 nid=0x118ec2000 
waiting on condition [118ec1000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-7" daemon prio=5 tid=7fc386b4b800 nid=0x118dbf000 
waiting on condition [118dbe000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-6" daemon prio=5 tid=7fc388013800 nid=0x118cbc000 
waiting on condition [118cbb000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-5" daemon prio=5 tid=7fc3869e3800 nid=0x118bb9000 
waiting on condition [118bb8000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-4" daemon prio=5 tid=7fc38689b800 nid=0x118ab6000 
waiting on condition [118ab5000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-3" daemon prio=5 tid=7fc38c005000 nid=0x1189b3000 
waiting on condition [1189b2000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-2" daemon prio=5 tid=7fc38b001800 nid=0x1188b0000 
waiting on condition [1188af000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-1" daemon prio=5 tid=7fc3871a5000 nid=0x1187ad000 
waiting on condition [1187ac000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Executor task launch worker-0" daemon prio=5 tid=7fc38d164000 nid=0x1146c9000 
waiting on condition [1146c8000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7bbed5360> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:196)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:424)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:323)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:874)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:955)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:917)
        at java.lang.Thread.run(Thread.java:695)

"Driver Heartbeater" daemon prio=5 tid=7fc385572800 nid=0x114223000 waiting on 
condition [114222000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
        at java.lang.Thread.sleep(Native Method)
        at org.apache.spark.executor.Executor$$anon$1.run(Executor.scala:383)

"Spark Context Cleaner" daemon prio=5 tid=7fc38d163000 nid=0x114120000 in 
Object.wait() [11411f000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        - waiting on <7bbea0748> (a java.lang.ref.ReferenceQueue$Lock)
        at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:118)
        - locked <7bbea0748> (a java.lang.ref.ReferenceQueue$Lock)
        at 
org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:136)
        at 
org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply(ContextCleaner.scala:134)
        at 
org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply(ContextCleaner.scala:134)
        at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1364)
        at 
org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:133)
        at org.apache.spark.ContextCleaner$$anon$3.run(ContextCleaner.scala:65)

"Timer-1" daemon prio=5 tid=7fc38689a800 nid=0x11401d000 in Object.wait() 
[11401c000]
   java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        - waiting on <7bbec0758> (a java.util.TaskQueue)
        at java.lang.Object.wait(Object.java:485)
        at java.util.TimerThread.mainLoop(Timer.java:483)
        - locked <7bbec0758> (a java.util.TaskQueue)
        at java.util.TimerThread.run(Timer.java:462)

"SparkListenerBus" daemon prio=5 tid=7fc387085800 nid=0x113f1a000 waiting on 
condition [113f19000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <7c3360168> (a 
java.util.concurrent.Semaphore$NonfairSync)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:156)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:811)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:969)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1281)
        at java.util.concurrent.Semaphore.acquire(Semaphore.java:286)
        at 
org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:48)
        at 
org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply(LiveListenerBus.scala:47)
        at 
org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply(LiveListenerBus.scala:47)
        at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1364)
        at 
org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:46)}}

> Deadlock in complex spark job.
> ------------------------------
>
>                 Key: SPARK-4759
>                 URL: https://issues.apache.org/jira/browse/SPARK-4759
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 1.1.1
>         Environment: Java version "1.7.0_51"
> Java(TM) SE Runtime Environment (build 1.7.0_51-b13)
> Java HotSpot(TM) 64-Bit Server VM (build 24.51-b03, mixed mode)
> Mac OSX 10.10.1
> Using local spark context
>            Reporter: Davis Shepherd
>         Attachments: SparkBugReplicator.scala
>
>
> The attached test class runs two identical jobs that perform some iterative 
> computation on an RDD[(Int, Int)]. This computation involves 
>   *taking new data merging it with the previous result
>   *caching and checkpointing the new result
>   *rinse and repeat
> The first time the job is run, it runs successfully, and the spark context is 
> shut down. The second time the job is run with a new spark context in the 
> same process, the job hangs indefinitely, only having scheduled a subset of 
> the necessary tasks for the final stage.
> Ive been able to produce a test case that reproduces the issue, and I've 
> added some comments where some knockout experimentation has left some 
> breadcrumbs as to where the issue might be.  



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to