[ 
https://issues.apache.org/jira/browse/SPARK-15905?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15328608#comment-15328608
 ] 

Tejas Patil commented on SPARK-15905:
-------------------------------------

Another instance but this time not via console progress bar. This job has been 
stuck for 15+ hours.

{noformat}
"dispatcher-event-loop-23" #60 daemon prio=5 os_prio=0 tid=0x00007f981e206000 
nid=0x685f8 runnable [0x00007f8c0f1ef000]
   java.lang.Thread.State: RUNNABLE
        at java.io.FileOutputStream.writeBytes(Native Method)
        at java.io.FileOutputStream.write(FileOutputStream.java:326)
        at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
        - locked <0x00007f8d48167058> (a java.io.BufferedOutputStream)
        at java.io.PrintStream.write(PrintStream.java:480)
        - locked <0x00007f8d48167020> (a java.io.PrintStream)
        at sun.nio.cs.StreamEncoder.writeBytes(StreamEncoder.java:221)
        at sun.nio.cs.StreamEncoder.implFlushBuffer(StreamEncoder.java:291)
        at sun.nio.cs.StreamEncoder.implFlush(StreamEncoder.java:295)
        at sun.nio.cs.StreamEncoder.flush(StreamEncoder.java:141)
        - locked <0x00007f8d48237680> (a java.io.OutputStreamWriter)
        at java.io.OutputStreamWriter.flush(OutputStreamWriter.java:229)
        at org.apache.log4j.helpers.QuietWriter.flush(QuietWriter.java:59)
        at org.apache.log4j.WriterAppender.subAppend(WriterAppender.java:324)
        at org.apache.log4j.WriterAppender.append(WriterAppender.java:162)
        at org.apache.log4j.AppenderSkeleton.doAppend(AppenderSkeleton.java:251)
        - locked <0x00007f8d48235ee0> (a org.apache.log4j.ConsoleAppender)
        at 
org.apache.log4j.helpers.AppenderAttachableImpl.appendLoopOnAppenders(AppenderAttachableImpl.java:66)
        at org.apache.log4j.Category.callAppenders(Category.java:206)
        - locked <0x00007f8d481bf1e8> (a org.apache.log4j.spi.RootLogger)
        at org.apache.log4j.Category.forcedLog(Category.java:391)
        at org.apache.log4j.Category.log(Category.java:856)
        at org.slf4j.impl.Log4jLoggerAdapter.warn(Log4jLoggerAdapter.java:400)
        at org.apache.spark.Logging$class.logWarning(Logging.scala:70)
        at 
org.apache.spark.scheduler.TaskSetManager.logWarning(TaskSetManager.scala:52)
        at 
org.apache.spark.scheduler.TaskSetManager.handleFailedTask(TaskSetManager.scala:721)
        at 
org.apache.spark.scheduler.TaskSetManager$$anonfun$executorLost$6.apply(TaskSetManager.scala:813)
        at 
org.apache.spark.scheduler.TaskSetManager$$anonfun$executorLost$6.apply(TaskSetManager.scala:807)
        at 
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
        at 
scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
        at 
scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
        at 
scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
        at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
        at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
        at 
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
        at 
org.apache.spark.scheduler.TaskSetManager.executorLost(TaskSetManager.scala:807)
        at 
org.apache.spark.scheduler.Pool$$anonfun$executorLost$1.apply(Pool.scala:87)
        at 
org.apache.spark.scheduler.Pool$$anonfun$executorLost$1.apply(Pool.scala:87)
        at scala.collection.Iterator$class.foreach(Iterator.scala:727)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
        at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
        at org.apache.spark.scheduler.Pool.executorLost(Pool.scala:87)
        at 
org.apache.spark.scheduler.TaskSchedulerImpl.removeExecutor(TaskSchedulerImpl.scala:536)
        at 
org.apache.spark.scheduler.TaskSchedulerImpl.executorLost(TaskSchedulerImpl.scala:474)
        - locked <0x00007f8d5850e1e0> (a 
org.apache.spark.scheduler.TaskSchedulerImpl)
        at 
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint.removeExecutor(CoarseGrainedSchedulerBackend.scala:263)
        at 
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint$$anonfun$onDisconnected$1.apply(CoarseGrainedSchedulerBackend.scala:202)
        at 
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint$$anonfun$onDisconnected$1.apply(CoarseGrainedSchedulerBackend.scala:202)
        at scala.Option.foreach(Option.scala:236)
        at 
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint.onDisconnected(CoarseGrainedSchedulerBackend.scala:202)
        at 
org.apache.spark.rpc.netty.Inbox$$anonfun$process$1.apply$mcV$sp(Inbox.scala:142)
        at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:204)
        at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100)
        at 
org.apache.spark.rpc.netty.Dispatcher$MessageLoop.run(Dispatcher.scala:215)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
{noformat}

> Driver hung while writing to console progress bar
> -------------------------------------------------
>
>                 Key: SPARK-15905
>                 URL: https://issues.apache.org/jira/browse/SPARK-15905
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 1.6.1
>            Reporter: Tejas Patil
>            Priority: Minor
>
> This leads to driver being not able to get heartbeats from its executors and 
> job being stuck. After looking at the locking dependency amongst the driver 
> threads per the jstack, this is where the driver seems to be stuck.
> {noformat}
> "refresh progress" #113 daemon prio=5 os_prio=0 tid=0x00007f7986cbc800 
> nid=0x7887d runnable [0x00007f6d3507a000]
>    java.lang.Thread.State: RUNNABLE
>         at java.io.FileOutputStream.writeBytes(Native Method)
>         at java.io.FileOutputStream.write(FileOutputStream.java:326)
>         at 
> java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82)
>         at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140)
>         - locked <0x00007f6eb81dd290> (a java.io.BufferedOutputStream)
>         at java.io.PrintStream.write(PrintStream.java:482)
>        - locked <0x00007f6eb81dd258> (a java.io.PrintStream)
>         at sun.nio.cs.StreamEncoder.writeBytes(StreamEncoder.java:221)
>         at sun.nio.cs.StreamEncoder.implFlushBuffer(StreamEncoder.java:291)
>         at sun.nio.cs.StreamEncoder.flushBuffer(StreamEncoder.java:104)
>         - locked <0x00007f6eb81dd400> (a java.io.OutputStreamWriter)
>         at java.io.OutputStreamWriter.flushBuffer(OutputStreamWriter.java:185)
>         at java.io.PrintStream.write(PrintStream.java:527)
>         - locked <0x00007f6eb81dd258> (a java.io.PrintStream)
>         at java.io.PrintStream.print(PrintStream.java:669)
>         at 
> org.apache.spark.ui.ConsoleProgressBar.show(ConsoleProgressBar.scala:99)
>         at 
> org.apache.spark.ui.ConsoleProgressBar.org$apache$spark$ui$ConsoleProgressBar$$refresh(ConsoleProgressBar.scala:69)
>         - locked <0x00007f6ed33b48a0> (a 
> org.apache.spark.ui.ConsoleProgressBar)
>         at 
> org.apache.spark.ui.ConsoleProgressBar$$anon$1.run(ConsoleProgressBar.scala:53)
>         at java.util.TimerThread.mainLoop(Timer.java:555)
>         at java.util.TimerThread.run(Timer.java:505)
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to