Repository: spark
Updated Branches:
  refs/heads/master 85c5424d4 -> 2dc048081


[SPARK-17532] Add lock debugging info to thread dumps.

## What changes were proposed in this pull request?

This adds information to the web UI thread dump page about the JVM locks
held by threads and the locks that threads are blocked waiting to
acquire. This should help find cases where lock contention is causing
Spark applications to run slowly.
## How was this patch tested?

Tested by applying this patch and viewing the change in the web UI.

![thread-lock-info](https://cloud.githubusercontent.com/assets/87915/18493057/6e5da870-79c3-11e6-8c20-f54c18a37544.png)

Additions:
- A "Thread Locking" column with the locks held by the thread or that are 
blocking the thread
- Links from the a blocked thread to the thread holding the lock
- Stack frames show where threads are inside `synchronized` blocks, "holding 
Monitor(...)"

Author: Ryan Blue <b...@apache.org>

Closes #15088 from rdblue/SPARK-17532-add-thread-lock-info.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2dc04808
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2dc04808
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2dc04808

Branch: refs/heads/master
Commit: 2dc048081668665f85623839d5f663b402e42555
Parents: 85c5424
Author: Ryan Blue <b...@apache.org>
Authored: Wed Nov 2 00:08:30 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Wed Nov 2 00:08:30 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ui/static/table.js         |  3 +-
 .../spark/ui/exec/ExecutorThreadDumpPage.scala  | 12 +++++++
 .../apache/spark/util/ThreadStackTrace.scala    |  6 +++-
 .../scala/org/apache/spark/util/Utils.scala     | 34 +++++++++++++++++---
 4 files changed, 49 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2dc04808/core/src/main/resources/org/apache/spark/ui/static/table.js
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/spark/ui/static/table.js 
b/core/src/main/resources/org/apache/spark/ui/static/table.js
index 14b06bf..0315ebf 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/table.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/table.js
@@ -36,7 +36,7 @@ function toggleThreadStackTrace(threadId, forceAdd) {
     if (stackTrace.length == 0) {
         var stackTraceText = $('#' + threadId + "_td_stacktrace").html()
         var threadCell = $("#thread_" + threadId + "_tr")
-        threadCell.after("<tr id=\"" + threadId +"_stacktrace\" 
class=\"accordion-body\"><td colspan=\"3\"><pre>" +
+        threadCell.after("<tr id=\"" + threadId +"_stacktrace\" 
class=\"accordion-body\"><td colspan=\"4\"><pre>" +
             stackTraceText +  "</pre></td></tr>")
     } else {
         if (!forceAdd) {
@@ -73,6 +73,7 @@ function onMouseOverAndOut(threadId) {
     $("#" + threadId + "_td_id").toggleClass("threaddump-td-mouseover");
     $("#" + threadId + "_td_name").toggleClass("threaddump-td-mouseover");
     $("#" + threadId + "_td_state").toggleClass("threaddump-td-mouseover");
+    $("#" + threadId + "_td_locking").toggleClass("threaddump-td-mouseover");
 }
 
 function onSearchStringChange() {

http://git-wip-us.apache.org/repos/asf/spark/blob/2dc04808/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala 
b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
index a0ef80d..c6a0744 100644
--- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
@@ -48,6 +48,16 @@ private[ui] class ExecutorThreadDumpPage(parent: 
ExecutorsTab) extends WebUIPage
           }
       }.map { thread =>
         val threadId = thread.threadId
+        val blockedBy = thread.blockedByThreadId match {
+          case Some(blockedByThreadId) =>
+            <div>
+              Blocked by <a href={s"#${thread.blockedByThreadId}_td_id"}>
+              Thread {thread.blockedByThreadId} {thread.blockedByLock}</a>
+            </div>
+          case None => Text("")
+        }
+        val heldLocks = thread.holdingLocks.mkString(", ")
+
         <tr id={s"thread_${threadId}_tr"} class="accordion-heading"
             onclick={s"toggleThreadStackTrace($threadId, false)"}
             onmouseover={s"onMouseOverAndOut($threadId)"}
@@ -55,6 +65,7 @@ private[ui] class ExecutorThreadDumpPage(parent: 
ExecutorsTab) extends WebUIPage
           <td id={s"${threadId}_td_id"}>{threadId}</td>
           <td id={s"${threadId}_td_name"}>{thread.threadName}</td>
           <td id={s"${threadId}_td_state"}>{thread.threadState}</td>
+          <td id={s"${threadId}_td_locking"}>{blockedBy}{heldLocks}</td>
           <td id={s"${threadId}_td_stacktrace"} 
class="hidden">{thread.stackTrace}</td>
         </tr>
       }
@@ -86,6 +97,7 @@ private[ui] class ExecutorThreadDumpPage(parent: 
ExecutorsTab) extends WebUIPage
           <th onClick="collapseAllThreadStackTrace(false)">Thread ID</th>
           <th onClick="collapseAllThreadStackTrace(false)">Thread Name</th>
           <th onClick="collapseAllThreadStackTrace(false)">Thread State</th>
+          <th onClick="collapseAllThreadStackTrace(false)">Thread Locks</th>
         </thead>
         <tbody>{dumpRows}</tbody>
       </table>

http://git-wip-us.apache.org/repos/asf/spark/blob/2dc04808/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala 
b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
index d4e0ad9..b121798 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
@@ -24,4 +24,8 @@ private[spark] case class ThreadStackTrace(
   threadId: Long,
   threadName: String,
   threadState: Thread.State,
-  stackTrace: String)
+  stackTrace: String,
+  blockedByThreadId: Option[Long],
+  blockedByLock: String,
+  holdingLocks: Seq[String])
+

http://git-wip-us.apache.org/repos/asf/spark/blob/2dc04808/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 6027b07..22c28fb 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.util
 
 import java.io._
-import java.lang.management.ManagementFactory
+import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo}
 import java.net._
 import java.nio.ByteBuffer
 import java.nio.channels.Channels
@@ -2096,15 +2096,41 @@ private[spark] object Utils extends Logging {
     }
   }
 
+  private implicit class Lock(lock: LockInfo) {
+    def lockString: String = {
+      lock match {
+        case monitor: MonitorInfo =>
+          s"Monitor(${lock.getClassName}@${lock.getIdentityHashCode}})"
+        case _ =>
+          s"Lock(${lock.getClassName}@${lock.getIdentityHashCode}})"
+      }
+    }
+  }
+
   /** Return a thread dump of all threads' stacktraces.  Used to capture dumps 
for the web UI */
   def getThreadDump(): Array[ThreadStackTrace] = {
     // We need to filter out null values here because dumpAllThreads() may 
return null array
     // elements for threads that are dead / don't exist.
     val threadInfos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, 
true).filter(_ != null)
     threadInfos.sortBy(_.getThreadId).map { case threadInfo =>
-      val stackTrace = threadInfo.getStackTrace.map(_.toString).mkString("\n")
-      ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName,
-        threadInfo.getThreadState, stackTrace)
+      val monitors = threadInfo.getLockedMonitors.map(m => 
m.getLockedStackFrame -> m).toMap
+      val stackTrace = threadInfo.getStackTrace.map { frame =>
+        monitors.get(frame) match {
+          case Some(monitor) =>
+            monitor.getLockedStackFrame.toString + s" => holding 
${monitor.lockString}"
+          case None =>
+            frame.toString
+        }
+      }.mkString("\n")
+
+      // use a set to dedup re-entrant locks that are held at multiple places
+      val heldLocks = (threadInfo.getLockedSynchronizers.map(_.lockString)
+          ++ threadInfo.getLockedMonitors.map(_.lockString)
+        ).toSet
+
+      ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName, 
threadInfo.getThreadState,
+        stackTrace, if (threadInfo.getLockOwnerId < 0) None else 
Some(threadInfo.getLockOwnerId),
+        Option(threadInfo.getLockInfo).map(_.lockString).getOrElse(""), 
heldLocks.toSeq)
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to