spark git commit: [SPARK-17802] Improved caller context logging.

srowen Wed, 26 Oct 2016 05:32:27 -0700

Repository: spark
Updated Branches:
  refs/heads/master 5d0f81da4 -> 402205ddf



[SPARK-17802] Improved caller context logging.

## What changes were proposed in this pull request?

[SPARK-16757](https://issues.apache.org/jira/browse/SPARK-16757) sets the 
hadoop `CallerContext` when calling hadoop/hdfs apis to make spark applications 
more diagnosable in hadoop/hdfs logs. However, the 
`org.apache.hadoop.ipc.CallerContext` class was only added in [hadoop 
2.8](https://issues.apache.org/jira/browse/HDFS-9184), which is not officially 
released yet. So each time `utils.CallerContext.setCurrentContext()` is called 
(e.g. [when a task is 
created](https://github.com/apache/spark/blob/b678e46/core/src/main/scala/org/apache/spark/scheduler/Task.scala#L95-L96)),
 a "java.lang.ClassNotFoundException: org.apache.hadoop.ipc.CallerContext"
error is logged, which pollutes the spark logs when there are lots of tasks.

This patch improves this behaviour by checking only once whether the 
`CallerContext` class is available, and skipping the call (so no error is 
logged per task) when it is not.

## How was this patch tested?

Existing tests.

Author: Shuai Lin <linshuai2...@gmail.com>

Closes #15377 from lins05/spark-17802-improve-callercontext-logging.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/402205dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/402205dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/402205dd

Branch: refs/heads/master
Commit: 402205ddf749e7478683ce1b0443df63b46b03fd
Parents: 5d0f81d
Author: Shuai Lin <linshuai2...@gmail.com>
Authored: Wed Oct 26 14:31:47 2016 +0200
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Oct 26 14:31:47 2016 +0200

----------------------------------------------------------------------
 .../scala/org/apache/spark/util/Utils.scala     | 48 ++++++++++++++------
 .../org/apache/spark/util/UtilsSuite.scala      |  7 +--
 2 files changed, 36 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/402205dd/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index bfc6094..e57eb0d 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2508,6 +2508,26 @@ private[spark] object Utils extends Logging {
   }
 }
 
+private[util] object CallerContext extends Logging {
+  val callerContextSupported: Boolean = {
+    SparkHadoopUtil.get.conf.getBoolean("hadoop.caller.context.enabled", 
false) && {
+      try {
+        // scalastyle:off classforname
+        Class.forName("org.apache.hadoop.ipc.CallerContext")
+        Class.forName("org.apache.hadoop.ipc.CallerContext$Builder")
+        // scalastyle:on classforname
+        true
+      } catch {
+        case _: ClassNotFoundException =>
+          false
+        case NonFatal(e) =>
+          logWarning("Fail to load the CallerContext class", e)
+          false
+      }
+    }
+  }
+}
+
 /**
  * An utility class used to set up Spark caller contexts to HDFS and Yarn. The 
`context` will be
  * constructed by parameters passed in.
@@ -2554,21 +2574,21 @@ private[spark] class CallerContext(
    * Set up the caller context [[context]] by invoking Hadoop CallerContext 
API of
    * [[org.apache.hadoop.ipc.CallerContext]], which was added in hadoop 2.8.
    */
-  def setCurrentContext(): Boolean = {
-    var succeed = false
-    try {
-      // scalastyle:off classforname
-      val callerContext = Class.forName("org.apache.hadoop.ipc.CallerContext")
-      val Builder = 
Class.forName("org.apache.hadoop.ipc.CallerContext$Builder")
-      // scalastyle:on classforname
-      val builderInst = 
Builder.getConstructor(classOf[String]).newInstance(context)
-      val hdfsContext = Builder.getMethod("build").invoke(builderInst)
-      callerContext.getMethod("setCurrent", callerContext).invoke(null, 
hdfsContext)
-      succeed = true
-    } catch {
-      case NonFatal(e) => logInfo("Fail to set Spark caller context", e)
+  def setCurrentContext(): Unit = {
+    if (CallerContext.callerContextSupported) {
+      try {
+        // scalastyle:off classforname
+        val callerContext = 
Class.forName("org.apache.hadoop.ipc.CallerContext")
+        val builder = 
Class.forName("org.apache.hadoop.ipc.CallerContext$Builder")
+        // scalastyle:on classforname
+        val builderInst = 
builder.getConstructor(classOf[String]).newInstance(context)
+        val hdfsContext = builder.getMethod("build").invoke(builderInst)
+        callerContext.getMethod("setCurrent", callerContext).invoke(null, 
hdfsContext)
+      } catch {
+        case NonFatal(e) =>
+          logWarning("Fail to set Spark caller context", e)
+      }
     }
-    succeed
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/402205dd/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala 
b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 4dda80f..aeb2969 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -843,14 +843,11 @@ class UtilsSuite extends SparkFunSuite with 
ResetSystemProperties with Logging {
 
   test("Set Spark CallerContext") {
     val context = "test"
-    try {
+    new CallerContext(context).setCurrentContext()
+    if (CallerContext.callerContextSupported) {
       val callerContext = 
Utils.classForName("org.apache.hadoop.ipc.CallerContext")
-      assert(new CallerContext(context).setCurrentContext())
       assert(s"SPARK_$context" ===
         callerContext.getMethod("getCurrent").invoke(null).toString)
-    } catch {
-      case e: ClassNotFoundException =>
-        assert(!new CallerContext(context).setCurrentContext())
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-17802] Improved caller context logging.

Reply via email to