Repository: spark Updated Branches: refs/heads/master 5d0f81da4 -> 402205ddf
[SPARK-17802] Improved caller context logging. ## What changes were proposed in this pull request? [SPARK-16757](https://issues.apache.org/jira/browse/SPARK-16757) sets the hadoop `CallerContext` when calling hadoop/hdfs apis to make spark applications more diagnosable in hadoop/hdfs logs. However, the `org.apache.hadoop.ipc.CallerContext` class has only been available since [hadoop 2.8](https://issues.apache.org/jira/browse/HDFS-9184), which is not officially released yet. So each time `utils.CallerContext.setCurrentContext()` is called (e.g. [when a task is created](https://github.com/apache/spark/blob/b678e46/core/src/main/scala/org/apache/spark/scheduler/Task.scala#L95-L96)), a "java.lang.ClassNotFoundException: org.apache.hadoop.ipc.CallerContext" error is logged, which pollutes the spark logs when there are lots of tasks. This patch improves this behaviour by only logging the `ClassNotFoundException` once. ## How was this patch tested? Existing tests. Author: Shuai Lin <linshuai2...@gmail.com> Closes #15377 from lins05/spark-17802-improve-callercontext-logging. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/402205dd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/402205dd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/402205dd Branch: refs/heads/master Commit: 402205ddf749e7478683ce1b0443df63b46b03fd Parents: 5d0f81d Author: Shuai Lin <linshuai2...@gmail.com> Authored: Wed Oct 26 14:31:47 2016 +0200 Committer: Sean Owen <so...@cloudera.com> Committed: Wed Oct 26 14:31:47 2016 +0200 ---------------------------------------------------------------------- .../scala/org/apache/spark/util/Utils.scala | 48 ++++++++++++++------ .../org/apache/spark/util/UtilsSuite.scala | 7 +-- 2 files changed, 36 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/402205dd/core/src/main/scala/org/apache/spark/util/Utils.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index bfc6094..e57eb0d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2508,6 +2508,26 @@ private[spark] object Utils extends Logging { } } +private[util] object CallerContext extends Logging { + val callerContextSupported: Boolean = { + SparkHadoopUtil.get.conf.getBoolean("hadoop.caller.context.enabled", false) && { + try { + // scalastyle:off classforname + Class.forName("org.apache.hadoop.ipc.CallerContext") + Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") + // scalastyle:on classforname + true + } catch { + case _: ClassNotFoundException => + false + case NonFatal(e) => + logWarning("Fail to load the CallerContext class", e) + false + } + } + } +} + /** * An utility class used to set up Spark caller contexts to HDFS and Yarn. 
The `context` will be * constructed by parameters passed in. @@ -2554,21 +2574,21 @@ private[spark] class CallerContext( * Set up the caller context [[context]] by invoking Hadoop CallerContext API of * [[org.apache.hadoop.ipc.CallerContext]], which was added in hadoop 2.8. */ - def setCurrentContext(): Boolean = { - var succeed = false - try { - // scalastyle:off classforname - val callerContext = Class.forName("org.apache.hadoop.ipc.CallerContext") - val Builder = Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") - // scalastyle:on classforname - val builderInst = Builder.getConstructor(classOf[String]).newInstance(context) - val hdfsContext = Builder.getMethod("build").invoke(builderInst) - callerContext.getMethod("setCurrent", callerContext).invoke(null, hdfsContext) - succeed = true - } catch { - case NonFatal(e) => logInfo("Fail to set Spark caller context", e) + def setCurrentContext(): Unit = { + if (CallerContext.callerContextSupported) { + try { + // scalastyle:off classforname + val callerContext = Class.forName("org.apache.hadoop.ipc.CallerContext") + val builder = Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") + // scalastyle:on classforname + val builderInst = builder.getConstructor(classOf[String]).newInstance(context) + val hdfsContext = builder.getMethod("build").invoke(builderInst) + callerContext.getMethod("setCurrent", callerContext).invoke(null, hdfsContext) + } catch { + case NonFatal(e) => + logWarning("Fail to set Spark caller context", e) + } } - succeed } } http://git-wip-us.apache.org/repos/asf/spark/blob/402205dd/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 4dda80f..aeb2969 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -843,14 +843,11 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { test("Set Spark CallerContext") { val context = "test" - try { + new CallerContext(context).setCurrentContext() + if (CallerContext.callerContextSupported) { val callerContext = Utils.classForName("org.apache.hadoop.ipc.CallerContext") - assert(new CallerContext(context).setCurrentContext()) assert(s"SPARK_$context" === callerContext.getMethod("getCurrent").invoke(null).toString) - } catch { - case e: ClassNotFoundException => - assert(!new CallerContext(context).setCurrentContext()) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org