Repository: spark
Updated Branches:
  refs/heads/master e40ea8742 -> a0846c4b6


[SPARK-6716] Change SparkContext.DRIVER_IDENTIFIER from <driver> to driver

Currently, the driver's executorId is set to `<driver>`. This choice of ID dates 
back to older Spark versions, but it has started to cause problems now that 
executorIds are used in more contexts, such as Ganglia metric names or driver 
thread-dump links in the web UI. The angle brackets must be escaped when this ID 
is embedded in XML or URLs, and this has led to multiple problems:

- https://issues.apache.org/jira/browse/SPARK-6484
- https://issues.apache.org/jira/browse/SPARK-4313

The simplest solution seems to be to change this ID to something that does not 
contain any special characters, such as `driver`.
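
To illustrate the escaping problem concretely, here is a quick standalone 
sketch (not part of this patch; the object name is made up, and it uses only 
the JDK's java.net.URLEncoder plus the literal old and new ID values):

    import java.net.URLEncoder

    object DriverIdEscapingDemo {
      def main(args: Array[String]): Unit = {
        // The old ID has to be percent-encoded before it can appear in a URL:
        println(URLEncoder.encode("<driver>", "UTF-8"))  // prints %3Cdriver%3E
        // The new ID passes through unchanged:
        println(URLEncoder.encode("driver", "UTF-8"))    // prints driver
      }
    }

XML embedding has the same issue: `<driver>` must be written as &lt;driver&gt; 
to form valid XML, whereas `driver` needs no escaping at all.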

I'm not sure whether we can perform this change in a patch release, since this 
ID may be considered a stable API by metrics users, but it's probably okay to 
do this in a major release as long as we document it in the release notes.

Author: Josh Rosen <joshro...@databricks.com>

Closes #5372 from JoshRosen/driver-id-fix and squashes the following commits:

42d3c10 [Josh Rosen] Clarify comment
0c5d04b [Josh Rosen] Add backwards-compatibility in BlockManagerId.isDriver
7ff12e0 [Josh Rosen] Change SparkContext.DRIVER_IDENTIFIER from <driver> to driver


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a0846c4b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a0846c4b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a0846c4b

Branch: refs/heads/master
Commit: a0846c4b635eac8d8637c83d490177f881952d27
Parents: e40ea87
Author: Josh Rosen <joshro...@databricks.com>
Authored: Mon Apr 6 23:33:16 2015 -0700
Committer: Josh Rosen <joshro...@databricks.com>
Committed: Mon Apr 6 23:33:16 2015 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/SparkContext.scala | 12 +++++++++++-
 .../scala/org/apache/spark/storage/BlockManagerId.scala |  5 ++++-
 .../org/apache/spark/storage/BlockManagerSuite.scala    |  6 ++++++
 3 files changed, 21 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a0846c4b/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 942c597..3f1a7dd 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1901,7 +1901,17 @@ object SparkContext extends Logging {
 
   private[spark] val SPARK_JOB_INTERRUPT_ON_CANCEL = "spark.job.interruptOnCancel"
 
-  private[spark] val DRIVER_IDENTIFIER = "<driver>"
+  /**
+   * Executor id for the driver.  In earlier versions of Spark, this was `<driver>`, but this was
+   * changed to `driver` because the angle brackets caused escaping issues in URLs and XML (see
+   * SPARK-6716 for more details).
+   */
+  private[spark] val DRIVER_IDENTIFIER = "driver"
+
+  /**
+   * Legacy version of DRIVER_IDENTIFIER, retained for backwards-compatibility.
+   */
+  private[spark] val LEGACY_DRIVER_IDENTIFIER = "<driver>"
 
   // The following deprecated objects have already been copied to `object AccumulatorParam` to
   // make the compiler find them automatically. They are duplicate codes only for backward

http://git-wip-us.apache.org/repos/asf/spark/blob/a0846c4b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
index a6f1ebf..69ac375 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
@@ -60,7 +60,10 @@ class BlockManagerId private (
 
   def port: Int = port_
 
-  def isDriver: Boolean = { executorId == SparkContext.DRIVER_IDENTIFIER }
+  def isDriver: Boolean = {
+    executorId == SparkContext.DRIVER_IDENTIFIER ||
+      executorId == SparkContext.LEGACY_DRIVER_IDENTIFIER
+  }
 
   override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
     out.writeUTF(executorId_)
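
For context, the effect of the compatibility shim above can be sketched as 
follows (assumes BlockManagerId's apply(execId, host, port) factory; the host 
and port here are placeholder values):

    import org.apache.spark.storage.BlockManagerId

    // An id written by an older release still carries the literal "<driver>":
    val legacyId  = BlockManagerId("<driver>", "localhost", 7077)
    val currentId = BlockManagerId("driver", "localhost", 7077)
    assert(legacyId.isDriver && currentId.isDriver)  // both identify the driver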

http://git-wip-us.apache.org/repos/asf/spark/blob/a0846c4b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index 283090e..6dc5bc4 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -139,6 +139,12 @@ class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfterEach
     assert(id2_.eq(id1), "Deserialized id2 is not the same object as original id1")
   }
 
+  test("BlockManagerId.isDriver() backwards-compatibility with legacy driver 
ids (SPARK-6716)") {
+    assert(BlockManagerId(SparkContext.DRIVER_IDENTIFIER, "XXX", 1).isDriver)
+    assert(BlockManagerId(SparkContext.LEGACY_DRIVER_IDENTIFIER, "XXX", 1).isDriver)
+    assert(!BlockManagerId("notADriverIdentifier", "XXX", 1).isDriver)
+  }
+
   test("master + 1 manager interaction") {
     store = makeBlockManager(20000)
     val a1 = new Array[Byte](4000)

