Repository: spark Updated Branches: refs/heads/master e40ea8742 -> a0846c4b6
[SPARK-6716] Change SparkContext.DRIVER_IDENTIFIER from <driver> to driver Currently, the driver's executorId is set to `<driver>`. This choice of ID was present in older Spark versions, but it has started to cause problems now that executorIds are used in more contexts, such as Ganglia metric names or driver thread-dump links the web UI. The angle brackets must be escaped when embedding this ID in XML or as part of URLs and this has led to multiple problems: - https://issues.apache.org/jira/browse/SPARK-6484 - https://issues.apache.org/jira/browse/SPARK-4313 The simplest solution seems to be to change this id to something that does not contain any special characters, such as `driver`. I'm not sure whether we can perform this change in a patch release, since this ID may be considered a stable API by metrics users, but it's probably okay to do this in a major release as long as we document it in the release notes. Author: Josh Rosen <joshro...@databricks.com> Closes #5372 from JoshRosen/driver-id-fix and squashes the following commits: 42d3c10 [Josh Rosen] Clarify comment 0c5d04b [Josh Rosen] Add backwards-compatibility in BlockManagerId.isDriver 7ff12e0 [Josh Rosen] Change SparkContext.DRIVER_IDENTIFIER from <driver> to driver Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a0846c4b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a0846c4b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a0846c4b Branch: refs/heads/master Commit: a0846c4b635eac8d8637c83d490177f881952d27 Parents: e40ea87 Author: Josh Rosen <joshro...@databricks.com> Authored: Mon Apr 6 23:33:16 2015 -0700 Committer: Josh Rosen <joshro...@databricks.com> Committed: Mon Apr 6 23:33:16 2015 -0700 ---------------------------------------------------------------------- core/src/main/scala/org/apache/spark/SparkContext.scala | 12 +++++++++++- .../scala/org/apache/spark/storage/BlockManagerId.scala | 5 ++++- .../org/apache/spark/storage/BlockManagerSuite.scala | 6 ++++++ 3 files changed, 21 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a0846c4b/core/src/main/scala/org/apache/spark/SparkContext.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 942c597..3f1a7dd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1901,7 +1901,17 @@ object SparkContext extends Logging { private[spark] val SPARK_JOB_INTERRUPT_ON_CANCEL = "spark.job.interruptOnCancel" - private[spark] val DRIVER_IDENTIFIER = "<driver>" + /** + * Executor id for the driver. In earlier versions of Spark, this was `<driver>`, but this was + * changed to `driver` because the angle brackets caused escaping issues in URLs and XML (see + * SPARK-6716 for more details). + */ + private[spark] val DRIVER_IDENTIFIER = "driver" + + /** + * Legacy version of DRIVER_IDENTIFIER, retained for backwards-compatibility. + */ + private[spark] val LEGACY_DRIVER_IDENTIFIER = "<driver>" // The following deprecated objects have already been copied to `object AccumulatorParam` to // make the compiler find them automatically. They are duplicate codes only for backward http://git-wip-us.apache.org/repos/asf/spark/blob/a0846c4b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index a6f1ebf..69ac375 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -60,7 +60,10 @@ class BlockManagerId private ( def port: Int = port_ - def isDriver: Boolean = { executorId == SparkContext.DRIVER_IDENTIFIER } + def isDriver: Boolean = { + executorId == SparkContext.DRIVER_IDENTIFIER || + executorId == SparkContext.LEGACY_DRIVER_IDENTIFIER + } override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { out.writeUTF(executorId_) http://git-wip-us.apache.org/repos/asf/spark/blob/a0846c4b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 283090e..6dc5bc4 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -139,6 +139,12 @@ class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfterEach assert(id2_.eq(id1), "Deserialized id2 is not the same object as original id1") } + test("BlockManagerId.isDriver() backwards-compatibility with legacy driver ids (SPARK-6716)") { + assert(BlockManagerId(SparkContext.DRIVER_IDENTIFIER, "XXX", 1).isDriver) + assert(BlockManagerId(SparkContext.LEGACY_DRIVER_IDENTIFIER, "XXX", 1).isDriver) + assert(!BlockManagerId("notADriverIdentifier", "XXX", 1).isDriver) + } + test("master + 1 manager interaction") { store = makeBlockManager(20000) val a1 = new Array[Byte](4000) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org