spark git commit: [SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs()
Repository: spark Updated Branches: refs/heads/branch-1.3 07a401a7b -> 7e5e4d82b [SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs() This method is performance-sensitive and this change wasn't necessary. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7e5e4d82 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7e5e4d82 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7e5e4d82 Branch: refs/heads/branch-1.3 Commit: 7e5e4d82be7509edb64c71ca6189add556589613 Parents: 07a401a Author: Josh Rosen joshro...@databricks.com Authored: Tue Feb 17 17:45:16 2015 -0800 Committer: Josh Rosen joshro...@databricks.com Committed: Tue Feb 17 17:47:43 2015 -0800 -- .../main/scala/org/apache/spark/scheduler/DAGScheduler.scala | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7e5e4d82/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 9c355d7..8b62d24 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -190,13 +190,15 @@ class DAGScheduler( } private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = cacheLocs.synchronized { -cacheLocs.getOrElseUpdate(rdd.id, { +// Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times +if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) - blockIds.map { id => + cacheLocs(rdd.id) = blockIds.map { id => locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) } -}) +} +cacheLocs(rdd.id) } private def clearCacheLocs(): Unit = 
cacheLocs.synchronized { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs()
Repository: spark Updated Branches: refs/heads/master d46d6246d -> a51fc7ef9 [SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs() This method is performance-sensitive and this change wasn't necessary. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a51fc7ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a51fc7ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a51fc7ef Branch: refs/heads/master Commit: a51fc7ef9adb6a41c4857918217f800858fced2c Parents: d46d624 Author: Josh Rosen joshro...@databricks.com Authored: Tue Feb 17 17:45:16 2015 -0800 Committer: Josh Rosen joshro...@databricks.com Committed: Tue Feb 17 17:45:16 2015 -0800 -- .../main/scala/org/apache/spark/scheduler/DAGScheduler.scala | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a51fc7ef/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 9c355d7..8b62d24 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -190,13 +190,15 @@ class DAGScheduler( } private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = cacheLocs.synchronized { -cacheLocs.getOrElseUpdate(rdd.id, { +// Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times +if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) - blockIds.map { id => + cacheLocs(rdd.id) = blockIds.map { id => locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) } -}) +} +cacheLocs(rdd.id) } private def clearCacheLocs(): Unit = 
cacheLocs.synchronized { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org