Repository: spark
Updated Branches:
  refs/heads/branch-1.4 73cf5def0 -> 67ad12d79

[SPARK-8392] RDDOperationGraph: getting cached nodes is slow

```
def getAllNodes: Seq[RDDOperationNode] = {
  _childNodes ++ _childClusters.flatMap(_.childNodes)
}
```

When ```_childClusters``` contains many nodes, this call is so slow that the process appears to hang. I think we can improve the efficiency here.

Author: xutingjun <xuting...@huawei.com>

Closes #6839 from XuTingjun/DAGImprove and squashes the following commits:

53b03ea [xutingjun] change code to more concise and easier to read
f98728b [xutingjun] fix words: node -> nodes
f87c663 [xutingjun] put the filter inside
81f9fd2 [xutingjun] put the filter inside

(cherry picked from commit e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c)
Signed-off-by: Andrew Or <and...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/67ad12d7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/67ad12d7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/67ad12d7

Branch: refs/heads/branch-1.4
Commit: 67ad12d793a8f0f8137d0a2e0c0d80bd1b5284f2
Parents: 73cf5de
Author: xutingjun <xuting...@huawei.com>
Authored: Wed Jun 17 22:31:01 2015 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Wed Jun 17 22:31:39 2015 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/ui/UIUtils.scala       | 2 +-
 .../scala/org/apache/spark/ui/scope/RDDOperationGraph.scala | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/67ad12d7/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 65162f4..7898039 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -362,7 +362,7 @@ private[spark] object UIUtils extends Logging {
       { g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } }
       { g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } }
       {
-        g.rootCluster.getAllNodes.filter(_.cached).map { n =>
+        g.rootCluster.getCachedNodes.map { n =>
           <div class="cached-rdd">{n.id}</div>
         }
       }

http://git-wip-us.apache.org/repos/asf/spark/blob/67ad12d7/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
index d6a5085..ffea981 100644
--- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
+++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
@@ -66,9 +66,9 @@ private[ui] class RDDOperationCluster(val id: String, private var _name: String
     _childClusters += childCluster
   }
 
-  /** Return all the nodes container in this cluster, including ones nested in other clusters. */
-  def getAllNodes: Seq[RDDOperationNode] = {
-    _childNodes ++ _childClusters.flatMap(_.childNodes)
+  /** Return all the nodes which are cached. */
+  def getCachedNodes: Seq[RDDOperationNode] = {
+    _childNodes.filter(_.cached) ++ _childClusters.flatMap(_.getCachedNodes)
   }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
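
A minimal, self-contained sketch of the idea behind this patch. `Node`, `Cluster`, and `CachedNodesDemo` below are hypothetical stand-ins, not the actual Spark `RDDOperationNode`/`RDDOperationCluster` classes; the sketch only contrasts collecting every node and filtering afterwards (roughly the old `getAllNodes` plus an external `.filter(_.cached)`) with filtering during the recursive traversal, which is what the new `getCachedNodes` does.

```
import scala.collection.mutable.ArrayBuffer

// Hypothetical stand-ins for illustration only; not the Spark classes.
case class Node(id: Int, cached: Boolean)

class Cluster {
  val childNodes = ArrayBuffer[Node]()
  val childClusters = ArrayBuffer[Cluster]()

  // Old shape: build the full node list, let callers filter afterwards.
  def getAllNodes: Seq[Node] =
    childNodes ++ childClusters.flatMap(_.getAllNodes)

  // New shape: filter while traversing, so non-cached nodes are never accumulated.
  def getCachedNodes: Seq[Node] =
    childNodes.filter(_.cached) ++ childClusters.flatMap(_.getCachedNodes)
}

object CachedNodesDemo extends App {
  val root = new Cluster
  val child = new Cluster
  root.childClusters += child
  root.childNodes += Node(1, cached = true)
  child.childNodes ++= Seq(Node(2, cached = false), Node(3, cached = true))

  // Old call-site shape: root.getAllNodes.filter(_.cached)
  // New call-site shape: root.getCachedNodes
  println(root.getCachedNodes.map(_.id).mkString(", "))  // prints: 1, 3
}
```

Both traversals are linear in the number of nodes visited, but the new shape never materializes the non-cached nodes, which matters when clusters contain many nodes and only a few RDDs are cached.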