[ https://issues.apache.org/jira/browse/SPARK-12431?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15082081#comment-15082081 ]
Edward Seidl commented on SPARK-12431: -------------------------------------- Here's a patch demonstrating what I'm proposing: {code} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 869caa3..10a3d7c 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -103,6 +103,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab * memory, otherwise saving it on a file will require recomputation. */ def checkpoint(): Unit + def localCheckpoint(): Unit /** * Return whether this Graph has been checkpointed or not. diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala index c88b2f6..65e24e5 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala @@ -76,6 +76,11 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] ( partitionsRDD.checkpoint() } + override def localCheckpoint() : this.type = { + partitionsRDD.localCheckpoint() + this + } + override def isCheckpointed: Boolean = { firstParent[(PartitionID, EdgePartition[ED, VD])].isCheckpointed } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index da95314..cc228ef 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -70,6 +70,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( replicatedVertexView.edges.checkpoint() } + override def localCheckpoint(): Unit = { + vertices.localCheckpoint() + replicatedVertexView.edges.localCheckpoint() + } + override def isCheckpointed: Boolean = { vertices.isCheckpointed && 
replicatedVertexView.edges.isCheckpointed } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala index 7f4e7e9..54e8406 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala @@ -77,6 +77,11 @@ class VertexRDDImpl[VD] private[graphx] ( partitionsRDD.checkpoint() } + override def localCheckpoint(): this.type = { + partitionsRDD.localCheckpoint() + this + } + override def isCheckpointed: Boolean = { firstParent[ShippableVertexPartition[VD]].isCheckpointed } {code} > add local checkpointing to GraphX > --------------------------------- > > Key: SPARK-12431 > URL: https://issues.apache.org/jira/browse/SPARK-12431 > Project: Spark > Issue Type: Improvement > Components: GraphX > Affects Versions: 1.5.2 > Reporter: Edward Seidl > > Local checkpointing was added to RDD to speed up iterative Spark jobs, but > this capability hasn't been added to GraphX. Adding localCheckpoint to > GraphImpl, EdgeRDDImpl, and VertexRDDImpl greatly improved the speed of a > k-core algorithm I'm using (at the cost of fault tolerance, of course). -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org