[ 
https://issues.apache.org/jira/browse/SPARK-12431?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15082081#comment-15082081
 ] 

Edward Seidl commented on SPARK-12431:
--------------------------------------

Here's a patch demonstrating what I have in mind:

{code}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 869caa3..10a3d7c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -103,6 +103,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
    * memory, otherwise saving it on a file will require recomputation.
    */
   def checkpoint(): Unit
+  def localCheckpoint(): Unit
 
   /**
    * Return whether this Graph has been checkpointed or not.
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
index c88b2f6..65e24e5 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
@@ -76,6 +76,11 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] (
     partitionsRDD.checkpoint()
   }
 
+  override def localCheckpoint() : this.type = {
+      partitionsRDD.localCheckpoint()
+      this
+  }
+
   override def isCheckpointed: Boolean = {
     firstParent[(PartitionID, EdgePartition[ED, VD])].isCheckpointed
   }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index da95314..cc228ef 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -70,6 +70,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
     replicatedVertexView.edges.checkpoint()
   }
 
+  override def localCheckpoint(): Unit = {
+      vertices.localCheckpoint()
+      replicatedVertexView.edges.localCheckpoint()
+  }
+
   override def isCheckpointed: Boolean = {
     vertices.isCheckpointed && replicatedVertexView.edges.isCheckpointed
   }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
index 7f4e7e9..54e8406 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
@@ -77,6 +77,11 @@ class VertexRDDImpl[VD] private[graphx] (
     partitionsRDD.checkpoint()
   }
 
+  override def localCheckpoint(): this.type = {
+      partitionsRDD.localCheckpoint()
+      this
+  }
+
   override def isCheckpointed: Boolean = {
     firstParent[ShippableVertexPartition[VD]].isCheckpointed
   }
{code}

> add local checkpointing to GraphX
> ---------------------------------
>
>                 Key: SPARK-12431
>                 URL: https://issues.apache.org/jira/browse/SPARK-12431
>             Project: Spark
>          Issue Type: Improvement
>          Components: GraphX
>    Affects Versions: 1.5.2
>            Reporter: Edward Seidl
>
> Local checkpointing was added to RDD to speed up iterative Spark jobs, but 
> this capability hasn't been added to GraphX.  Adding localCheckpoint to 
> GraphImpl, EdgeRDDImpl, and VertexRDDImpl greatly improved the speed of a 
> k-core algorithm I'm using (at the cost of fault tolerance, of course).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to