[
https://issues.apache.org/jira/browse/SPARK-12431?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15082081#comment-15082081
]
Edward Seidl commented on SPARK-12431:
--
Here's a patch demonstrating what I mean:
{code}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 869caa3..10a3d7c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -103,6 +103,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected
() extends Serializab
* memory, otherwise saving it on a file will require recomputation.
*/
def checkpoint(): Unit
+ def localCheckpoint(): Unit
/**
* Return whether this Graph has been checkpointed or not.
diff --git
a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
index c88b2f6..65e24e5 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
@@ -76,6 +76,11 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag]
private[graphx] (
partitionsRDD.checkpoint()
}
+ override def localCheckpoint() : this.type = {
+ partitionsRDD.localCheckpoint()
+ this
+ }
+
override def isCheckpointed: Boolean = {
firstParent[(PartitionID, EdgePartition[ED, VD])].isCheckpointed
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index da95314..cc228ef 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -70,6 +70,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
replicatedVertexView.edges.checkpoint()
}
+ override def localCheckpoint(): Unit = {
+ vertices.localCheckpoint()
+ replicatedVertexView.edges.localCheckpoint()
+ }
+
override def isCheckpointed: Boolean = {
vertices.isCheckpointed && replicatedVertexView.edges.isCheckpointed
}
diff --git
a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
index 7f4e7e9..54e8406 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
@@ -77,6 +77,11 @@ class VertexRDDImpl[VD] private[graphx] (
partitionsRDD.checkpoint()
}
+ override def localCheckpoint(): this.type = {
+ partitionsRDD.localCheckpoint()
+ this
+ }
+
override def isCheckpointed: Boolean = {
firstParent[ShippableVertexPartition[VD]].isCheckpointed
}
{code}
> add local checkpointing to GraphX
> -
>
> Key: SPARK-12431
> URL: https://issues.apache.org/jira/browse/SPARK-12431
> Project: Spark
> Issue Type: Improvement
> Components: GraphX
>Affects Versions: 1.5.2
>Reporter: Edward Seidl
>
> Local checkpointing was added to RDD to speed up iterative Spark jobs, but
> this capability hasn't been added to GraphX. Adding localCheckpoint to
> GraphImpl, EdgeRDDImpl, and VertexRDDImpl greatly improved the speed of a
> k-core algorithm I'm using (at the cost of fault tolerance, of course).
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
-
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org