spark git commit: [SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain
Repository: spark Updated Branches: refs/heads/branch-1.2 528cce8bc -> 667f7ff44 [SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 The f closure of `PartitionsRDD(ZippedPartitionsRDD2)` contains a `$outer` that references EdgeRDD/VertexRDD, which causes task's serialization chain become very long in iterative GraphX applications. As a result, StackOverflow error will occur. If we set "f = null" in `clearDependencies()`, checkpoint() can cut off the long serialization chain. More details and explanation can be found in the JIRA. Author: JerryLead Author: Lijie Xu Closes #3545 from JerryLead/my_core and squashes the following commits: f7faea5 [JerryLead] checkpoint() should clear the f to avoid StackOverflow error c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master (cherry picked from commit 77be8b986fd21b7bbe28aa8db1042cb22bc74fe7) Signed-off-by: Ankur Dave Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/667f7ff4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/667f7ff4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/667f7ff4 Branch: refs/heads/branch-1.2 Commit: 667f7ff440dea9b83dbf3910f26d8dbf82d343a5 Parents: 528cce8 Author: JerryLead Authored: Tue Dec 2 23:53:29 2014 -0800 Committer: Ankur Dave Committed: Tue Dec 2 23:53:38 2014 -0800 -- .../scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala| 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/667f7ff4/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 996f2cd..95b2dd9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -77,7 +77,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag]( private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, -f: (Iterator[A], Iterator[B]) => Iterator[V], +var f: (Iterator[A], Iterator[B]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], preservesPartitioning: Boolean = false) @@ -92,13 +92,14 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag] super.clearDependencies() rdd1 = null rdd2 = null +f = null } } private[spark] class ZippedPartitionsRDD3 [A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag]( sc: SparkContext, -f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], +var f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -117,13 +118,14 @@ private[spark] class ZippedPartitionsRDD3 rdd1 = null rdd2 = null rdd3 = null +f = null } } private[spark] class ZippedPartitionsRDD4 [A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag]( sc: SparkContext, -f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], +var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -145,5 +147,6 @@ private[spark] class ZippedPartitionsRDD4 rdd2 = null rdd3 = null rdd4 = null +f = null } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain
Repository: spark Updated Branches: refs/heads/master 17c162f66 -> 77be8b986 [SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 The f closure of `PartitionsRDD(ZippedPartitionsRDD2)` contains a `$outer` that references EdgeRDD/VertexRDD, which causes task's serialization chain become very long in iterative GraphX applications. As a result, StackOverflow error will occur. If we set "f = null" in `clearDependencies()`, checkpoint() can cut off the long serialization chain. More details and explanation can be found in the JIRA. Author: JerryLead Author: Lijie Xu Closes #3545 from JerryLead/my_core and squashes the following commits: f7faea5 [JerryLead] checkpoint() should clear the f to avoid StackOverflow error c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77be8b98 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77be8b98 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77be8b98 Branch: refs/heads/master Commit: 77be8b986fd21b7bbe28aa8db1042cb22bc74fe7 Parents: 17c162f Author: JerryLead Authored: Tue Dec 2 23:53:29 2014 -0800 Committer: Ankur Dave Committed: Tue Dec 2 23:53:29 2014 -0800 -- .../scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala| 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/77be8b98/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 996f2cd..95b2dd9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -77,7 +77,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag]( private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, -f: (Iterator[A], Iterator[B]) => Iterator[V], +var f: (Iterator[A], Iterator[B]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], preservesPartitioning: Boolean = false) @@ -92,13 +92,14 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag] super.clearDependencies() rdd1 = null rdd2 = null +f = null } } private[spark] class ZippedPartitionsRDD3 [A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag]( sc: SparkContext, -f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], +var f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -117,13 +118,14 @@ private[spark] class ZippedPartitionsRDD3 rdd1 = null rdd2 = null rdd3 = null +f = null } } private[spark] class ZippedPartitionsRDD4 [A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag]( sc: SparkContext, -f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], +var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -145,5 +147,6 @@ private[spark] class ZippedPartitionsRDD4 rdd2 = null rdd3 = null rdd4 = null +f = null } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org