[ https://issues.apache.org/jira/browse/SPARK-16915?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Weichen Xu updated SPARK-16915: ------------------------------- Description: import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD class C1(val rdd1: RDD[Int], val t1: Int) { val bcT1 = rdd1.context.broadcast(t1) def runMap() = { rdd1.map( _ + bcT1.value).collect() } } object Main1 { def main() = { val sc = new SparkContext() val seq1 = Seq(1, 2, 3) val rdd1 = sc.parallelize(seq1) val c1 = new C1(rdd1, 3) c1.runMap() // throw org.apache.spark.SparkException: Task not serializable } } the code above will throw Exception: org.apache.spark.SparkException: Task not serializable at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:298) at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288) at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108) at org.apache.spark.SparkContext.clean(SparkContext.scala:2044) at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:366) at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:365) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:358) at org.apache.spark.rdd.RDD.map(RDD.scala:365) at C1.runMap(<console>:14) ... 
48 elided Caused by: java.io.NotSerializableException: C1 Serialization stack: - object not serializable (class: C1, value: C1@36885319) - field (class: C1$$anonfun$runMap$1, name: $outer, type: class C1) - object (class C1$$anonfun$runMap$1, <function1>) at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46) at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100) at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:295) was: import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD class C1(val rdd1: RDD[Int], val t1: Int) { val bcT1 = rdd1.context.broadcast(t1); def runMap() = { rdd1.map( _ + bcT1.value).collect(); } } object Main1 { def main() = { val sc = new SparkContext(); val seq1 = Seq(1, 2, 3); val rdd1 = sc.parallelize(seq1); val c1 = new C1(rdd1, 3); c1.runMap(); // throw org.apache.spark.SparkException: Task not serializable } } the code above will throw Exception: org.apache.spark.SparkException: Task not serializable at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:298) at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288) at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108) at org.apache.spark.SparkContext.clean(SparkContext.scala:2044) at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:366) at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:365) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:358) at org.apache.spark.rdd.RDD.map(RDD.scala:365) at C1.runMap(<console>:14) ... 
48 elided Caused by: java.io.NotSerializableException: C1 Serialization stack: - object not serializable (class: C1, value: C1@36885319) - field (class: C1$$anonfun$runMap$1, name: $outer, type: class C1) - object (class C1$$anonfun$runMap$1, <function1>) at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46) at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100) at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:295) > broadcast var causes Task not serializable exception when broadcast var is a > class member > ---------------------------------------------------------------------------------------- > > Key: SPARK-16915 > URL: https://issues.apache.org/jira/browse/SPARK-16915 > Project: Spark > Issue Type: Bug > Components: Spark Core > Reporter: Weichen Xu > > import org.apache.spark.SparkContext > import org.apache.spark.rdd.RDD > class C1(val rdd1: RDD[Int], val t1: Int) { > val bcT1 = rdd1.context.broadcast(t1) > def runMap() = { > rdd1.map( _ + bcT1.value).collect() > } > } > object Main1 { > def main() = { > val sc = new SparkContext() > val seq1 = Seq(1, 2, 3) > val rdd1 = sc.parallelize(seq1) > val c1 = new C1(rdd1, 3) > c1.runMap() // throw org.apache.spark.SparkException: Task not > serializable > } > } > the code above will throw Exception: > org.apache.spark.SparkException: Task not serializable > at > org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:298) > at > org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288) > at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108) > at org.apache.spark.SparkContext.clean(SparkContext.scala:2044) > at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:366) > at 
org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:365) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:358) > at org.apache.spark.rdd.RDD.map(RDD.scala:365) > at C1.runMap(<console>:14) > ... 48 elided > Caused by: java.io.NotSerializableException: C1 > Serialization stack: > - object not serializable (class: C1, value: C1@36885319) > - field (class: C1$$anonfun$runMap$1, name: $outer, type: class C1) > - object (class C1$$anonfun$runMap$1, <function1>) > at > org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40) > at > org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46) > at > org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100) > at > org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:295) -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org