Davies Liu created SPARK-16173: ---------------------------------- Summary: Can't join describe() of DataFrame in Scala 2.10 Key: SPARK-16173 URL: https://issues.apache.org/jira/browse/SPARK-16173 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 1.6.1, 1.5.2, 2.0.0 Reporter: Davies Liu
descripbe() of DataFrame use Seq() (it's a Iterator actually) to create another DataFrame, which can not be serialized in Scala 2.10. {code} org.apache.spark.SparkException: Task not serializable at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304) at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294) at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122) at org.apache.spark.SparkContext.clean(SparkContext.scala:2060) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1.apply(RDD.scala:707) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1.apply(RDD.scala:706) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at org.apache.spark.rdd.RDD.mapPartitions(RDD.scala:706) at org.apache.spark.sql.execution.ConvertToUnsafe.doExecute(rowFormatConverters.scala:38) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:82) at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:79) at org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:100) at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79) at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79) at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.io.NotSerializableException: scala.collection.Iterator$$anon$11 Serialization stack: - object not serializable (class: scala.collection.Iterator$$anon$11, value: empty iterator) - field (class: scala.collection.Iterator$$anonfun$toStream$1, name: $outer, type: interface scala.collection.Iterator) - object (class scala.collection.Iterator$$anonfun$toStream$1, <function0>) - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: interface scala.Function0) - object (class scala.collection.immutable.Stream$Cons, Stream(WrappedArray(1), WrappedArray(2.0), WrappedArray(NaN), WrappedArray(2), WrappedArray(2))) - field (class: scala.collection.immutable.Stream$$anonfun$zip$1, name: $outer, type: class scala.collection.immutable.Stream) - object (class scala.collection.immutable.Stream$$anonfun$zip$1, <function0>) - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: interface scala.Function0) - object (class scala.collection.immutable.Stream$Cons, Stream((WrappedArray(1),(count,<function1>)), (WrappedArray(2.0),(mean,<function1>)), (WrappedArray(NaN),(stddev,<function1>)), (WrappedArray(2),(min,<function1>)), (WrappedArray(2),(max,<function1>)))) - field (class: scala.collection.immutable.Stream$$anonfun$map$1, name: $outer, type: class scala.collection.immutable.Stream) - object (class scala.collection.immutable.Stream$$anonfun$map$1, <function0>) - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: interface scala.Function0) - object (class scala.collection.immutable.Stream$Cons, Stream([count,1], [mean,2.0], [stddev,NaN], [min,2], [max,2])) - field (class: scala.collection.immutable.Stream$$anonfun$map$1, name: $outer, type: class scala.collection.immutable.Stream) - object (class scala.collection.immutable.Stream$$anonfun$map$1, <function0>) - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: interface scala.Function0) - object (class scala.collection.immutable.Stream$Cons, Stream([count,1], [mean,2.0], [stddev,NaN], [min,2], [max,2])) - field (class: org.apache.spark.sql.execution.LocalTableScan, name: rows, type: interface scala.collection.Seq) - object (class org.apache.spark.sql.execution.LocalTableScan, LocalTableScan [summary#633,grade#634], [[count,1],[mean,2.0],[stddev,NaN],[min,2],[max,2]] ) - field (class: org.apache.spark.sql.execution.ConvertToUnsafe, name: child, type: class org.apache.spark.sql.execution.SparkPlan) - object (class org.apache.spark.sql.execution.ConvertToUnsafe, ConvertToUnsafe +- LocalTableScan [summary#633,grade#634], [[count,1],[mean,2.0],[stddev,NaN],[min,2],[max,2]] ) - field (class: org.apache.spark.sql.execution.ConvertToUnsafe$$anonfun$1, name: $outer, type: class org.apache.spark.sql.execution.ConvertToUnsafe) - object (class org.apache.spark.sql.execution.ConvertToUnsafe$$anonfun$1, <function1>) at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47) at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101) at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:301) ... 24 more {code} https://databricks-prod-cloudfront.cloud.databricks.com/public/4027ec902e239c93eaaa8714f173bcfc/1953143414968711/3514600112485120/2850462372213371/latest.html -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org