Repository: spark
Updated Branches:
  refs/heads/branch-1.6 573a2c97e -> 06f4fce29
[SPARK-13390][SQL][BRANCH-1.6] Fix the issue that Iterator.map().toSeq is not Serializable

## What changes were proposed in this pull request?

`scala.collection.Iterator`'s transformation methods (e.g., `map`, `filter`) return an `AbstractIterator`, which is not Serializable. E.g.,

```Scala
scala> val iter = Array(1, 2, 3).iterator.map(_ + 1)
iter: Iterator[Int] = non-empty iterator

scala> println(iter.isInstanceOf[Serializable])
false
```

If we call something like `Iterator.map(...).toSeq`, it creates a `Stream` that holds the non-serializable `AbstractIterator` in a field, making the `Stream` itself non-serializable.

This PR uses `toArray` instead of `toSeq` to fix this issue in `def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame`.

## How was this patch tested?

Jenkins tests.

Author: Shixiong Zhu <shixi...@databricks.com>

Closes #11334 from zsxwing/SPARK-13390.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06f4fce2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06f4fce2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06f4fce2

Branch: refs/heads/branch-1.6
Commit: 06f4fce29227f9763d9f9abff6e7459542dce261
Parents: 573a2c9
Author: Shixiong Zhu <shixi...@databricks.com>
Authored: Wed Feb 24 13:35:36 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Feb 24 13:35:36 2016 +0000

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/SQLContext.scala |  2 +-
 .../org/apache/spark/sql/SQLContextSuite.scala  | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/06f4fce2/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 4e26250..47fd7fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -604,7 +604,7 @@ class SQLContext private[sql](
     val className = beanClass.getName
     val beanInfo = Introspector.getBeanInfo(beanClass)
     val rows = SQLContext.beansToRows(data.asScala.iterator, beanInfo, attrSeq)
-    DataFrame(self, LocalRelation(attrSeq, rows.toSeq))
+    DataFrame(self, LocalRelation(attrSeq, rows.toArray))
   }


http://git-wip-us.apache.org/repos/asf/spark/blob/06f4fce2/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
index 1994dac..9bf865d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
@@ -65,4 +65,21 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext{
       session2.sql("select myadd(1, 2)").explain()
     }
   }
+
+  test("SPARK-13390: createDataFrame(java.util.List[_],Class[_]) NotSerializableException") {
+    val rows = new java.util.ArrayList[IntJavaBean]()
+    rows.add(new IntJavaBean(1))
+    val sqlContext = SQLContext.getOrCreate(sc)
+    // Without the fix for SPARK-13390, this will throw NotSerializableException
+    sqlContext.createDataFrame(rows, classOf[IntJavaBean]).groupBy("int").count().collect()
+  }
+}
+
+class IntJavaBean(private var i: Int) extends Serializable {
+
+  def getInt(): Int = i
+
+  def setInt(i: Int): Unit = {
+    this.i = i
+  }
 }
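
For readers who want to reproduce the underlying problem outside Spark, here is a minimal standalone sketch, assuming a Scala 2.11 runtime (the branch-1.6 toolchain), where `Iterator.toSeq` builds a lazy `Stream`. The object and method names (`SerializationDemo`, `trySerialize`) are illustrative and not part of the commit:

```Scala
import java.io.{ByteArrayOutputStream, ObjectOutputStream}
import scala.util.control.NonFatal

// Hypothetical demo (not in the commit) of why `toArray` fixes the issue.
object SerializationDemo {

  // Attempt a Java serialization round-trip; return the failure or the byte count.
  private def trySerialize(obj: AnyRef): Either[Throwable, Int] =
    try {
      val bytes = new ByteArrayOutputStream()
      val out = new ObjectOutputStream(bytes)
      out.writeObject(obj)
      out.close()
      Right(bytes.size())
    } catch {
      case NonFatal(e) => Left(e)
    }

  def main(args: Array[String]): Unit = {
    // `toSeq` on an iterator returns a lazy Stream; its unevaluated tail
    // closes over the non-serializable AbstractIterator.
    val lazySeq: Seq[Int] = Array(1, 2, 3).iterator.map(_ + 1).toSeq
    println(trySerialize(lazySeq))  // expected: Left(java.io.NotSerializableException: ...)

    // `toArray` drains the iterator eagerly, so nothing non-serializable is retained.
    val eagerSeq: Seq[Int] = Array(1, 2, 3).iterator.map(_ + 1).toArray.toSeq
    println(trySerialize(eagerSeq)) // expected: Right(<byte count>)
  }
}
```

The relevance to the patch: the `Seq` passed to `LocalRelation` is carried inside the query plan and may be serialized when the query runs, which is why materializing the rows with `toArray` before constructing the relation avoids the `NotSerializableException` that the new test in `SQLContextSuite` guards against.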