Repository: spark Updated Branches: refs/heads/master 6cd98c187 -> 03f3e91ff
[SPARK-10422] [SQL] String column in InMemoryColumnarCache needs to override clone method https://issues.apache.org/jira/browse/SPARK-10422 Author: Yin Huai <yh...@databricks.com> Closes #8578 from yhuai/SPARK-10422. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/03f3e91f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/03f3e91f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/03f3e91f Branch: refs/heads/master Commit: 03f3e91ff21707d8a1c7057a00f1b1cd8b743e3f Parents: 6cd98c1 Author: Yin Huai <yh...@databricks.com> Authored: Wed Sep 2 21:00:13 2015 -0700 Committer: Davies Liu <davies....@gmail.com> Committed: Wed Sep 2 21:00:13 2015 -0700 ---------------------------------------------------------------------- .../apache/spark/sql/columnar/ColumnType.scala | 2 ++ .../columnar/InMemoryColumnarQuerySuite.scala | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/03f3e91f/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala index 531a824..ab482a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala @@ -339,6 +339,8 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) { override def copyField(from: InternalRow, fromOrdinal: Int, to: MutableRow, toOrdinal: Int) { setField(to, toOrdinal, getField(from, fromOrdinal)) } + + override def clone(v: UTF8String): UTF8String = v.clone() } private[sql] object DATE extends NativeColumnType(DateType, 8, 4) { http://git-wip-us.apache.org/repos/asf/spark/blob/03f3e91f/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index 952637c..83db9b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -191,4 +191,24 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { ctx.table("InMemoryCache_different_data_types").collect()) ctx.dropTempTable("InMemoryCache_different_data_types") } + + test("SPARK-10422: String column in InMemoryColumnarCache needs to override clone method") { + val df = + ctx.range(1, 100).selectExpr("id % 10 as id").rdd.map(id => Tuple1(s"str_$id")).toDF("i") + val cached = df.cache() + // count triggers the caching action. It should not throw. + cached.count() + + // Make sure, the DataFrame is indeed cached. + assert(sqlContext.cacheManager.lookupCachedData(cached).nonEmpty) + + // Check result. + checkAnswer( + cached, + ctx.range(1, 100).selectExpr("id % 10 as id").rdd.map(id => Tuple1(s"str_$id")).toDF("i") + ) + + // Drop the cache. + cached.unpersist() + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org