Repository: spark Updated Branches: refs/heads/branch-1.5 de259316b -> a36784083
[SPARK-10859] [SQL] fix stats of StringType in columnar cache The UTF8String may come from an UnsafeRow, in which case its underlying buffer is not copied, so we should clone it in order to hold it in Stats. cc yhuai Author: Davies Liu <dav...@databricks.com> Closes #8929 from davies/pushdown_string. (cherry picked from commit ea02e5513a8f9853094d5612c962fc8c1a340f50) Signed-off-by: Yin Huai <yh...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a3678408 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a3678408 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a3678408 Branch: refs/heads/branch-1.5 Commit: a367840834b97cd6a9ecda568bb21ee6dc35fcde Parents: de25931 Author: Davies Liu <dav...@databricks.com> Authored: Mon Sep 28 14:40:40 2015 -0700 Committer: Yin Huai <yh...@databricks.com> Committed: Mon Sep 28 14:40:52 2015 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/sql/columnar/ColumnStats.scala | 4 ++-- .../spark/sql/columnar/InMemoryColumnarQuerySuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a3678408/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala index 5cbd52b..fbd51b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala @@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats { super.gatherStats(row, ordinal) if (!row.isNullAt(ordinal)) { val value = row.getUTF8String(ordinal) - if (upper == 
null || value.compareTo(upper) > 0) upper = value - if (lower == null || value.compareTo(lower) < 0) lower = value + if (upper == null || value.compareTo(upper) > 0) upper = value.clone() + if (lower == null || value.compareTo(lower) < 0) lower = value.clone() sizeInBytes += STRING.actualSize(row, ordinal) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a3678408/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index 83db9b6..3a0f346 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -211,4 +211,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { // Drop the cache. cached.unpersist() } + + test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") { + val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s") + data.cache() + assert(data.count() === 10) + assert(data.filter($"s" === "3").count() === 1) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org