Repository: spark Updated Branches: refs/heads/master 7c41d1357 -> fa712b309
[SPARK-4077][SQL] Spark SQL return wrong values for valid string timestamp values In org.apache.hadoop.hive.serde2.io.TimestampWritable.set , if the next entry is null then current time stamp object is being reset. However because of this hiveinspectors:unwrap cannot use the same timestamp object without creating a copy. Author: Venkata Ramana G <ramana.gollamudihuawei.com> Author: Venkata Ramana Gollamudi <ramana.gollam...@huawei.com> Closes #3019 from gvramana/spark_4077 and squashes the following commits: 32d818f [Venkata Ramana Gollamudi] fixed check style fa01e71 [Venkata Ramana Gollamudi] cloned timestamp object as org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa712b30 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa712b30 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa712b30 Branch: refs/heads/master Commit: fa712b309c0e59943aae289dab629b34a13fe20e Parents: 7c41d13 Author: Venkata Ramana Gollamudi <ramana.gollam...@huawei.com> Authored: Fri Oct 31 11:30:28 2014 -0700 Committer: Michael Armbrust <mich...@databricks.com> Committed: Fri Oct 31 11:30:28 2014 -0700 ---------------------------------------------------------------------- .../apache/spark/sql/hive/HiveInspectors.scala | 3 +++ .../resources/data/files/issue-4077-data.txt | 2 ++ .../sql/hive/execution/HiveTableScanSuite.scala | 22 ++++++++++++++++++++ 3 files changed, 27 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fa712b30/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index c6103a1..0439ab9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -91,6 +91,9 @@ private[hive] trait HiveInspectors { if (data == null) null else hvoi.getPrimitiveJavaObject(data).getValue case hdoi: HiveDecimalObjectInspector => if (data == null) null else BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue()) + // org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object + // if next timestamp is null, so Timestamp object is cloned + case ti: TimestampObjectInspector => ti.getPrimitiveJavaObject(data).clone() case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data) case li: ListObjectInspector => Option(li.getList(data)) http://git-wip-us.apache.org/repos/asf/spark/blob/fa712b30/sql/hive/src/test/resources/data/files/issue-4077-data.txt ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/resources/data/files/issue-4077-data.txt b/sql/hive/src/test/resources/data/files/issue-4077-data.txt new file mode 100644 index 0000000..18067b0 --- /dev/null +++ b/sql/hive/src/test/resources/data/files/issue-4077-data.txt @@ -0,0 +1,2 @@ +2014-12-11 00:00:00,1 +2014-12-11astring00:00:00,2 http://git-wip-us.apache.org/repos/asf/spark/blob/fa712b30/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 2f3db95..54c0f01 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -18,6 +18,9 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.{Row, SchemaRDD} + +import org.apache.spark.util.Utils class HiveTableScanSuite extends HiveComparisonTest { @@ -47,4 +50,23 @@ class HiveTableScanSuite extends HiveComparisonTest { TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect() TestHive.sql("drop table tb") } + + test("Spark-4077: timestamp query for null value") { + TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null") + TestHive.sql( + """ + CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + """.stripMargin) + val location = + Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile() + + TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null") + assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect() + === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")),Row(null))) + TestHive.sql("DROP TABLE timestamp_query_null") + } + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org