Repository: spark
Updated Branches:
  refs/heads/master 7c41d1357 -> fa712b309


[SPARK-4077][SQL] Spark SQL returns wrong values for valid string timestamp values

In org.apache.hadoop.hive.serde2.io.TimestampWritable.set, if the next entry
is null, the current Timestamp object is reset in place. Because of this,
HiveInspectors.unwrap cannot hand out that same Timestamp object and must
return a copy instead.
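
To illustrate the hazard, here is a minimal standalone sketch (not the Spark
code; the names TimestampAliasingSketch, shared, aliased and copied are made
up for illustration). Reusing the mutable Timestamp that the inspector hands
back means a later reset clobbers values already returned, while cloning
preserves them:

    import java.sql.Timestamp

    object TimestampAliasingSketch {
      def main(args: Array[String]): Unit = {
        // the single mutable Timestamp instance a serde keeps reusing across rows
        val shared = Timestamp.valueOf("2014-12-11 00:00:00")

        val aliased = shared                                  // pre-fix unwrap: an alias
        val copied  = shared.clone().asInstanceOf[Timestamp]  // post-fix unwrap: a copy

        // the serde resets the shared object when the next entry is null
        shared.setTime(0L)

        println(aliased)  // epoch value: the row handed out earlier is now corrupted
        println(copied)   // still 2014-12-11 00:00:00.0: the cloned value is preserved
      }
    }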

Author: Venkata Ramana G <ramana.gollamudi@huawei.com>

Author: Venkata Ramana Gollamudi <ramana.gollam...@huawei.com>

Closes #3019 from gvramana/spark_4077 and squashes the following commits:

32d818f [Venkata Ramana Gollamudi] fixed check style
fa01e71 [Venkata Ramana Gollamudi] cloned timestamp object as org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa712b30
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa712b30
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa712b30

Branch: refs/heads/master
Commit: fa712b309c0e59943aae289dab629b34a13fe20e
Parents: 7c41d13
Author: Venkata Ramana Gollamudi <ramana.gollam...@huawei.com>
Authored: Fri Oct 31 11:30:28 2014 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Fri Oct 31 11:30:28 2014 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/HiveInspectors.scala  |  3 +++
 .../resources/data/files/issue-4077-data.txt    |  2 ++
 .../sql/hive/execution/HiveTableScanSuite.scala | 22 ++++++++++++++++++++
 3 files changed, 27 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fa712b30/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index c6103a1..0439ab9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -91,6 +91,9 @@ private[hive] trait HiveInspectors {
       if (data == null) null else hvoi.getPrimitiveJavaObject(data).getValue
     case hdoi: HiveDecimalObjectInspector =>
       if (data == null) null else BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue())
+    // org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object
+    // if next timestamp is null, so Timestamp object is cloned
+    case ti: TimestampObjectInspector => ti.getPrimitiveJavaObject(data).clone()
     case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data)
     case li: ListObjectInspector =>
       Option(li.getList(data))

http://git-wip-us.apache.org/repos/asf/spark/blob/fa712b30/sql/hive/src/test/resources/data/files/issue-4077-data.txt
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/data/files/issue-4077-data.txt b/sql/hive/src/test/resources/data/files/issue-4077-data.txt
new file mode 100644
index 0000000..18067b0
--- /dev/null
+++ b/sql/hive/src/test/resources/data/files/issue-4077-data.txt
@@ -0,0 +1,2 @@
+2014-12-11 00:00:00,1
+2014-12-11astring00:00:00,2

http://git-wip-us.apache.org/repos/asf/spark/blob/fa712b30/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 2f3db95..54c0f01 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,6 +18,9 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.{Row, SchemaRDD}
+
+import org.apache.spark.util.Utils
 
 class HiveTableScanSuite extends HiveComparisonTest {
 
@@ -47,4 +50,23 @@ class HiveTableScanSuite extends HiveComparisonTest {
     TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 
5").collect()
     TestHive.sql("drop table tb")
   }
+  
+  test("Spark-4077: timestamp query for null value") {
+    TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
+    TestHive.sql(
+      """
+        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
+        ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY ','
+        LINES TERMINATED BY '\n'
+      """.stripMargin)
+    val location =
+      Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile()
+     
+    TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE 
timestamp_query_null")
+    assert(TestHive.sql("SELECT time from timestamp_query_null limit 
2").collect() 
+      === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 
00:00:00")),Row(null)))
+    TestHive.sql("DROP TABLE timestamp_query_null")
+  }
+  
 }

