spark git commit: [SPARK-9211] [SQL] [TEST] normalize line separators before generating MD5 hash

marmbrus Thu, 06 Aug 2015 14:16:47 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 ee43d355b -> 990b4bf7c



[SPARK-9211] [SQL] [TEST] normalize line separators before generating MD5 hash

The golden answer file names for the existing Hive comparison tests were 
generated using an MD5 hash of the query text, which uses Unix-style line 
separator characters `\n` (LF).
This PR ensures that all occurrences of the Windows-style line separator `\r\n` 
(CRLF) are replaced with `\n` (LF) before generating the MD5 hash to produce an 
identical MD5 hash for golden answer file names generated on Windows.

Author: Christian Kadner <ckad...@us.ibm.com>

Closes #7563 from ckadner/SPARK-9211_working and squashes the following commits:

d541db0 [Christian Kadner] [SPARK-9211][SQL] normalize line separators before 
MD5 hash

(cherry picked from commit abfedb9cd70af60c8290bd2f5a5cec1047845ba0)
Signed-off-by: Michael Armbrust <mich...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/990b4bf7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/990b4bf7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/990b4bf7

Branch: refs/heads/branch-1.5
Commit: 990b4bf7c4e87e7579fefb7507b0276ec3893e46
Parents: ee43d35
Author: Christian Kadner <ckad...@us.ibm.com>
Authored: Thu Aug 6 14:15:42 2015 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Thu Aug 6 14:15:52 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/execution/HiveComparisonTest.scala | 2 +-
 .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/990b4bf7/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 638b9c8..2bdb0e1 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -124,7 +124,7 @@ abstract class HiveComparisonTest
   protected val cacheDigest = java.security.MessageDigest.getInstance("MD5")
   protected def getMd5(str: String): String = {
     val digest = java.security.MessageDigest.getInstance("MD5")
-    digest.update(str.getBytes("utf-8"))
+    digest.update(str.replaceAll(System.lineSeparator(), 
"\n").getBytes("utf-8"))
     new java.math.BigInteger(1, digest.digest).toString(16)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/990b4bf7/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index edb2755..83f9f3e 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -427,7 +427,7 @@ class HiveQuerySuite extends HiveComparisonTest with 
BeforeAndAfter {
       |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
       |USING 'cat' AS (tKey, tValue) ROW FORMAT SERDE
       |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' FROM src;
-    """.stripMargin.replaceAll("\n", " "))
+    """.stripMargin.replaceAll(System.lineSeparator(), " "))
 
   test("transform with SerDe2") {
 
@@ -446,7 +446,7 @@ class HiveQuerySuite extends HiveComparisonTest with 
BeforeAndAfter {
         |('avro.schema.literal'='{"namespace": 
"testing.hive.avro.serde","name":
         |"src","type": "record","fields": [{"name":"key","type":"int"}]}')
         |FROM small_src
-      """.stripMargin.replaceAll("\n", " ")).collect().head
+      """.stripMargin.replaceAll(System.lineSeparator(), " ")).collect().head
 
     assert(expected(0) === res(0))
   }
@@ -458,7 +458,7 @@ class HiveQuerySuite extends HiveComparisonTest with 
BeforeAndAfter {
       |('serialization.last.column.takes.rest'='true') USING 'cat' AS (tKey, 
tValue)
       |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
       |WITH SERDEPROPERTIES ('serialization.last.column.takes.rest'='true') 
FROM src;
-    """.stripMargin.replaceAll("\n", " "))
+    """.stripMargin.replaceAll(System.lineSeparator(), " "))
 
   createQueryTest("transform with SerDe4",
     """
@@ -467,7 +467,7 @@ class HiveQuerySuite extends HiveComparisonTest with 
BeforeAndAfter {
       |('serialization.last.column.takes.rest'='true') USING 'cat' ROW FORMAT 
SERDE
       |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH 
SERDEPROPERTIES
       |('serialization.last.column.takes.rest'='true') FROM src;
-    """.stripMargin.replaceAll("\n", " "))
+    """.stripMargin.replaceAll(System.lineSeparator(), " "))
 
   createQueryTest("LIKE",
     "SELECT * FROM src WHERE value LIKE '%1%'")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9211] [SQL] [TEST] normalize line separators before generating MD5 hash

Reply via email to