This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 42e7d66b663 [SPARK-42403][CORE] JsonProtocol should handle null JSON strings
42e7d66b663 is described below

commit 42e7d66b66337539317bea399540792def45292c
Author: Josh Rosen <joshro...@databricks.com>
AuthorDate: Fri Feb 10 21:54:28 2023 -0800

    [SPARK-42403][CORE] JsonProtocol should handle null JSON strings
    
    ### What changes were proposed in this pull request?
    
    This PR fixes a regression introduced by #36885 which broke JsonProtocol's
    ability to parse `null` string values: the old Json4S-based parser would
    correctly parse null literals, whereas the new code rejects them via an
    overly strict type check.
    
    This PR solves this problem by relaxing the type checking in
    `extractString` so that `null` literals in JSON can be parsed as `null` strings.
    
    ### Why are the changes needed?
    
    Fix a regression which prevents the history server from parsing certain
    types of event logs which contain null strings, including stack traces
    containing generated code frames and ExceptionFailure messages where the
    exception message is `null`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added new unit test in JsonProtocolSuite.
    
    Closes #39973 from JoshRosen/SPARK-42403-handle-null-strings-in-json-protocol-read-path.
    
    Authored-by: Josh Rosen <joshro...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit 84ddd409c11e4da769c5b1f496f2b61c3d928c07)
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../scala/org/apache/spark/util/JsonProtocol.scala |  2 +-
 .../org/apache/spark/util/JsonProtocolSuite.scala  | 32 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala 
b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
index 75dab8dc535..6b75971fc25 100644
--- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -1611,7 +1611,7 @@ private[spark] object JsonProtocol {
     }
 
     def extractString: String = {
-      require(json.isTextual, s"Expected string, got ${json.getNodeType}")
+      require(json.isTextual || json.isNull, s"Expected string or NULL, got 
${json.getNodeType}")
       json.textValue
     }
   }
diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala 
b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
index be8a165d2d2..ea71a4b3f1b 100644
--- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -778,6 +778,38 @@ class JsonProtocolSuite extends SparkFunSuite {
         |}""".stripMargin
     assert(JsonProtocol.sparkEventFromJson(unknownFieldsJson) === expected)
   }
+
+  test("SPARK-42403: properly handle null string values") {
+    // Null string values can appear in a few different event types,
+    // so we test multiple known cases here:
+    val stackTraceJson =
+      """
+        |[
+        |  {
+        |    "Declaring Class": "someClass",
+        |    "Method Name": "someMethod",
+        |    "File Name": null,
+        |    "Line Number": -1
+        |  }
+        |]
+        |""".stripMargin
+    val stackTrace = JsonProtocol.stackTraceFromJson(stackTraceJson)
+    assert(stackTrace === Array(new StackTraceElement("someClass", 
"someMethod", null, -1)))
+
+    val exceptionFailureJson =
+      """
+        |{
+        |  "Reason": "ExceptionFailure",
+        |  "Class Name": "java.lang.Exception",
+        |  "Description": null,
+        |  "Stack Trace": [],
+        |  "Accumulator Updates": []
+        |}
+        |""".stripMargin
+    val exceptionFailure =
+      
JsonProtocol.taskEndReasonFromJson(exceptionFailureJson).asInstanceOf[ExceptionFailure]
+    assert(exceptionFailure.description == null)
+  }
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to