This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8230f16164b1 [SPARK-45317][SQL][CONNECT] Handle null filename in stack traces of exceptions
8230f16164b1 is described below

commit 8230f16164b1cbd20ca0cb052c28c9fdb8d892d1
Author: Yihong He <yihong...@databricks.com>
AuthorDate: Tue Sep 26 11:04:14 2023 +0900

    [SPARK-45317][SQL][CONNECT] Handle null filename in stack traces of exceptions
    
    ### What changes were proposed in this pull request?
    
    - Handle null filename in stack traces of exceptions
    - Change the filename field in protobuf to optional
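    
    For illustration, marking a proto3 scalar `optional` makes the generated
    Java bindings track explicit field presence, which is what the client-side
    `hasFileName` check below relies on. A minimal sketch of the resulting
    behavior (assuming the generated `org.apache.spark.connect.proto` classes
    are on the classpath; names and values are illustrative):
    
    ```scala
    import org.apache.spark.connect.proto.FetchErrorDetailsResponse
    
    // Built without setFileName: presence is false, and the getter returns
    // the proto3 default empty string rather than null.
    val noFile = FetchErrorDetailsResponse.StackTraceElement
      .newBuilder()
      .setDeclaringClass("com.example.Main")
      .setMethodName("run")
      .setLineNumber(42)
      .build()
    assert(!noFile.hasFileName)
    assert(noFile.getFileName == "")
    
    // After setFileName, presence flips to true.
    val withFile = noFile.toBuilder.setFileName("Main.scala").build()
    assert(withFile.hasFileName)
    ```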
    
    ### Why are the changes needed?
    
    - In Java exceptions, the filename is the only stack trace element field that can be null, and a null filename may cause a `NullPointerException`
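    
    For context, `StackTraceElement` accepts a null file name (an unknown
    source file), while generated protobuf string setters reject null. A
    minimal sketch of the failure mode the fix guards against (same assumed
    generated classes as above; names are illustrative):
    
    ```scala
    import org.apache.spark.connect.proto.FetchErrorDetailsResponse
    
    // A stack frame whose source file is unknown: fileName may be null.
    val frame = new StackTraceElement("com.example.Main", "run", null, 42)
    
    // Generated protobuf setters throw NullPointerException on null, so
    // unconditionally copying the field over fails for such frames.
    val builder = FetchErrorDetailsResponse.StackTraceElement.newBuilder()
    try builder.setFileName(frame.getFileName)
    catch {
      case _: NullPointerException =>
        // The fix: only call setFileName when getFileName is non-null.
        ()
    }
    ```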
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    - `build/sbt "connect-client-jvm/testOnly *ClientE2ETestSuite"`
    - `build/sbt "connect-client-jvm/testOnly *ClientStreamingQuerySuite"`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #43103 from heyihong/SPARK-45317.
    
    Authored-by: Yihong He <yihong...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../org/apache/spark/sql/ClientE2ETestSuite.scala  | 28 ++++++++++++++++++++++
 .../src/main/protobuf/spark/connect/base.proto     |  2 +-
 .../connect/client/GrpcExceptionConverter.scala    |  2 +-
 .../spark/sql/connect/utils/ErrorUtils.scala       | 10 +++++---
 .../service/FetchErrorDetailsHandlerSuite.scala    | 25 +++++++++++++++++++
 python/pyspark/sql/connect/proto/base_pb2.py       | 14 +++++------
 python/pyspark/sql/connect/proto/base_pb2.pyi      | 13 +++++++++-
 7 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index ec9b1698a4ee..55718ed9c0be 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -45,6 +45,34 @@ import org.apache.spark.sql.types._
 
 class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateMethodTester {
 
+  test(s"throw SparkException with null filename in stack trace elements") {
+    withSQLConf("spark.sql.connect.enrichError.enabled" -> "true") {
+      val session = spark
+      import session.implicits._
+
+      val throwException =
+        udf((_: String) => {
+          val testError = new SparkException("test")
+          val stackTrace = testError.getStackTrace()
+          stackTrace(0) = new StackTraceElement(
+            stackTrace(0).getClassName,
+            stackTrace(0).getMethodName,
+            null,
+            stackTrace(0).getLineNumber)
+          testError.setStackTrace(stackTrace)
+          throw testError
+        })
+
+      val ex = intercept[SparkException] {
+        Seq("1").toDS.withColumn("udf_val", throwException($"value")).collect()
+      }
+
+      assert(ex.getCause.isInstanceOf[SparkException])
+      assert(ex.getCause.getStackTrace().length > 0)
+      assert(ex.getCause.getStackTrace()(0).getFileName == null)
+    }
+  }
+
   for (enrichErrorEnabled <- Seq(false, true)) {
     test(s"cause exception - ${enrichErrorEnabled}") {
      withSQLConf("spark.sql.connect.enrichError.enabled" -> enrichErrorEnabled.toString) {
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
index e5317cae6dc8..b30c578421c2 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
@@ -808,7 +808,7 @@ message FetchErrorDetailsResponse {
     string method_name = 2;
 
     // The name of the file containing the execution point.
-    string file_name = 3;
+    optional string file_name = 3;
 
     // The line number of the source line containing the execution point.
     int32 line_number = 4;
diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
index edbc434ef964..2d86e8c1e417 100644
--- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
+++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala
@@ -222,7 +222,7 @@ private object GrpcExceptionConverter {
         new StackTraceElement(
           stackTraceElement.getDeclaringClass,
           stackTraceElement.getMethodName,
-          stackTraceElement.getFileName,
+          if (stackTraceElement.hasFileName) stackTraceElement.getFileName else null,
           stackTraceElement.getLineNumber)
       })
     }
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
index 1abd44608cd0..78c1f723c902 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala
@@ -92,13 +92,17 @@ private[connect] object ErrorUtils extends Logging {
         builder.addAllStackTrace(
           currentError.getStackTrace
             .map { stackTraceElement =>
-              FetchErrorDetailsResponse.StackTraceElement
+              val stackTraceBuilder = FetchErrorDetailsResponse.StackTraceElement
                 .newBuilder()
                 .setDeclaringClass(stackTraceElement.getClassName)
                 .setMethodName(stackTraceElement.getMethodName)
-                .setFileName(stackTraceElement.getFileName)
                 .setLineNumber(stackTraceElement.getLineNumber)
-                .build()
+
+              if (stackTraceElement.getFileName != null) {
+                stackTraceBuilder.setFileName(stackTraceElement.getFileName)
+              }
+
+              stackTraceBuilder.build()
             }
             .toIterable
             .asJava)
diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
index c0591dcc9c7b..7633fa7df5d5 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
+++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/FetchErrorDetailsHandlerSuite.scala
@@ -163,4 +163,29 @@ class FetchErrorDetailsHandlerSuite extends SharedSparkSession with ResourceHelp
       testError = new Exception(s"test$i", testError)
     }
   }
+
+  test("null filename in stack trace elements") {
+    val testError = new Exception("test")
+    val stackTrace = testError.getStackTrace()
+    stackTrace(0) = new StackTraceElement(
+      stackTrace(0).getClassName,
+      stackTrace(0).getMethodName,
+      null,
+      stackTrace(0).getLineNumber)
+    testError.setStackTrace(stackTrace)
+
+    val errorId = UUID.randomUUID().toString()
+
+    SparkConnectService
+      .getOrCreateIsolatedSession(userId, sessionId)
+      .errorIdToError
+      .put(errorId, testError)
+
+    val response = fetchErrorDetails(userId, sessionId, errorId)
+    assert(response.hasRootErrorIdx)
+    assert(response.getRootErrorIdx == 0)
+
+    assert(response.getErrors(0).getStackTraceCount > 0)
+    assert(!response.getErrors(0).getStackTrace(0).hasFileName)
+  }
 }
diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py
index 0634c9b35620..0cc8085763ce 100644
--- a/python/pyspark/sql/connect/proto/base_pb2.py
+++ b/python/pyspark/sql/connect/proto/base_pb2.py
@@ -37,7 +37,7 @@ from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17 [...]
+    b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17 [...]
 )
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
@@ -200,11 +200,11 @@ if _descriptor._USE_C_DESCRIPTORS == False:
     _FETCHERRORDETAILSREQUEST._serialized_start = 11301
     _FETCHERRORDETAILSREQUEST._serialized_end = 11502
     _FETCHERRORDETAILSRESPONSE._serialized_start = 11505
-    _FETCHERRORDETAILSRESPONSE._serialized_end = 12051
+    _FETCHERRORDETAILSRESPONSE._serialized_end = 12070
     _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 11650
-    _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 11805
-    _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 11808
-    _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 12032
-    _SPARKCONNECTSERVICE._serialized_start = 12054
-    _SPARKCONNECTSERVICE._serialized_end = 12903
+    _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 11824
+    _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 11827
+    _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 12051
+    _SPARKCONNECTSERVICE._serialized_start = 12073
+    _SPARKCONNECTSERVICE._serialized_end = 12922
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi
index f154b199ce84..6320ec7bb56b 100644
--- a/python/pyspark/sql/connect/proto/base_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/base_pb2.pyi
@@ -2816,12 +2816,20 @@ class FetchErrorDetailsResponse(google.protobuf.message.Message):
             *,
             declaring_class: builtins.str = ...,
             method_name: builtins.str = ...,
-            file_name: builtins.str = ...,
+            file_name: builtins.str | None = ...,
             line_number: builtins.int = ...,
         ) -> None: ...
+        def HasField(
+            self,
+            field_name: typing_extensions.Literal[
+                "_file_name", b"_file_name", "file_name", b"file_name"
+            ],
+        ) -> builtins.bool: ...
         def ClearField(
             self,
             field_name: typing_extensions.Literal[
+                "_file_name",
+                b"_file_name",
                 "declaring_class",
                 b"declaring_class",
                 "file_name",
@@ -2832,6 +2840,9 @@ class FetchErrorDetailsResponse(google.protobuf.message.Message):
                 b"method_name",
             ],
         ) -> None: ...
+        def WhichOneof(
+            self, oneof_group: typing_extensions.Literal["_file_name", b"_file_name"]
+        ) -> typing_extensions.Literal["file_name"] | None: ...
 
     class Error(google.protobuf.message.Message):
         """Error defines the schema for the representing exception."""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org