This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 8d64cb4144d1 [SPARK-46360][PYTHON] Enhance error message debugging with new `getMessage` API 8d64cb4144d1 is described below commit 8d64cb4144d17107fd3758d2a46430439203c7ad Author: Haejoon Lee <haejoon....@databricks.com> AuthorDate: Mon Dec 11 21:03:11 2023 -0800 [SPARK-46360][PYTHON] Enhance error message debugging with new `getMessage` API ### What changes were proposed in this pull request? This PR proposes to introduce `getMessage` to provide a standardized way for users to obtain a concise and clear error message. ### Why are the changes needed? Previously, extracting a simple and informative error message in PySpark was not straightforward. The internal `ErrorClassesReader.get_error_message` method was often used, but for JVM-originated errors not defined in `error_classes.py`, obtaining a succinct error message was challenging. The new `getMessage` API harmonizes error message retrieval across PySpark, leveraging existing JVM implementations to ensure consistency and clarity in the messages presented to the users. ### Does this PR introduce _any_ user-facing change? Yes, this PR introduces a `getMessage` API for directly accessing simplified error messages in PySpark. - **Before**: No official API for simplified error messages; excessive details in the error output: ```python from pyspark.sql.utils import AnalysisException try: spark.sql("""SELECT a""") except AnalysisException as e: str(e) # "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `a` cannot be resolved. 
SQLSTATE: 42703; line 1 pos 7;\n'Project ['a]\n+- OneRowRelation\n" ``` - **After**: The `getMessage` API provides streamlined, user-friendly error messages: ```python from pyspark.sql.utils import AnalysisException try: spark.sql("""SELECT a""") except AnalysisException as e: e.getMessage() # '[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `a` cannot be resolved. SQLSTATE: 42703' ``` ### How was this patch tested? Added UTs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44292 from itholic/getMessage. Authored-by: Haejoon Lee <haejoon....@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/errors/exceptions/base.py | 19 ++++++++++++++++++- python/pyspark/errors/exceptions/captured.py | 18 ++++++++++++++++++ python/pyspark/sql/tests/test_utils.py | 8 ++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py index b60800da3ff8..e40e1b2e93cb 100644 --- a/python/pyspark/errors/exceptions/base.py +++ b/python/pyspark/errors/exceptions/base.py @@ -60,6 +60,7 @@ class PySparkException(Exception): See Also -------- + :meth:`PySparkException.getMessage` :meth:`PySparkException.getMessageParameters` :meth:`PySparkException.getSqlState` """ @@ -74,6 +75,7 @@ class PySparkException(Exception): See Also -------- :meth:`PySparkException.getErrorClass` + :meth:`PySparkException.getMessage` :meth:`PySparkException.getSqlState` """ return self._message_parameters @@ -89,13 +91,28 @@ class PySparkException(Exception): See Also -------- :meth:`PySparkException.getErrorClass` + :meth:`PySparkException.getMessage` :meth:`PySparkException.getMessageParameters` """ return None + def getMessage(self) -> str: + """ + Returns full error message. + + .. 
versionadded:: 4.0.0 + + See Also + -------- + :meth:`PySparkException.getErrorClass` + :meth:`PySparkException.getMessageParameters` + :meth:`PySparkException.getSqlState` + """ + return f"[{self.getErrorClass()}] {self._message}" + def __str__(self) -> str: if self.getErrorClass() is not None: - return f"[{self.getErrorClass()}] {self._message}" + return self.getMessage() else: return self._message diff --git a/python/pyspark/errors/exceptions/captured.py b/python/pyspark/errors/exceptions/captured.py index ec987e0854ea..4164bb7b428d 100644 --- a/python/pyspark/errors/exceptions/captured.py +++ b/python/pyspark/errors/exceptions/captured.py @@ -118,6 +118,24 @@ class CapturedException(PySparkException): else: return None + def getMessage(self) -> str: + assert SparkContext._gateway is not None + gw = SparkContext._gateway + + if self._origin is not None and is_instance_of( + gw, self._origin, "org.apache.spark.SparkThrowable" + ): + error_class = self._origin.getErrorClass() + message_parameters = self._origin.getMessageParameters() + + error_message = gw.jvm.org.apache.spark.SparkThrowableHelper.getMessage( + error_class, message_parameters + ) + + return error_message + else: + return "" + def convert_exception(e: Py4JJavaError) -> CapturedException: assert e is not None diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py index f633837002e2..e13b933c46ba 100644 --- a/python/pyspark/sql/tests/test_utils.py +++ b/python/pyspark/sql/tests/test_utils.py @@ -1750,6 +1750,13 @@ class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin): self.assertEqual(e.getErrorClass(), "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION") self.assertEqual(e.getSqlState(), "42703") self.assertEqual(e.getMessageParameters(), {"objectName": "`a`"}) + self.assertEqual( + e.getMessage(), + ( + "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function " + "parameter with name `a` cannot be resolved. 
SQLSTATE: 42703" + ), + ) try: self.spark.sql("""SELECT assert_true(FALSE)""") @@ -1757,6 +1764,7 @@ class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin): self.assertIsNone(e.getErrorClass()) self.assertIsNone(e.getSqlState()) self.assertEqual(e.getMessageParameters(), {}) + self.assertEqual(e.getMessage(), "") if __name__ == "__main__": --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org