This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 35a99a87c51 [SPARK-46213][PYTHON] Introduce `PySparkImportError` for error framework
35a99a87c51 is described below

commit 35a99a87c51c504c0231715e14bdbcc89a6b63d0
Author: Haejoon Lee <haejoon....@databricks.com>
AuthorDate: Wed Dec 6 17:15:38 2023 +0900

    [SPARK-46213][PYTHON] Introduce `PySparkImportError` for error framework
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to introduce `PySparkImportError` for the error framework.
    
    **NOTE**: This PR was originally merged as https://github.com/apache/spark/pull/44123 but reverted because special characters in error messages were not parsed properly, so this PR also includes a fix to `python/pyspark/errors/utils.py`.
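
    For illustration, a standalone sketch of the fix (the wrapper name `convert_placeholders` is made up for this example; the inner `replace_match` mirrors the new helper in `python/pyspark/errors/utils.py`):

    ```python
    import re
    from typing import Match

    def convert_placeholders(message_template: str) -> str:
        # Convert <param> to {param} only when the angle brackets are paired,
        # so a stray '<' or '>' in the template no longer breaks str.format().
        def replace_match(match: Match[str]) -> str:
            return match.group().translate(str.maketrans("<>", "{}"))

        return re.sub(r"<([^<>]*)>", replace_match, message_template)

    print(convert_placeholders("<package_name> >= <minimum_version>, got x < y"))
    # {package_name} >= {minimum_version}, got x < y
    ```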
    
    ### Why are the changes needed?
    
    For better error handling.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No API changes, but it improves the user-facing error messages.
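
    For example (hypothetical session; assumes Pandas is not installed), existing `except ImportError` handlers keep working because `PySparkImportError` subclasses `ImportError`, while the error now carries a structured error class:

    ```python
    from pyspark.errors import PySparkImportError
    from pyspark.sql.pandas.utils import require_minimum_pandas_version

    try:
        require_minimum_pandas_version()
    except ImportError as e:  # still caught as a plain ImportError
        print(e)  # e.g. [PACKAGE_NOT_INSTALLED] Pandas >= ... must be installed; however, it was not found.
        if isinstance(e, PySparkImportError):
            print(e.getErrorClass())  # PACKAGE_NOT_INSTALLED
    ```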
    
    ### How was this patch tested?
    
    The existing CI should pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #44176 from itholic/import_error_followup.
    
    Authored-by: Haejoon Lee <haejoon....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/docs/source/reference/pyspark.errors.rst |  1 +
 python/pyspark/errors/__init__.py               |  2 ++
 python/pyspark/errors/error_classes.py          |  5 ++++
 python/pyspark/errors/exceptions/base.py        |  6 ++++
 python/pyspark/errors/utils.py                  | 11 +++++--
 python/pyspark/sql/connect/utils.py             | 36 ++++++++++++++++-------
 python/pyspark/sql/pandas/utils.py              | 39 +++++++++++++++++--------
 7 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/python/docs/source/reference/pyspark.errors.rst b/python/docs/source/reference/pyspark.errors.rst
index 56fdde2584c..a4997506b41 100644
--- a/python/docs/source/reference/pyspark.errors.rst
+++ b/python/docs/source/reference/pyspark.errors.rst
@@ -44,6 +44,7 @@ Classes
     PySparkRuntimeError
     PySparkTypeError
     PySparkValueError
+    PySparkImportError
     PySparkIndexError
     PythonException
     QueryExecutionException
diff --git a/python/pyspark/errors/__init__.py b/python/pyspark/errors/__init__.py
index 0a55084a4a5..07033d21643 100644
--- a/python/pyspark/errors/__init__.py
+++ b/python/pyspark/errors/__init__.py
@@ -39,6 +39,7 @@ from pyspark.errors.exceptions.base import (  # noqa: F401
     SparkNoSuchElementException,
     PySparkTypeError,
     PySparkValueError,
+    PySparkImportError,
     PySparkIndexError,
     PySparkAttributeError,
     PySparkRuntimeError,
@@ -70,6 +71,7 @@ __all__ = [
     "SparkNoSuchElementException",
     "PySparkTypeError",
     "PySparkValueError",
+    "PySparkImportError",
     "PySparkIndexError",
     "PySparkAttributeError",
     "PySparkRuntimeError",
diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 7dd5cd92705..c93ffa94149 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -1018,6 +1018,11 @@ ERROR_CLASSES_JSON = """
       "<operation> is not supported."
     ]
   },
+  "UNSUPPORTED_PACKAGE_VERSION" : {
+    "message" : [
+      "<package_name> >= <minimum_version> must be installed; however, your version is <current_version>."
+    ]
+  },
   "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION" : {
     "message" : [
       "Function `<func_name>` should use only POSITIONAL or POSITIONAL OR KEYWORD arguments."
diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py
index e7f1e4386d7..b7d8ed88ec0 100644
--- a/python/pyspark/errors/exceptions/base.py
+++ b/python/pyspark/errors/exceptions/base.py
@@ -264,3 +264,9 @@ class PySparkKeyError(PySparkException, KeyError):
     """
     Wrapper class for KeyError to support error classes.
     """
+
+
+class PySparkImportError(PySparkException, ImportError):
+    """
+    Wrapper class for ImportError to support error classes.
+    """
diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py
index a4894dcb1a6..e1f249506dd 100644
--- a/python/pyspark/errors/utils.py
+++ b/python/pyspark/errors/utils.py
@@ -16,7 +16,7 @@
 #
 
 import re
-from typing import Dict
+from typing import Dict, Match
 
 from pyspark.errors.error_classes import ERROR_CLASSES_MAP
 
@@ -40,9 +40,14 @@ class ErrorClassesReader:
             f"Undefined error message parameter for error class: {error_class}. "
             f"Parameters: {message_parameters}"
         )
-        table = str.maketrans("<>", "{}")
 
-        return message_template.translate(table).format(**message_parameters)
+        def replace_match(match: Match[str]) -> str:
+            return match.group().translate(str.maketrans("<>", "{}"))
+
+        # Convert <> to {} only when paired.
+        message_template = re.sub(r"<([^<>]*)>", replace_match, message_template)
+
+        return message_template.format(**message_parameters)
 
     def get_message_template(self, error_class: str) -> str:
         """
diff --git a/python/pyspark/sql/connect/utils.py b/python/pyspark/sql/connect/utils.py
index fd85d75060b..24073f3a30e 100644
--- a/python/pyspark/sql/connect/utils.py
+++ b/python/pyspark/sql/connect/utils.py
@@ -18,6 +18,7 @@ import sys
 
 from pyspark.loose_version import LooseVersion
 from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
+from pyspark.errors import PySparkImportError
 
 
 def check_dependencies(mod_name: str) -> None:
@@ -45,13 +46,21 @@ def require_minimum_grpc_version() -> None:
     try:
         import grpc
     except ImportError as error:
-        raise ImportError(
-            f"grpcio >= {minimum_grpc_version} must be installed; however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "grpcio",
+                "minimum_version": str(minimum_grpc_version),
+            },
         ) from error
     if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
-        raise ImportError(
-            f"grpcio >= {minimum_grpc_version} must be installed; however, "
-            f"your version was {grpc.__version__}."
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name": "grpcio",
+                "minimum_version": str(minimum_grpc_version),
+                "current_version": str(grpc.__version__),
+            },
         )
 
 
@@ -62,8 +71,12 @@ def require_minimum_grpcio_status_version() -> None:
     try:
         import grpc_status  # noqa
     except ImportError as error:
-        raise ImportError(
-            f"grpcio-status >= {minimum_grpc_version} must be installed; however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "grpcio-status",
+                "minimum_version": str(minimum_grpc_version),
+            },
         ) from error
 
 
@@ -74,9 +87,12 @@ def require_minimum_googleapis_common_protos_version() -> None:
     try:
         import google.rpc  # noqa
     except ImportError as error:
-        raise ImportError(
-            f"googleapis-common-protos >= {minimum_common_protos_version} must be installed; "
-            "however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "googleapis-common-protos",
+                "minimum_version": str(minimum_common_protos_version),
+            },
         ) from error
 
 
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index db60f77c391..63554c5a50c 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -16,7 +16,7 @@
 #
 
 from pyspark.loose_version import LooseVersion
-from pyspark.errors import PySparkRuntimeError
+from pyspark.errors import PySparkImportError, PySparkRuntimeError
 
 
 def require_minimum_pandas_version() -> None:
@@ -32,13 +32,21 @@ def require_minimum_pandas_version() -> None:
         have_pandas = False
         raised_error = error
     if not have_pandas:
-        raise ImportError(
-            "Pandas >= %s must be installed; however, " "it was not found." % minimum_pandas_version
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "Pandas",
+                "minimum_version": str(minimum_pandas_version),
+            },
         ) from raised_error
     if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
-        raise ImportError(
-            "Pandas >= %s must be installed; however, "
-            "your version was %s." % (minimum_pandas_version, pandas.__version__)
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name": "Pandas",
+                "minimum_version": str(minimum_pandas_version),
+                "current_version": str(pandas.__version__),
+            },
         )
 
 
@@ -57,14 +65,21 @@ def require_minimum_pyarrow_version() -> None:
         have_arrow = False
         raised_error = error
     if not have_arrow:
-        raise ImportError(
-            "PyArrow >= %s must be installed; however, "
-            "it was not found." % minimum_pyarrow_version
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "PyArrow",
+                "minimum_version": str(minimum_pyarrow_version),
+            },
         ) from raised_error
     if LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version):
-        raise ImportError(
-            "PyArrow >= %s must be installed; however, "
-            "your version was %s." % (minimum_pyarrow_version, pyarrow.__version__)
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name": "PyArrow",
+                "minimum_version": str(minimum_pyarrow_version),
+                "current_version": str(pyarrow.__version__),
+            },
         )
     if os.environ.get("ARROW_PRE_0_15_IPC_FORMAT", "0") == "1":
         raise PySparkRuntimeError(

