This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 75b0eb2d6017 [SPARK-46213][PYTHON] Introduce `PySparkImportError` for error framework
75b0eb2d6017 is described below

commit 75b0eb2d601763847507a5e715b3732db004544a
Author: Haejoon Lee <haejoon....@databricks.com>
AuthorDate: Sat Dec 2 13:15:35 2023 -0800

    [SPARK-46213][PYTHON] Introduce `PySparkImportError` for error framework
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to introduce `PySparkImportError` for the error framework.
    
    ### Why are the changes needed?
    
    For better error handling. Import-related errors now go through the PySpark error framework instead of raising plain `ImportError`s with ad-hoc message strings.
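
    Exceptions raised through the error framework carry a machine-readable
    error class and message parameters, so callers can branch on the error
    class instead of parsing message strings. A minimal sketch of what this
    enables (the version string below is illustrative, not the actual
    minimum):

    ```python
    from pyspark.errors import PySparkImportError

    try:
        raise PySparkImportError(
            error_class="PACKAGE_NOT_INSTALLED",
            message_parameters={
                "package_name": "grpcio",
                "minimum_version": "1.48.1",  # illustrative version only
            },
        )
    except PySparkImportError as e:
        # Structured access to the error; no message-string parsing needed.
        assert e.getErrorClass() == "PACKAGE_NOT_INSTALLED"
        print(e.getMessageParameters()["package_name"])  # grpcio
    ```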
    
    ### Does this PR introduce _any_ user-facing change?
    
    No API changes, but it improves the user-facing error messages.
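
    Because `PySparkImportError` subclasses the built-in `ImportError` (see
    `base.py` below), existing `except ImportError` handlers keep working.
    A sketch of how the new error surfaces to a user (message abbreviated;
    the exact minimum version depends on the build):

    ```python
    from pyspark.errors import PySparkImportError
    from pyspark.sql.connect.utils import require_minimum_grpc_version

    try:
        require_minimum_grpc_version()  # raises if grpcio is missing or too old
    except PySparkImportError as e:
        # e.g. "[PACKAGE_NOT_INSTALLED] grpcio >= <minimum_version> must be
        # installed; however, it was not found."
        print(e)
    ```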
    
    ### How was this patch tested?
    
    The existing CI should pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #44123 from itholic/pyspark_import_error.
    
    Authored-by: Haejoon Lee <haejoon....@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 python/docs/source/reference/pyspark.errors.rst |  1 +
 python/pyspark/errors/__init__.py               |  2 ++
 python/pyspark/errors/error_classes.py          | 10 +++++++
 python/pyspark/errors/exceptions/base.py        |  6 ++++
 python/pyspark/sql/connect/utils.py             | 36 ++++++++++++++++-------
 python/pyspark/sql/pandas/utils.py              | 38 ++++++++++++++++++-------
 6 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/python/docs/source/reference/pyspark.errors.rst b/python/docs/source/reference/pyspark.errors.rst
index d659657afc56..88cbd405b83d 100644
--- a/python/docs/source/reference/pyspark.errors.rst
+++ b/python/docs/source/reference/pyspark.errors.rst
@@ -43,6 +43,7 @@ Classes
     PySparkRuntimeError
     PySparkTypeError
     PySparkValueError
+    PySparkImportError
     PySparkIndexError
     PythonException
     QueryExecutionException
diff --git a/python/pyspark/errors/__init__.py b/python/pyspark/errors/__init__.py
index d0a62537d347..923cb665d112 100644
--- a/python/pyspark/errors/__init__.py
+++ b/python/pyspark/errors/__init__.py
@@ -39,6 +39,7 @@ from pyspark.errors.exceptions.base import (  # noqa: F401
     SparkNoSuchElementException,
     PySparkTypeError,
     PySparkValueError,
+    PySparkImportError,
     PySparkIndexError,
     PySparkAttributeError,
     PySparkRuntimeError,
@@ -69,6 +70,7 @@ __all__ = [
     "SparkNoSuchElementException",
     "PySparkTypeError",
     "PySparkValueError",
+    "PySparkImportError",
     "PySparkIndexError",
     "PySparkAttributeError",
     "PySparkRuntimeError",
diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 289b16c9b606..e1a93aa6be1a 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -667,6 +667,11 @@ ERROR_CLASSES_JSON = """
       "Only a single trigger is allowed."
     ]
   },
+  "PACKAGE_NOT_INSTALLED" : {
+    "message" : [
+      "<package_name> >= <minimum_version> must be installed; however, it was 
not found."
+    ]
+  },
   "PIPE_FUNCTION_EXITED" : {
     "message" : [
       "Pipe function `<func_name>` exited with error code <error_code>."
@@ -908,6 +913,11 @@ ERROR_CLASSES_JSON = """
       "<operation> is not supported."
     ]
   },
+  "UNSUPPORTED_PACKAGE_VERSION" : {
+    "message" : [
+      "<package_name> >= <minimum_version> must be installed; however, your 
version is <current_version>."
+    ]
+  },
   "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION" : {
     "message" : [
       "Function `<func_name>` should use only POSITIONAL or POSITIONAL OR 
KEYWORD arguments."
diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py
index c84ca17c3dbd..4a2b31418e29 100644
--- a/python/pyspark/errors/exceptions/base.py
+++ b/python/pyspark/errors/exceptions/base.py
@@ -258,3 +258,9 @@ class PySparkPicklingError(PySparkException, PicklingError):
     """
     Wrapper class for pickle.PicklingError to support error classes.
     """
+
+
+class PySparkImportError(PySparkException, ImportError):
+    """
+    Wrapper class for ImportError to support error classes.
+    """
diff --git a/python/pyspark/sql/connect/utils.py b/python/pyspark/sql/connect/utils.py
index fd85d75060b5..88f26202b0b2 100644
--- a/python/pyspark/sql/connect/utils.py
+++ b/python/pyspark/sql/connect/utils.py
@@ -18,6 +18,7 @@ import sys
 
 from pyspark.loose_version import LooseVersion
 from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
+from pyspark.errors import PySparkImportError
 
 
 def check_dependencies(mod_name: str) -> None:
@@ -45,13 +46,21 @@ def require_minimum_grpc_version() -> None:
     try:
         import grpc
     except ImportError as error:
-        raise ImportError(
-            f"grpcio >= {minimum_grpc_version} must be installed; however, it 
was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name:": "grpcio",
+                "minimum_version": str(minimum_grpc_version),
+            },
         ) from error
     if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
-        raise ImportError(
-            f"grpcio >= {minimum_grpc_version} must be installed; however, "
-            f"your version was {grpc.__version__}."
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name:": "grpcio",
+                "minimum_version": str(minimum_grpc_version),
+                "current_version": str(grpc.__version__),
+            },
         )
 
 
@@ -62,8 +71,12 @@ def require_minimum_grpcio_status_version() -> None:
     try:
         import grpc_status  # noqa
     except ImportError as error:
-        raise ImportError(
-            f"grpcio-status >= {minimum_grpc_version} must be installed; 
however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name:": "grpcio-status",
+                "minimum_version": str(minimum_grpc_version),
+            },
         ) from error
 
 
@@ -74,9 +87,12 @@ def require_minimum_googleapis_common_protos_version() -> None:
     try:
         import google.rpc  # noqa
     except ImportError as error:
-        raise ImportError(
-            f"googleapis-common-protos >= {minimum_common_protos_version} must 
be installed; "
-            "however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name:": "googleapis-common-protos",
+                "minimum_version": str(minimum_common_protos_version),
+            },
         ) from error
 
 
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index b62be2081028..25152fd5cc1b 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -16,6 +16,7 @@
 #
 
 from pyspark.loose_version import LooseVersion
+from pyspark.errors import PySparkImportError
 
 
 def require_minimum_pandas_version() -> None:
@@ -31,13 +32,21 @@ def require_minimum_pandas_version() -> None:
         have_pandas = False
         raised_error = error
     if not have_pandas:
-        raise ImportError(
-            "Pandas >= %s must be installed; however, " "it was not found." % 
minimum_pandas_version
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name:": "Pandas",
+                "minimum_version": str(minimum_pandas_version),
+            },
         ) from raised_error
     if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
-        raise ImportError(
-            "Pandas >= %s must be installed; however, "
-            "your version was %s." % (minimum_pandas_version, 
pandas.__version__)
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name:": "Pandas",
+                "minimum_version": str(minimum_pandas_version),
+                "current_version": str(pandas.__version__),
+            },
         )
 
 
@@ -56,14 +65,21 @@ def require_minimum_pyarrow_version() -> None:
         have_arrow = False
         raised_error = error
     if not have_arrow:
-        raise ImportError(
-            "PyArrow >= %s must be installed; however, "
-            "it was not found." % minimum_pyarrow_version
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name:": "PyArrow",
+                "minimum_version": str(minimum_pyarrow_version),
+            },
         ) from raised_error
     if LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version):
-        raise ImportError(
-            "PyArrow >= %s must be installed; however, "
-            "your version was %s." % (minimum_pyarrow_version, 
pyarrow.__version__)
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name:": "PyArrow",
+                "minimum_version": str(minimum_pyarrow_version),
+                "current_version": str(pyarrow.__version__),
+            },
         )
     if os.environ.get("ARROW_PRE_0_15_IPC_FORMAT", "0") == "1":
         raise RuntimeError(

