This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 35a99a87c51 [SPARK-46213][PYTHON] Introduce `PySparkImportError` for error framework
35a99a87c51 is described below

commit 35a99a87c51c504c0231715e14bdbcc89a6b63d0
Author: Haejoon Lee <haejoon....@databricks.com>
AuthorDate: Wed Dec 6 17:15:38 2023 +0900

    [SPARK-46213][PYTHON] Introduce `PySparkImportError` for error framework

    ### What changes were proposed in this pull request?

    This PR proposes to introduce `PySparkImportError` for the error framework.

    **NOTE**: This PR was previously merged from https://github.com/apache/spark/pull/44123 but was reverted because special characters were not parsed properly. This PR therefore also includes a fix for `python/pyspark/errors/utils.py`.

    ### Why are the changes needed?

    For better error handling.

    ### Does this PR introduce _any_ user-facing change?

    No API changes, but it improves the user-facing error messages.

    ### How was this patch tested?

    The existing CI should pass.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #44176 from itholic/import_error_followup.

    Authored-by: Haejoon Lee <haejoon....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/docs/source/reference/pyspark.errors.rst |  1 +
 python/pyspark/errors/__init__.py               |  2 ++
 python/pyspark/errors/error_classes.py          |  5 ++++
 python/pyspark/errors/exceptions/base.py        |  6 ++++
 python/pyspark/errors/utils.py                  | 11 +++++--
 python/pyspark/sql/connect/utils.py             | 36 ++++++++++++++++-------
 python/pyspark/sql/pandas/utils.py              | 39 +++++++++++++++++--------
 7 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/python/docs/source/reference/pyspark.errors.rst b/python/docs/source/reference/pyspark.errors.rst
index 56fdde2584c..a4997506b41 100644
--- a/python/docs/source/reference/pyspark.errors.rst
+++ b/python/docs/source/reference/pyspark.errors.rst
@@ -44,6 +44,7 @@ Classes
     PySparkRuntimeError
     PySparkTypeError
     PySparkValueError
+    PySparkImportError
     PySparkIndexError
     PythonException
     QueryExecutionException
diff --git a/python/pyspark/errors/__init__.py b/python/pyspark/errors/__init__.py
index 0a55084a4a5..07033d21643 100644
--- a/python/pyspark/errors/__init__.py
+++ b/python/pyspark/errors/__init__.py
@@ -39,6 +39,7 @@ from pyspark.errors.exceptions.base import (  # noqa: F401
     SparkNoSuchElementException,
     PySparkTypeError,
     PySparkValueError,
+    PySparkImportError,
     PySparkIndexError,
     PySparkAttributeError,
     PySparkRuntimeError,
@@ -70,6 +71,7 @@ __all__ = [
     "SparkNoSuchElementException",
     "PySparkTypeError",
     "PySparkValueError",
+    "PySparkImportError",
     "PySparkIndexError",
     "PySparkAttributeError",
     "PySparkRuntimeError",
diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 7dd5cd92705..c93ffa94149 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -1018,6 +1018,11 @@ ERROR_CLASSES_JSON = """
       "<operation> is not supported."
     ]
   },
+  "UNSUPPORTED_PACKAGE_VERSION" : {
+    "message" : [
+      "<package_name> >= <minimum_version> must be installed; however, your version is <current_version>."
+    ]
+  },
   "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION" : {
     "message" : [
       "Function `<func_name>` should use only POSITIONAL or POSITIONAL OR KEYWORD arguments."
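For context, the `<...>` tokens in the new template are placeholders filled from `message_parameters`: the error framework first converts each paired `<name>` to `{name}`, then calls `str.format()`. A minimal sketch of the rendered message, using hypothetical version numbers that are not values from this patch:

    # Template from the hunk above, shown after the framework's <name> -> {name}
    # conversion; str.format() then substitutes the parameters.
    template = (
        "{package_name} >= {minimum_version} must be installed; "
        "however, your version is {current_version}."
    )
    print(template.format(
        package_name="grpcio",      # hypothetical example values
        minimum_version="1.48.1",
        current_version="1.40.0",
    ))
    # grpcio >= 1.48.1 must be installed; however, your version is 1.40.0.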
diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py
index e7f1e4386d7..b7d8ed88ec0 100644
--- a/python/pyspark/errors/exceptions/base.py
+++ b/python/pyspark/errors/exceptions/base.py
@@ -264,3 +264,9 @@ class PySparkKeyError(PySparkException, KeyError):
     """
     Wrapper class for KeyError to support error classes.
     """
+
+
+class PySparkImportError(PySparkException, ImportError):
+    """
+    Wrapper class for ImportError to support error classes.
+    """
diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py
index a4894dcb1a6..e1f249506dd 100644
--- a/python/pyspark/errors/utils.py
+++ b/python/pyspark/errors/utils.py
@@ -16,7 +16,7 @@
 #
 
 import re
-from typing import Dict
+from typing import Dict, Match
 
 from pyspark.errors.error_classes import ERROR_CLASSES_MAP
 
@@ -40,9 +40,14 @@ class ErrorClassesReader:
                 f"Undefined error message parameter for error class: {error_class}. "
                 f"Parameters: {message_parameters}"
             )
-        table = str.maketrans("<>", "{}")
 
-        return message_template.translate(table).format(**message_parameters)
+        def replace_match(match: Match[str]) -> str:
+            return match.group().translate(str.maketrans("<>", "{}"))
+
+        # Convert <> to {} only when paired.
+        message_template = re.sub(r"<([^<>]*)>", replace_match, message_template)
+
+        return message_template.format(**message_parameters)
 
     def get_message_template(self, error_class: str) -> str:
         """
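The `utils.py` change above is the fix mentioned in the NOTE: the old code translated every `<` and `>` in a template, so the bare `>` inside `>=` (as in the new version messages) became `}` and `str.format()` raised on the stray brace. The regex rewrites only paired `<...>` spans. A standalone sketch, using a simplified template and hypothetical parameter values:

    import re
    from typing import Match

    template = "<package_name> >= <minimum_version> must be installed."

    # Old behavior: blanket translation also hits the bare ">" in ">=".
    broken = template.translate(str.maketrans("<>", "{}"))
    # '{package_name} }= {minimum_version} must be installed.'
    # broken.format(...) raises ValueError: Single '}' encountered in format string

    # Fixed behavior: convert only matched <...> pairs.
    def replace_match(match: Match[str]) -> str:
        return match.group().translate(str.maketrans("<>", "{}"))

    fixed = re.sub(r"<([^<>]*)>", replace_match, template)
    print(fixed.format(package_name="grpcio", minimum_version="1.48.1"))
    # grpcio >= 1.48.1 must be installed.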
diff --git a/python/pyspark/sql/connect/utils.py b/python/pyspark/sql/connect/utils.py
index fd85d75060b..24073f3a30e 100644
--- a/python/pyspark/sql/connect/utils.py
+++ b/python/pyspark/sql/connect/utils.py
@@ -18,6 +18,7 @@ import sys
 
 from pyspark.loose_version import LooseVersion
 from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
+from pyspark.errors import PySparkImportError
 
 
 def check_dependencies(mod_name: str) -> None:
@@ -45,13 +46,21 @@ def require_minimum_grpc_version() -> None:
     try:
         import grpc
     except ImportError as error:
-        raise ImportError(
-            f"grpcio >= {minimum_grpc_version} must be installed; however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "grpcio",
+                "minimum_version": str(minimum_grpc_version),
+            },
         ) from error
     if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
-        raise ImportError(
-            f"grpcio >= {minimum_grpc_version} must be installed; however, "
-            f"your version was {grpc.__version__}."
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name": "grpcio",
+                "minimum_version": str(minimum_grpc_version),
+                "current_version": str(grpc.__version__),
+            },
         )
@@ -62,8 +71,12 @@ def require_minimum_grpcio_status_version() -> None:
     try:
         import grpc_status  # noqa
     except ImportError as error:
-        raise ImportError(
-            f"grpcio-status >= {minimum_grpc_version} must be installed; however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "grpcio-status",
+                "minimum_version": str(minimum_grpc_version),
+            },
         ) from error
@@ -74,9 +87,12 @@ def require_minimum_googleapis_common_protos_version() -> None:
     try:
         import google.rpc  # noqa
     except ImportError as error:
-        raise ImportError(
-            f"googleapis-common-protos >= {minimum_common_protos_version} must be installed; "
-            "however, it was not found."
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "googleapis-common-protos",
+                "minimum_version": str(minimum_common_protos_version),
+            },
         ) from error
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index db60f77c391..63554c5a50c 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -16,7 +16,7 @@
 #
 
 from pyspark.loose_version import LooseVersion
-from pyspark.errors import PySparkRuntimeError
+from pyspark.errors import PySparkImportError, PySparkRuntimeError
 
 
 def require_minimum_pandas_version() -> None:
@@ -32,13 +32,21 @@ def require_minimum_pandas_version() -> None:
         have_pandas = False
         raised_error = error
     if not have_pandas:
-        raise ImportError(
-            "Pandas >= %s must be installed; however, " "it was not found." % minimum_pandas_version
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "Pandas",
+                "minimum_version": str(minimum_pandas_version),
+            },
         ) from raised_error
     if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
-        raise ImportError(
-            "Pandas >= %s must be installed; however, "
-            "your version was %s." % (minimum_pandas_version, pandas.__version__)
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name": "Pandas",
+                "minimum_version": str(minimum_pandas_version),
+                "current_version": str(pandas.__version__),
+            },
         )
@@ -57,14 +65,21 @@ def require_minimum_pyarrow_version() -> None:
         have_arrow = False
         raised_error = error
     if not have_arrow:
-        raise ImportError(
-            "PyArrow >= %s must be installed; however, "
-            "it was not found." % minimum_pyarrow_version
+        raise PySparkImportError(
+            error_class="PACKAGE_NOT_INSTALLED",
+            message_parameters={
+                "package_name": "PyArrow",
+                "minimum_version": str(minimum_pyarrow_version),
+            },
         ) from raised_error
     if LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version):
-        raise ImportError(
-            "PyArrow >= %s must be installed; however, "
-            "your version was %s." % (minimum_pyarrow_version, pyarrow.__version__)
+        raise PySparkImportError(
+            error_class="UNSUPPORTED_PACKAGE_VERSION",
+            message_parameters={
+                "package_name": "PyArrow",
+                "minimum_version": str(minimum_pyarrow_version),
+                "current_version": str(pyarrow.__version__),
+            },
         )
     if os.environ.get("ARROW_PRE_0_15_IPC_FORMAT", "0") == "1":
         raise PySparkRuntimeError(
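A usage sketch, not part of the patch: because `PySparkImportError` subclasses both `PySparkException` and `ImportError` (see the `base.py` hunk above), existing `except ImportError` handlers keep working, while new code can branch on the error class. This assumes an environment whose pandas installation is missing or below the required minimum:

    from pyspark.errors import PySparkImportError
    from pyspark.sql.pandas.utils import require_minimum_pandas_version

    try:
        require_minimum_pandas_version()
    except PySparkImportError as e:  # also caught by a plain `except ImportError`
        # getErrorClass() is inherited from PySparkException; here it returns
        # "PACKAGE_NOT_INSTALLED" or "UNSUPPORTED_PACKAGE_VERSION".
        print(e.getErrorClass())
        print(str(e))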