This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9666bf37958e [SPARK-46231][PYTHON] Migrate all remaining `NotImplementedError` & `TypeError` into PySpark error framework 9666bf37958e is described below commit 9666bf37958e5381278ca622bf7ec4b4ccb13d79 Author: Haejoon Lee <haejoon....@databricks.com> AuthorDate: Mon Dec 4 08:54:50 2023 -0800 [SPARK-46231][PYTHON] Migrate all remaining `NotImplementedError` & `TypeError` into PySpark error framework ### What changes were proposed in this pull request? This PR proposes to migrate all remaining `NotImplementedError` and `TypeError` raises in `pyspark/sql/*` into the PySpark error framework, replacing them with `PySparkNotImplementedError` and assigning dedicated error classes. ### Why are the changes needed? To improve the error handling in PySpark. ### Does this PR introduce _any_ user-facing change? No API changes, but the user-facing error messages will be improved. ### How was this patch tested? The existing CI should pass. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44148 from itholic/not_impl_and_type. Authored-by: Haejoon Lee <haejoon....@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/datasource.py | 21 +++++++++++++++++---- .../sql/tests/pandas/test_pandas_udf_grouped_agg.py | 3 +++ python/pyspark/sql/udf.py | 8 +++++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/datasource.py b/python/pyspark/sql/datasource.py index 1c5b6d663285..4713ca5366a7 100644 --- a/python/pyspark/sql/datasource.py +++ b/python/pyspark/sql/datasource.py @@ -19,6 +19,7 @@ from typing import final, Any, Dict, Iterator, List, Sequence, Tuple, Type, Unio from pyspark.sql import Row from pyspark.sql.types import StructType +from pyspark.errors import PySparkNotImplementedError if TYPE_CHECKING: from pyspark.sql._typing import OptionalPrimitiveType @@ -103,7 +104,10 @@ class DataSource(ABC): >>> def schema(self): ... 
return StructType().add("a", "int").add("b", "string") """ - raise NotImplementedError + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "schema"}, + ) def reader(self, schema: StructType) -> "DataSourceReader": """ @@ -121,7 +125,10 @@ class DataSource(ABC): reader : DataSourceReader A reader instance for this data source. """ - raise NotImplementedError + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "reader"}, + ) def writer(self, schema: StructType, saveMode: str) -> "DataSourceWriter": """ @@ -142,7 +149,10 @@ class DataSource(ABC): writer : DataSourceWriter A writer instance for this data source. """ - raise NotImplementedError + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "writer"}, + ) class InputPartition: @@ -239,7 +249,10 @@ class DataSourceReader(ABC): >>> def partitions(self): ... return [RangeInputPartition(1, 3), RangeInputPartition(5, 10)] """ - raise NotImplementedError + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "partitions"}, + ) @abstractmethod def read(self, partition: InputPartition) -> Iterator[Union[Tuple, Row]]: diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py index b500be7a9695..455bb09a7dc4 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py @@ -720,6 +720,9 @@ class GroupedAggPandasUDFTestsMixin: class GroupedAggPandasUDFTests(GroupedAggPandasUDFTestsMixin, ReusedSQLTestCase): + def test_unsupported_types(self): + super().test_unsupported_types() + pass diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 9ffdbb218711..351bcea3f389 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -339,7 +339,13 
@@ class UserDefinedFunction: try: # StructType is not yet allowed as a return type, explicitly check here to fail fast if isinstance(self._returnType_placeholder, StructType): - raise TypeError + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={ + "feature": f"Invalid return type with grouped aggregate Pandas UDFs: " + f"{self._returnType_placeholder}" + }, + ) to_arrow_type(self._returnType_placeholder) except TypeError: raise PySparkNotImplementedError( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org