This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new be49ca6dd71b [SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark error framework be49ca6dd71b is described below commit be49ca6dd71b87172df9d88f305f06a7b87c9ecf Author: Haejoon Lee <haejoon....@databricks.com> AuthorDate: Mon Dec 4 16:18:27 2023 -0800 [SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark error framework ### What changes were proposed in this pull request? This PR proposes to migrate all remaining `AttributeError` from `pyspark/sql/*` into the PySpark error framework as `PySparkAttributeError`, assigning dedicated error classes. ### Why are the changes needed? To improve the error handling in PySpark. ### Does this PR introduce _any_ user-facing change? No API changes, but the user-facing error messages will be improved. ### How was this patch tested? The existing CI should pass. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44150 from itholic/migrate_attribute_error. Authored-by: Haejoon Lee <haejoon....@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/connect/dataframe.py | 10 +++++++--- python/pyspark/sql/dataframe.py | 11 ++++++++--- python/pyspark/sql/types.py | 13 ++++++++++--- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index a73a24818c0c..6a1d45712163 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -14,7 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from pyspark.errors.exceptions.base import SessionNotSameException, PySparkIndexError +from pyspark.errors.exceptions.base import ( + SessionNotSameException, + PySparkIndexError, + PySparkAttributeError, +) from pyspark.sql.connect.utils import check_dependencies check_dependencies(__name__) @@ -1694,8 +1698,8 @@ class DataFrame: ) if name not in self.columns: - raise AttributeError( - "'%s' object has no attribute '%s'" % (self.__class__.__name__, name) + raise PySparkAttributeError( + error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name} ) return _to_col_with_plan_id( diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 8b40b222a289..5211d874ba33 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -43,7 +43,12 @@ from py4j.java_gateway import JavaObject, JVMView from pyspark import copy_func, _NoValue from pyspark._globals import _NoValueType from pyspark.context import SparkContext -from pyspark.errors import PySparkTypeError, PySparkValueError, PySparkIndexError +from pyspark.errors import ( + PySparkTypeError, + PySparkValueError, + PySparkIndexError, + PySparkAttributeError, +) from pyspark.rdd import ( RDD, _load_from_socket, @@ -3613,8 +3618,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): +---+ """ if name not in self.columns: - raise AttributeError( - "'%s' object has no attribute '%s'" % (self.__class__.__name__, name) + raise PySparkAttributeError( + error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name} ) jc = self._jdf.apply(name) return Column(jc) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index cbfc4ab5df02..d3eed77b3838 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -55,6 +55,7 @@ from pyspark.errors import ( PySparkTypeError, PySparkValueError, PySparkIndexError, + PySparkAttributeError, PySparkKeyError, ) @@ -2574,16 +2575,22 @@ class Row(tuple): def 
__getattr__(self, item: str) -> Any: if item.startswith("__"): - raise AttributeError(item) + raise PySparkAttributeError( + error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": item} + ) try: # it will be slow when it has many fields, # but this will not be used in normal cases idx = self.__fields__.index(item) return self[idx] except IndexError: - raise AttributeError(item) + raise PySparkAttributeError( + error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": item} + ) except ValueError: - raise AttributeError(item) + raise PySparkAttributeError( + error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": item} + ) def __setattr__(self, key: Any, value: Any) -> None: if key != "__fields__": --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org