This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d574dbcc859 [SPARK-44413][PYTHON] Clarify error for unsupported arg 
data type in assertDataFrameEqual
d574dbcc859 is described below

commit d574dbcc85965df4a48d608230e591cc23adb525
Author: Amanda Liu <amanda....@databricks.com>
AuthorDate: Tue Jul 18 08:50:49 2023 +0900

    [SPARK-44413][PYTHON] Clarify error for unsupported arg data type in 
assertDataFrameEqual
    
    ### What changes were proposed in this pull request?
    This PR adds an error class, `INVALID_TYPE_DF_EQUALITY_ARG`, to clarify the 
error message for unsupported argument data types when calling 
`assertDataFrameEqual`.
    
    ### Why are the changes needed?
    The fix helps clarify why an error is thrown and what is wrong when a user 
passes unsupported arg types into the `assertDataFrameEqual` util function.
    
    ### Does this PR introduce any user-facing change?
    Yes, the PR modifies the error message seen by users.
    
    ### How was this patch tested?
    Modified tests in `runtime/python/pyspark/sql/tests/test_utils.py` and 
`runtime/python/pyspark/sql/tests/connect/test_utils.py`
    
    Closes #42027 from asl3/datatype-error-clarify.
    
    Authored-by: Amanda Liu <amanda....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/errors/error_classes.py |  5 +++++
 python/pyspark/sql/tests/test_utils.py | 33 +++++++++++++++++++++++--------
 python/pyspark/testing/utils.py        | 36 ++++++++++++++++++++++++----------
 3 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py 
b/python/pyspark/errors/error_classes.py
index 2cecee4da44..e45bc0797c9 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -263,6 +263,11 @@ ERROR_CLASSES_JSON = """
       "StructField does not have typeName. Use typeName on its type explicitly 
instead."
     ]
   },
+  "INVALID_TYPE_DF_EQUALITY_ARG" : {
+    "message" : [
+      "Expected type <expected_type> for `<arg_name>` but got type 
<actual_type>."
+    ]
+  },
   "INVALID_UDF_EVAL_TYPE" : {
     "message" : [
       "Eval type for UDF must be <eval_type>."
diff --git a/python/pyspark/sql/tests/test_utils.py 
b/python/pyspark/sql/tests/test_utils.py
index 9c31eb4d6bd..a1cefe7c840 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -39,6 +39,7 @@ from pyspark.sql.types import (
     IntegerType,
     BooleanType,
 )
+from pyspark.sql.dataframe import DataFrame
 
 import difflib
 
@@ -633,8 +634,12 @@ class UtilsTestsMixin:
 
         self.check_error(
             exception=pe.exception,
-            error_class="UNSUPPORTED_DATA_TYPE",
-            message_parameters={"data_type": pd.DataFrame},
+            error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+            message_parameters={
+                "expected_type": DataFrame,
+                "arg_name": "df",
+                "actual_type": pd.DataFrame,
+            },
         )
 
         with self.assertRaises(PySparkAssertionError) as pe:
@@ -642,8 +647,12 @@ class UtilsTestsMixin:
 
         self.check_error(
             exception=pe.exception,
-            error_class="UNSUPPORTED_DATA_TYPE",
-            message_parameters={"data_type": pd.DataFrame},
+            error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+            message_parameters={
+                "expected_type": DataFrame,
+                "arg_name": "df",
+                "actual_type": pd.DataFrame,
+            },
         )
 
     def test_assert_error_non_pyspark_df(self):
@@ -655,8 +664,12 @@ class UtilsTestsMixin:
 
         self.check_error(
             exception=pe.exception,
-            error_class="UNSUPPORTED_DATA_TYPE",
-            message_parameters={"data_type": type(dict1)},
+            error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+            message_parameters={
+                "expected_type": DataFrame,
+                "arg_name": "df",
+                "actual_type": type(dict1),
+            },
         )
 
         with self.assertRaises(PySparkAssertionError) as pe:
@@ -664,8 +677,12 @@ class UtilsTestsMixin:
 
         self.check_error(
             exception=pe.exception,
-            error_class="UNSUPPORTED_DATA_TYPE",
-            message_parameters={"data_type": type(dict1)},
+            error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+            message_parameters={
+                "expected_type": DataFrame,
+                "arg_name": "df",
+                "actual_type": type(dict1),
+            },
         )
 
     def test_row_order_ignored(self):
diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py
index acbfb522f69..b8977b6fffd 100644
--- a/python/pyspark/testing/utils.py
+++ b/python/pyspark/testing/utils.py
@@ -35,7 +35,7 @@ from itertools import zip_longest
 from pyspark import SparkContext, SparkConf
 from pyspark.errors import PySparkAssertionError, PySparkException
 from pyspark.find_spark_home import _find_spark_home
-from pyspark.sql.dataframe import DataFrame as DataFrame
+from pyspark.sql.dataframe import DataFrame
 from pyspark.sql import Row
 from pyspark.sql.types import StructType, AtomicType, StructField
 
@@ -322,7 +322,7 @@ def assertDataFrameEqual(
 ):
     r"""
     A util function to assert equality between `actual` (DataFrame) and 
`expected`
-    (either DataFrame or list of Rows), with optional parameter 
`checkRowOrder`.
+    (DataFrame or list of Rows), with optional parameters `checkRowOrder`, 
`rtol`, and `atol`.
 
     .. versionadded:: 3.5.0
 
@@ -401,8 +401,12 @@ def assertDataFrameEqual(
 
         if not isinstance(actual, DataFrame) and not isinstance(actual, 
ConnectDataFrame):
             raise PySparkAssertionError(
-                error_class="UNSUPPORTED_DATA_TYPE",
-                message_parameters={"data_type": type(actual)},
+                error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+                message_parameters={
+                    "expected_type": DataFrame,
+                    "arg_name": "df",
+                    "actual_type": type(actual),
+                },
             )
         elif (
             not isinstance(expected, DataFrame)
@@ -410,19 +414,31 @@ def assertDataFrameEqual(
             and not isinstance(expected, List)
         ):
             raise PySparkAssertionError(
-                error_class="UNSUPPORTED_DATA_TYPE",
-                message_parameters={"data_type": type(expected)},
+                error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+                message_parameters={
+                    "expected_type": Union[DataFrame, List[Row]],
+                    "arg_name": "expected",
+                    "actual_type": type(expected),
+                },
             )
     except Exception:
         if not isinstance(actual, DataFrame):
             raise PySparkAssertionError(
-                error_class="UNSUPPORTED_DATA_TYPE",
-                message_parameters={"data_type": type(actual)},
+                error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+                message_parameters={
+                    "expected_type": DataFrame,
+                    "arg_name": "df",
+                    "actual_type": type(actual),
+                },
             )
         elif not isinstance(expected, DataFrame) and not isinstance(expected, 
List):
             raise PySparkAssertionError(
-                error_class="UNSUPPORTED_DATA_TYPE",
-                message_parameters={"data_type": type(expected)},
+                error_class="INVALID_TYPE_DF_EQUALITY_ARG",
+                message_parameters={
+                    "expected_type": Union[DataFrame, List[Row]],
+                    "arg_name": "expected",
+                    "actual_type": type(expected),
+                },
             )
 
     # special cases: empty datasets, datasets with 0 columns


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to