This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 200440e1845 [SPARK-44682][PS] Make pandas error class message_parameters strings
200440e1845 is described below

commit 200440e18458e84d178d9eda5a2c54bcedc634ee
Author: Amanda Liu <amanda....@databricks.com>
AuthorDate: Mon Aug 7 12:01:45 2023 +0900

    [SPARK-44682][PS] Make pandas error class message_parameters strings
    
    This PR converts the `message_parameters` values of the pandas error classes to strings, so that error class messages can be compared in tests.
    
    The change ensures the ability to compare error class messages in tests.
    
    No user-facing change: the PR does not affect the user-facing view of the error messages.
    
    Updated `python/pyspark/pandas/tests/test_utils.py` and existing tests
    
    Closes #42348 from asl3/string-pandas-error-types.
    
    Authored-by: Amanda Liu <amanda....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit df8e52d84d1eabf48f68d09491f66a0835f41693)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/pandas/tests/test_utils.py | 16 ++++-----
 python/pyspark/testing/pandasutils.py     | 56 +++++++++++++++----------------
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/python/pyspark/pandas/tests/test_utils.py b/python/pyspark/pandas/tests/test_utils.py
index 3d658446f27..0bb03dd8749 100644
--- a/python/pyspark/pandas/tests/test_utils.py
+++ b/python/pyspark/pandas/tests/test_utils.py
@@ -208,10 +208,10 @@ class UtilsTestsMixin:
             exception=pe.exception,
             error_class="DIFFERENT_PANDAS_SERIES",
             message_parameters={
-                "left": series1,
-                "left_dtype": series1.dtype,
-                "right": series2,
-                "right_dtype": series2.dtype,
+                "left": series1.to_string(),
+                "left_dtype": str(series1.dtype),
+                "right": series2.to_string(),
+                "right_dtype": str(series2.dtype),
             },
         )
 
@@ -227,9 +227,9 @@ class UtilsTestsMixin:
             error_class="DIFFERENT_PANDAS_INDEX",
             message_parameters={
                 "left": index1,
-                "left_dtype": index1.dtype,
+                "left_dtype": str(index1.dtype),
                 "right": index2,
-                "right_dtype": index2.dtype,
+                "right_dtype": str(index2.dtype),
             },
         )
 
@@ -247,9 +247,9 @@ class UtilsTestsMixin:
             error_class="DIFFERENT_PANDAS_MULTIINDEX",
             message_parameters={
                 "left": multiindex1,
-                "left_dtype": multiindex1.dtype,
+                "left_dtype": str(multiindex1.dtype),
                 "right": multiindex2,
-                "right_dtype": multiindex2.dtype,
+                "right_dtype": str(multiindex1.dtype),
             },
         )
 
diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py
index 58999253521..39196873482 100644
--- a/python/pyspark/testing/pandasutils.py
+++ b/python/pyspark/testing/pandasutils.py
@@ -124,10 +124,10 @@ def _assert_pandas_equal(
             raise PySparkAssertionError(
                 error_class="DIFFERENT_PANDAS_SERIES",
                 message_parameters={
-                    "left": left,
-                    "left_dtype": left.dtype,
-                    "right": right,
-                    "right_dtype": right.dtype,
+                    "left": left.to_string(),
+                    "left_dtype": str(left.dtype),
+                    "right": right.to_string(),
+                    "right_dtype": str(right.dtype),
                 },
             )
     elif isinstance(left, pd.Index) and isinstance(right, pd.Index):
@@ -143,9 +143,9 @@ def _assert_pandas_equal(
                 error_class="DIFFERENT_PANDAS_INDEX",
                 message_parameters={
                     "left": left,
-                    "left_dtype": left.dtype,
+                    "left_dtype": str(left.dtype),
                     "right": right,
-                    "right_dtype": right.dtype,
+                    "right_dtype": str(right.dtype),
                 },
             )
     else:
@@ -228,10 +228,10 @@ def _assert_pandas_almost_equal(
             raise PySparkAssertionError(
                 error_class="DIFFERENT_PANDAS_SERIES",
                 message_parameters={
-                    "left": left,
-                    "left_dtype": left.dtype,
-                    "right": right,
-                    "right_dtype": right.dtype,
+                    "left": left.to_string(),
+                    "left_dtype": str(left.dtype),
+                    "right": right.to_string(),
+                    "right_dtype": str(right.dtype),
                 },
             )
         for lnull, rnull in zip(left.isnull(), right.isnull()):
@@ -239,10 +239,10 @@ def _assert_pandas_almost_equal(
                 raise PySparkAssertionError(
                     error_class="DIFFERENT_PANDAS_SERIES",
                     message_parameters={
-                        "left": left,
-                        "left_dtype": left.dtype,
-                        "right": right,
-                        "right_dtype": right.dtype,
+                        "left": left.to_string(),
+                        "left_dtype": str(left.dtype),
+                        "right": right.to_string(),
+                        "right_dtype": str(right.dtype),
                     },
                 )
         for lval, rval in zip(left.dropna(), right.dropna()):
@@ -253,10 +253,10 @@ def _assert_pandas_almost_equal(
                     raise PySparkAssertionError(
                         error_class="DIFFERENT_PANDAS_SERIES",
                         message_parameters={
-                            "left": left,
-                            "left_dtype": left.dtype,
-                            "right": right,
-                            "right_dtype": right.dtype,
+                            "left": left.to_string(),
+                            "left_dtype": str(left.dtype),
+                            "right": right.to_string(),
+                            "right_dtype": str(right.dtype),
                         },
                     )
     elif isinstance(left, pd.MultiIndex) and isinstance(right, pd.MultiIndex):
@@ -265,9 +265,9 @@ def _assert_pandas_almost_equal(
                 error_class="DIFFERENT_PANDAS_MULTIINDEX",
                 message_parameters={
                     "left": left,
-                    "left_dtype": left.dtype,
+                    "left_dtype": str(left.dtype),
                     "right": right,
-                    "right_dtype": right.dtype,
+                    "right_dtype": str(right.dtype),
                 },
             )
         for lval, rval in zip(left, right):
@@ -279,9 +279,9 @@ def _assert_pandas_almost_equal(
                         error_class="DIFFERENT_PANDAS_MULTIINDEX",
                         message_parameters={
                             "left": left,
-                            "left_dtype": left.dtype,
+                            "left_dtype": str(left.dtype),
                             "right": right,
-                            "right_dtype": right.dtype,
+                            "right_dtype": str(right.dtype),
                         },
                     )
     elif isinstance(left, pd.Index) and isinstance(right, pd.Index):
@@ -290,9 +290,9 @@ def _assert_pandas_almost_equal(
                 error_class="DIFFERENT_PANDAS_INDEX",
                 message_parameters={
                     "left": left,
-                    "left_dtype": left.dtype,
+                    "left_dtype": str(left.dtype),
                     "right": right,
-                    "right_dtype": right.dtype,
+                    "right_dtype": str(right.dtype),
                 },
             )
         for lnull, rnull in zip(left.isnull(), right.isnull()):
@@ -301,9 +301,9 @@ def _assert_pandas_almost_equal(
                     error_class="DIFFERENT_PANDAS_INDEX",
                     message_parameters={
                         "left": left,
-                        "left_dtype": left.dtype,
+                        "left_dtype": str(left.dtype),
                         "right": right,
-                        "right_dtype": right.dtype,
+                        "right_dtype": str(right.dtype),
                     },
                 )
         for lval, rval in zip(left.dropna(), right.dropna()):
@@ -315,9 +315,9 @@ def _assert_pandas_almost_equal(
                         error_class="DIFFERENT_PANDAS_INDEX",
                         message_parameters={
                             "left": left,
-                            "left_dtype": left.dtype,
+                            "left_dtype": str(left.dtype),
                             "right": right,
-                            "right_dtype": right.dtype,
+                            "right_dtype": str(right.dtype),
                         },
                     )
     else:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to