This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new df8e52d84d1 [SPARK-44682][PS] Make pandas error class message_parameters strings
df8e52d84d1 is described below

commit df8e52d84d1eabf48f68d09491f66a0835f41693
Author: Amanda Liu <amanda....@databricks.com>
AuthorDate: Mon Aug 7 12:01:45 2023 +0900

    [SPARK-44682][PS] Make pandas error class message_parameters strings
    
    ### What changes were proposed in this pull request?
    This PR converts the message_parameters values for pandas error classes to strings, so that error class messages can be compared in tests.
    
    ### Why are the changes needed?
    The change ensures the ability to compare error class messages in tests.
    
    ### Does this PR introduce _any_ user-facing change?
    No, the PR does not affect the user-facing view of the error messages.
    
    ### How was this patch tested?
    Updated `python/pyspark/pandas/tests/test_utils.py` and existing tests
    
    Closes #42348 from asl3/string-pandas-error-types.
    
    Authored-by: Amanda Liu <amanda....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/pandas/tests/test_utils.py | 16 ++++-----
 python/pyspark/testing/pandasutils.py     | 56 +++++++++++++++----------------
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/python/pyspark/pandas/tests/test_utils.py b/python/pyspark/pandas/tests/test_utils.py
index 3d658446f27..0bb03dd8749 100644
--- a/python/pyspark/pandas/tests/test_utils.py
+++ b/python/pyspark/pandas/tests/test_utils.py
@@ -208,10 +208,10 @@ class UtilsTestsMixin:
             exception=pe.exception,
             error_class="DIFFERENT_PANDAS_SERIES",
             message_parameters={
-                "left": series1,
-                "left_dtype": series1.dtype,
-                "right": series2,
-                "right_dtype": series2.dtype,
+                "left": series1.to_string(),
+                "left_dtype": str(series1.dtype),
+                "right": series2.to_string(),
+                "right_dtype": str(series2.dtype),
             },
         )
 
@@ -227,9 +227,9 @@ class UtilsTestsMixin:
             error_class="DIFFERENT_PANDAS_INDEX",
             message_parameters={
                 "left": index1,
-                "left_dtype": index1.dtype,
+                "left_dtype": str(index1.dtype),
                 "right": index2,
-                "right_dtype": index2.dtype,
+                "right_dtype": str(index2.dtype),
             },
         )
 
@@ -247,9 +247,9 @@ class UtilsTestsMixin:
             error_class="DIFFERENT_PANDAS_MULTIINDEX",
             message_parameters={
                 "left": multiindex1,
-                "left_dtype": multiindex1.dtype,
+                "left_dtype": str(multiindex1.dtype),
                 "right": multiindex2,
-                "right_dtype": multiindex2.dtype,
+                "right_dtype": str(multiindex2.dtype),
             },
         )
 
diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py
index 58999253521..39196873482 100644
--- a/python/pyspark/testing/pandasutils.py
+++ b/python/pyspark/testing/pandasutils.py
@@ -124,10 +124,10 @@ def _assert_pandas_equal(
             raise PySparkAssertionError(
                 error_class="DIFFERENT_PANDAS_SERIES",
                 message_parameters={
-                    "left": left,
-                    "left_dtype": left.dtype,
-                    "right": right,
-                    "right_dtype": right.dtype,
+                    "left": left.to_string(),
+                    "left_dtype": str(left.dtype),
+                    "right": right.to_string(),
+                    "right_dtype": str(right.dtype),
                 },
             )
     elif isinstance(left, pd.Index) and isinstance(right, pd.Index):
@@ -143,9 +143,9 @@ def _assert_pandas_equal(
                 error_class="DIFFERENT_PANDAS_INDEX",
                 message_parameters={
                     "left": left,
-                    "left_dtype": left.dtype,
+                    "left_dtype": str(left.dtype),
                     "right": right,
-                    "right_dtype": right.dtype,
+                    "right_dtype": str(right.dtype),
                 },
             )
     else:
@@ -228,10 +228,10 @@ def _assert_pandas_almost_equal(
             raise PySparkAssertionError(
                 error_class="DIFFERENT_PANDAS_SERIES",
                 message_parameters={
-                    "left": left,
-                    "left_dtype": left.dtype,
-                    "right": right,
-                    "right_dtype": right.dtype,
+                    "left": left.to_string(),
+                    "left_dtype": str(left.dtype),
+                    "right": right.to_string(),
+                    "right_dtype": str(right.dtype),
                 },
             )
         for lnull, rnull in zip(left.isnull(), right.isnull()):
@@ -239,10 +239,10 @@ def _assert_pandas_almost_equal(
                 raise PySparkAssertionError(
                     error_class="DIFFERENT_PANDAS_SERIES",
                     message_parameters={
-                        "left": left,
-                        "left_dtype": left.dtype,
-                        "right": right,
-                        "right_dtype": right.dtype,
+                        "left": left.to_string(),
+                        "left_dtype": str(left.dtype),
+                        "right": right.to_string(),
+                        "right_dtype": str(right.dtype),
                     },
                 )
         for lval, rval in zip(left.dropna(), right.dropna()):
@@ -253,10 +253,10 @@ def _assert_pandas_almost_equal(
                     raise PySparkAssertionError(
                         error_class="DIFFERENT_PANDAS_SERIES",
                         message_parameters={
-                            "left": left,
-                            "left_dtype": left.dtype,
-                            "right": right,
-                            "right_dtype": right.dtype,
+                            "left": left.to_string(),
+                            "left_dtype": str(left.dtype),
+                            "right": right.to_string(),
+                            "right_dtype": str(right.dtype),
                         },
                     )
     elif isinstance(left, pd.MultiIndex) and isinstance(right, pd.MultiIndex):
@@ -265,9 +265,9 @@ def _assert_pandas_almost_equal(
                 error_class="DIFFERENT_PANDAS_MULTIINDEX",
                 message_parameters={
                     "left": left,
-                    "left_dtype": left.dtype,
+                    "left_dtype": str(left.dtype),
                     "right": right,
-                    "right_dtype": right.dtype,
+                    "right_dtype": str(right.dtype),
                 },
             )
         for lval, rval in zip(left, right):
@@ -279,9 +279,9 @@ def _assert_pandas_almost_equal(
                         error_class="DIFFERENT_PANDAS_MULTIINDEX",
                         message_parameters={
                             "left": left,
-                            "left_dtype": left.dtype,
+                            "left_dtype": str(left.dtype),
                             "right": right,
-                            "right_dtype": right.dtype,
+                            "right_dtype": str(right.dtype),
                         },
                     )
     elif isinstance(left, pd.Index) and isinstance(right, pd.Index):
@@ -290,9 +290,9 @@ def _assert_pandas_almost_equal(
                 error_class="DIFFERENT_PANDAS_INDEX",
                 message_parameters={
                     "left": left,
-                    "left_dtype": left.dtype,
+                    "left_dtype": str(left.dtype),
                     "right": right,
-                    "right_dtype": right.dtype,
+                    "right_dtype": str(right.dtype),
                 },
             )
         for lnull, rnull in zip(left.isnull(), right.isnull()):
@@ -301,9 +301,9 @@ def _assert_pandas_almost_equal(
                     error_class="DIFFERENT_PANDAS_INDEX",
                     message_parameters={
                         "left": left,
-                        "left_dtype": left.dtype,
+                        "left_dtype": str(left.dtype),
                         "right": right,
-                        "right_dtype": right.dtype,
+                        "right_dtype": str(right.dtype),
                     },
                 )
         for lval, rval in zip(left.dropna(), right.dropna()):
@@ -315,9 +315,9 @@ def _assert_pandas_almost_equal(
                         error_class="DIFFERENT_PANDAS_INDEX",
                         message_parameters={
                             "left": left,
-                            "left_dtype": left.dtype,
+                            "left_dtype": str(left.dtype),
                             "right": right,
-                            "right_dtype": right.dtype,
+                            "right_dtype": str(right.dtype),
                         },
                     )
     else:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to