This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 200440e1845 [SPARK-44682][PS] Make pandas error class message_parameters strings 200440e1845 is described below commit 200440e18458e84d178d9eda5a2c54bcedc634ee Author: Amanda Liu <amanda....@databricks.com> AuthorDate: Mon Aug 7 12:01:45 2023 +0900 [SPARK-44682][PS] Make pandas error class message_parameters strings This PR converts the types for message_parameters for pandas error classes to string, to ensure ability to compare error class messages in tests. The change ensures the ability to compare error class messages in tests. No, the PR does not affect the user-facing view of the error messages. Updated `python/pyspark/pandas/tests/test_utils.py` and existing tests Closes #42348 from asl3/string-pandas-error-types. Authored-by: Amanda Liu <amanda....@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit df8e52d84d1eabf48f68d09491f66a0835f41693) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/tests/test_utils.py | 16 ++++----- python/pyspark/testing/pandasutils.py | 56 +++++++++++++++---------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/python/pyspark/pandas/tests/test_utils.py b/python/pyspark/pandas/tests/test_utils.py index 3d658446f27..0bb03dd8749 100644 --- a/python/pyspark/pandas/tests/test_utils.py +++ b/python/pyspark/pandas/tests/test_utils.py @@ -208,10 +208,10 @@ class UtilsTestsMixin: exception=pe.exception, error_class="DIFFERENT_PANDAS_SERIES", message_parameters={ - "left": series1, - "left_dtype": series1.dtype, - "right": series2, - "right_dtype": series2.dtype, + "left": series1.to_string(), + "left_dtype": str(series1.dtype), + "right": series2.to_string(), + "right_dtype": str(series2.dtype), }, ) @@ -227,9 +227,9 @@ class UtilsTestsMixin: error_class="DIFFERENT_PANDAS_INDEX", message_parameters={ "left": index1, - "left_dtype": index1.dtype, + "left_dtype": str(index1.dtype), "right": index2, - "right_dtype": index2.dtype, + "right_dtype": str(index2.dtype), }, ) @@ -247,9 +247,9 @@ class UtilsTestsMixin: error_class="DIFFERENT_PANDAS_MULTIINDEX", message_parameters={ "left": multiindex1, - "left_dtype": multiindex1.dtype, + "left_dtype": str(multiindex1.dtype), "right": multiindex2, - "right_dtype": multiindex2.dtype, + "right_dtype": str(multiindex1.dtype), }, ) diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py index 58999253521..39196873482 100644 --- a/python/pyspark/testing/pandasutils.py +++ b/python/pyspark/testing/pandasutils.py @@ -124,10 +124,10 @@ def _assert_pandas_equal( raise PySparkAssertionError( error_class="DIFFERENT_PANDAS_SERIES", message_parameters={ - "left": left, - "left_dtype": left.dtype, - "right": right, - "right_dtype": right.dtype, + "left": left.to_string(), + "left_dtype": str(left.dtype), + "right": right.to_string(), + "right_dtype": str(right.dtype), }, ) elif isinstance(left, pd.Index) and isinstance(right, pd.Index): @@ -143,9 +143,9 @@ def _assert_pandas_equal( error_class="DIFFERENT_PANDAS_INDEX", message_parameters={ "left": left, - "left_dtype": left.dtype, + "left_dtype": str(left.dtype), "right": right, - "right_dtype": right.dtype, + "right_dtype": str(right.dtype), }, ) else: @@ -228,10 +228,10 @@ def _assert_pandas_almost_equal( raise PySparkAssertionError( error_class="DIFFERENT_PANDAS_SERIES", message_parameters={ - "left": left, - "left_dtype": left.dtype, - "right": right, - "right_dtype": right.dtype, + "left": left.to_string(), + "left_dtype": str(left.dtype), + "right": right.to_string(), + "right_dtype": str(right.dtype), }, ) for lnull, rnull in zip(left.isnull(), right.isnull()): @@ -239,10 +239,10 @@ def _assert_pandas_almost_equal( raise PySparkAssertionError( error_class="DIFFERENT_PANDAS_SERIES", message_parameters={ - "left": left, - "left_dtype": left.dtype, - "right": right, - "right_dtype": right.dtype, + "left": left.to_string(), + "left_dtype": str(left.dtype), + "right": right.to_string(), + "right_dtype": str(right.dtype), }, ) for lval, rval in zip(left.dropna(), right.dropna()): @@ -253,10 +253,10 @@ def _assert_pandas_almost_equal( raise PySparkAssertionError( error_class="DIFFERENT_PANDAS_SERIES", message_parameters={ - "left": left, - "left_dtype": left.dtype, - "right": right, - "right_dtype": right.dtype, + "left": left.to_string(), + "left_dtype": str(left.dtype), + "right": right.to_string(), + "right_dtype": str(right.dtype), }, ) elif isinstance(left, pd.MultiIndex) and isinstance(right, pd.MultiIndex): @@ -265,9 +265,9 @@ def _assert_pandas_almost_equal( error_class="DIFFERENT_PANDAS_MULTIINDEX", message_parameters={ "left": left, - "left_dtype": left.dtype, + "left_dtype": str(left.dtype), "right": right, - "right_dtype": right.dtype, + "right_dtype": str(right.dtype), }, ) for lval, rval in zip(left, right): @@ -279,9 +279,9 @@ def _assert_pandas_almost_equal( error_class="DIFFERENT_PANDAS_MULTIINDEX", message_parameters={ "left": left, - "left_dtype": left.dtype, + "left_dtype": str(left.dtype), "right": right, - "right_dtype": right.dtype, + "right_dtype": str(right.dtype), }, ) elif isinstance(left, pd.Index) and isinstance(right, pd.Index): @@ -290,9 +290,9 @@ def _assert_pandas_almost_equal( error_class="DIFFERENT_PANDAS_INDEX", message_parameters={ "left": left, - "left_dtype": left.dtype, + "left_dtype": str(left.dtype), "right": right, - "right_dtype": right.dtype, + "right_dtype": str(right.dtype), }, ) for lnull, rnull in zip(left.isnull(), right.isnull()): @@ -301,9 +301,9 @@ def _assert_pandas_almost_equal( error_class="DIFFERENT_PANDAS_INDEX", message_parameters={ "left": left, - "left_dtype": left.dtype, + "left_dtype": str(left.dtype), "right": right, - "right_dtype": right.dtype, + "right_dtype": str(right.dtype), }, ) for lval, rval in zip(left.dropna(), right.dropna()): @@ -315,9 +315,9 @@ def _assert_pandas_almost_equal( error_class="DIFFERENT_PANDAS_INDEX", message_parameters={ "left": left, - "left_dtype": left.dtype, + "left_dtype": str(left.dtype), "right": right, - "right_dtype": right.dtype, + "right_dtype": str(right.dtype), }, ) else: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org