This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9accb5c539f [SPARK-44363][PYTHON] Display percent of unequal rows in DataFrame comparison 9accb5c539f is described below commit 9accb5c539f6783c3e9e0147f2199ea370af26c4 Author: Amanda Liu <amanda....@databricks.com> AuthorDate: Tue Jul 11 12:49:10 2023 +0900 [SPARK-44363][PYTHON] Display percent of unequal rows in DataFrame comparison ### What changes were proposed in this pull request? This PR fixes how the `assertDataFrameEqual` util function reports the percentage of unequal rows in its error message when the DataFrames differ: the ratio of differing rows is now multiplied by 100, so the value printed before `%` is an actual percentage (e.g. 50.00000 instead of 0.50000). ### Why are the changes needed? Without the correction, the error message shows a fraction labeled as a percent, which is misleading. ### Does this PR introduce _any_ user-facing change? Yes, the PR modifies the user-facing error message of the `assertDataFrameEqual` util function. ### How was this patch tested? Modified existing tests in `python/pyspark/sql/tests/test_utils.py` and `python/pyspark/sql/tests/connect/test_utils.py`. Closes #41926 from asl3/fix-percent-diff. 
Authored-by: Amanda Liu <amanda....@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/tests/test_utils.py | 8 ++++---- python/pyspark/testing/utils.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py index 6666fa64858..1757d8dd2e1 100644 --- a/python/pyspark/sql/tests/test_utils.py +++ b/python/pyspark/sql/tests/test_utils.py @@ -142,7 +142,7 @@ class UtilsTestsMixin: ) expected_error_message = "Results do not match: " - percent_diff = 1 / 2 + percent_diff = (1 / 2) * 100 expected_error_message += "( %.5f %% )" % percent_diff diff_msg = ( "[df]" @@ -457,7 +457,7 @@ class UtilsTestsMixin: ) expected_error_message = "Results do not match: " - percent_diff = 1 / 2 + percent_diff = (1 / 2) * 100 expected_error_message += "( %.5f %% )" % percent_diff diff_msg = ( "[df]" @@ -553,7 +553,7 @@ class UtilsTestsMixin: ) expected_error_message = "Results do not match: " - percent_diff = 2 / 2 + percent_diff = (2 / 2) * 100 expected_error_message += "( %.5f %% )" % percent_diff diff_msg = ( "[df]" @@ -641,7 +641,7 @@ class UtilsTestsMixin: ) expected_error_message = "Results do not match: " - percent_diff = 2 / 3 + percent_diff = (2 / 3) * 100 expected_error_message += "( %.5f %% )" % percent_diff diff_msg = ( "[df]" diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py index c6ec6adc8af..651d57bb11d 100644 --- a/python/pyspark/testing/utils.py +++ b/python/pyspark/testing/utils.py @@ -239,7 +239,7 @@ def assertDataFrameEqual(df: DataFrame, expected: DataFrame, check_row_order: bo The expected result of the operation, for comparison with the actual result. check_row_order : bool, optional - A flag indicates whether the order of rows should be considered in the comparison. + A flag indicating whether the order of rows should be considered in the comparison. 
If set to `False` (default), the row order is not taken into account. If set to `True`, the order of rows is important and will be checked during comparison. @@ -258,7 +258,7 @@ def assertDataFrameEqual(df: DataFrame, expected: DataFrame, check_row_order: bo >>> assertDataFrameEqual(df1, df2) # fail # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... - PySparkAssertionError: [DIFFERENT_ROWS] Results do not match: ( 0.66667 % ) + PySparkAssertionError: [DIFFERENT_ROWS] Results do not match: ( 66.667 % ) [df] Row(id='1', amount=1000.0) <BLANKLINE> @@ -370,7 +370,7 @@ def assertDataFrameEqual(df: DataFrame, expected: DataFrame, check_row_order: bo diff_msg += "********************" + "\n\n" if not rows_equal: - percent_diff = diff_rows_cnt / len(zipped) + percent_diff = (diff_rows_cnt / len(zipped)) * 100 error_msg += "( %.5f %% )" % percent_diff error_msg += "\n" + diff_msg raise PySparkAssertionError( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org