This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5d1f976f85fe [SPARK-47969][PYTHON][TESTS][FOLLOWUP] Make Test `test_creation_index` deterministic 5d1f976f85fe is described below commit 5d1f976f85fe1ee39ca3cc4f0f2e6afa8b43e5ea Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Fri May 3 20:42:30 2024 -0700 [SPARK-47969][PYTHON][TESTS][FOLLOWUP] Make Test `test_creation_index` deterministic ### What changes were proposed in this pull request? Follow-up to https://github.com/apache/spark/pull/46200. This change calls `.sort_index()` on both the pandas-on-Spark and the pandas DataFrame in each `DatetimeIndex` comparison, so that the assertions no longer depend on row order. ### Why are the changes needed? There is still non-deterministic code in this test: ``` Traceback (most recent call last): File "/home/jenkins/python/pyspark/testing/pandasutils.py", line 91, in _assert_pandas_equal assert_frame_equal( File "/databricks/python3/lib/python3.11/site-packages/pandas/_testing/asserters.py", line 1257, in assert_frame_equal assert_index_equal( File "/databricks/python3/lib/python3.11/site-packages/pandas/_testing/asserters.py", line 407, in assert_index_equal raise_assert_detail(obj, msg, left, right) File "/databricks/python3/lib/python3.11/site-packages/pandas/_testing/asserters.py", line 665, in raise_assert_detail raise AssertionError(msg) AssertionError: DataFrame.index are different DataFrame.index values are different (75.0 %) [left]: DatetimeIndex(['2022-09-02', '2022-09-03', '2022-08-31', '2022-09-05'], dtype='datetime64[ns]', freq=None) [right]: DatetimeIndex(['2022-08-31', '2022-09-02', '2022-09-03', '2022-09-05'], dtype='datetime64[ns]', freq=None) ``` ### Does this PR introduce _any_ user-facing change? No, this is a test-only change. ### How was this patch tested? CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46378 from zhengruifeng/ps_test_create_index. 
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/pandas/tests/frame/test_constructor.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/pyspark/pandas/tests/frame/test_constructor.py b/python/pyspark/pandas/tests/frame/test_constructor.py index d7581895c6c9..e093adfa7ba3 100644 --- a/python/pyspark/pandas/tests/frame/test_constructor.py +++ b/python/pyspark/pandas/tests/frame/test_constructor.py @@ -269,11 +269,11 @@ class FrameConstructorMixin: ps.DataFrame( data=pdf, index=pd.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), ) # test with pd.DataFrame and ps.DatetimeIndex @@ -281,11 +281,11 @@ class FrameConstructorMixin: ps.DataFrame( data=pdf, index=ps.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), ) with ps.option_context("compute.ops_on_diff_frames", True): @@ -296,13 +296,13 @@ class FrameConstructorMixin: index=pd.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), ) # test with ps.DataFrame and ps.DatetimeIndex @@ -312,13 +312,13 @@ class FrameConstructorMixin: index=ps.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), ) # test MultiIndex --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org