This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new b095960c7290 [SPARK-46726][PS][TESTS] Rebalance `pyspark_pandas_connect_part?`
b095960c7290 is described below

commit b095960c7290d85b6433dcf33b719332c082008c
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Mon Jan 15 21:30:19 2024 +0800

    [SPARK-46726][PS][TESTS] Rebalance `pyspark_pandas_connect_part?`

    ### What changes were proposed in this pull request?
    Rebalance the `pyspark_pandas_connect_part?` test modules.

    ### Why are the changes needed?
    For better testing parallelism: the four parts run as parallel CI jobs, so their durations should be as close as possible.

    Before: https://github.com/apache/spark/actions/runs/7527560858/job/20487999563
    `pyspark_pandas_connect_part0`: `Tests passed in 3979 seconds`
    `pyspark_pandas_connect_part1`: `Tests passed in 3585 seconds`
    `pyspark_pandas_connect_part2`: `Tests passed in 2724 seconds`
    `pyspark_pandas_connect_part3`: `Tests passed in 3276 seconds`

    The difference between the slowest and the fastest part is about 20 min (1255 seconds).

    After:
    `pyspark_pandas_connect_part0`: `Tests passed in 3516 seconds`
    `pyspark_pandas_connect_part1`: `Tests passed in 3228 seconds`
    `pyspark_pandas_connect_part2`: `Tests passed in 3760 seconds`
    `pyspark_pandas_connect_part3`: `Tests passed in 3195 seconds`

    The difference between the slowest and the fastest part is about 9 min (565 seconds).

    ### Does this PR introduce _any_ user-facing change?
    No, this is a test-only change.

    ### How was this patch tested?
    CI: https://github.com/zhengruifeng/spark/actions/runs/7527236548/job/20488637410

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #44741 from zhengruifeng/ps_test_rebalance_pandas_connect.
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- dev/sparktestsupport/modules.py | 86 ++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 202263febc93..ad164b1a8636 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -1059,6 +1059,21 @@ pyspark_pandas_connect_part0 = Module( ], python_test_goals=[ # pandas-on-Spark unittests + "pyspark.pandas.tests.connect.test_parity_categorical", + "pyspark.pandas.tests.connect.test_parity_config", + "pyspark.pandas.tests.connect.test_parity_extension", + "pyspark.pandas.tests.connect.test_parity_frame_spark", + "pyspark.pandas.tests.connect.test_parity_generic_functions", + "pyspark.pandas.tests.connect.test_parity_indexops_spark", + "pyspark.pandas.tests.connect.test_parity_internal", + "pyspark.pandas.tests.connect.test_parity_namespace", + "pyspark.pandas.tests.connect.test_parity_numpy_compat", + "pyspark.pandas.tests.connect.test_parity_repr", + "pyspark.pandas.tests.connect.test_parity_scalars", + "pyspark.pandas.tests.connect.test_parity_spark_functions", + "pyspark.pandas.tests.connect.test_parity_sql", + "pyspark.pandas.tests.connect.test_parity_typedef", + "pyspark.pandas.tests.connect.test_parity_utils", "pyspark.pandas.tests.connect.data_type_ops.test_parity_as_type", "pyspark.pandas.tests.connect.data_type_ops.test_parity_base", "pyspark.pandas.tests.connect.data_type_ops.test_parity_binary_ops", @@ -1073,37 +1088,15 @@ pyspark_pandas_connect_part0 = Module( "pyspark.pandas.tests.connect.data_type_ops.test_parity_string_ops", "pyspark.pandas.tests.connect.data_type_ops.test_parity_udt_ops", "pyspark.pandas.tests.connect.data_type_ops.test_parity_timedelta_ops", - "pyspark.pandas.tests.connect.indexes.test_parity_category", - "pyspark.pandas.tests.connect.indexes.test_parity_timedelta", 
"pyspark.pandas.tests.connect.plot.test_parity_frame_plot", "pyspark.pandas.tests.connect.plot.test_parity_frame_plot_matplotlib", "pyspark.pandas.tests.connect.plot.test_parity_frame_plot_plotly", "pyspark.pandas.tests.connect.plot.test_parity_series_plot", "pyspark.pandas.tests.connect.plot.test_parity_series_plot_matplotlib", "pyspark.pandas.tests.connect.plot.test_parity_series_plot_plotly", - "pyspark.pandas.tests.connect.test_parity_categorical", - "pyspark.pandas.tests.connect.test_parity_config", "pyspark.pandas.tests.connect.indexes.test_parity_default", - "pyspark.pandas.tests.connect.test_parity_extension", - "pyspark.pandas.tests.connect.test_parity_frame_spark", - "pyspark.pandas.tests.connect.test_parity_generic_functions", - "pyspark.pandas.tests.connect.test_parity_indexops_spark", - "pyspark.pandas.tests.connect.test_parity_internal", - "pyspark.pandas.tests.connect.test_parity_namespace", - "pyspark.pandas.tests.connect.test_parity_numpy_compat", - "pyspark.pandas.tests.connect.test_parity_repr", - "pyspark.pandas.tests.connect.resample.test_parity_error", - "pyspark.pandas.tests.connect.resample.test_parity_missing", - "pyspark.pandas.tests.connect.resample.test_parity_on", - "pyspark.pandas.tests.connect.resample.test_parity_timezone", - "pyspark.pandas.tests.connect.test_parity_scalars", - "pyspark.pandas.tests.connect.series.test_parity_datetime", - "pyspark.pandas.tests.connect.series.test_parity_string_ops_adv", - "pyspark.pandas.tests.connect.series.test_parity_string_ops_basic", - "pyspark.pandas.tests.connect.test_parity_spark_functions", - "pyspark.pandas.tests.connect.test_parity_sql", - "pyspark.pandas.tests.connect.test_parity_typedef", - "pyspark.pandas.tests.connect.test_parity_utils", + "pyspark.pandas.tests.connect.indexes.test_parity_category", + "pyspark.pandas.tests.connect.indexes.test_parity_timedelta", "pyspark.pandas.tests.connect.indexes.test_parity_basic", "pyspark.pandas.tests.connect.indexes.test_parity_getattr", 
"pyspark.pandas.tests.connect.indexes.test_parity_name", @@ -1146,11 +1139,6 @@ pyspark_pandas_connect_part0 = Module( "pyspark.pandas.tests.connect.computation.test_parity_describe", "pyspark.pandas.tests.connect.computation.test_parity_eval", "pyspark.pandas.tests.connect.computation.test_parity_melt", - "pyspark.pandas.tests.connect.frame.test_parity_attrs", - "pyspark.pandas.tests.connect.frame.test_parity_axis", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_frame", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_series", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_error", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and @@ -1166,6 +1154,8 @@ pyspark_pandas_connect_part1 = Module( ], python_test_goals=[ # pandas-on-Spark unittests + "pyspark.pandas.tests.connect.frame.test_parity_attrs", + "pyspark.pandas.tests.connect.frame.test_parity_axis", "pyspark.pandas.tests.connect.frame.test_parity_constructor", "pyspark.pandas.tests.connect.frame.test_parity_conversion", "pyspark.pandas.tests.connect.frame.test_parity_reindexing", @@ -1183,14 +1173,9 @@ pyspark_pandas_connect_part1 = Module( "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_adv", "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_basic", "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_min_max", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_align", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic_slow", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_cov", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_corrwith", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_index", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_series", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_frame", - 
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_series", + "pyspark.pandas.tests.connect.series.test_parity_datetime", + "pyspark.pandas.tests.connect.series.test_parity_string_ops_adv", + "pyspark.pandas.tests.connect.series.test_parity_string_ops_basic", "pyspark.pandas.tests.connect.series.test_parity_all_any", "pyspark.pandas.tests.connect.series.test_parity_arg_ops", "pyspark.pandas.tests.connect.series.test_parity_as_of", @@ -1203,6 +1188,7 @@ pyspark_pandas_connect_part1 = Module( "pyspark.pandas.tests.connect.series.test_parity_series", "pyspark.pandas.tests.connect.series.test_parity_sort", "pyspark.pandas.tests.connect.series.test_parity_stat", + "pyspark.pandas.tests.connect.series.test_parity_interpolate", "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_arithmetic", "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mod", "pyspark.pandas.tests.connect.data_type_ops.test_parity_num_mul_div", @@ -1235,15 +1221,15 @@ pyspark_pandas_connect_part2 = Module( "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx", "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv", "pyspark.pandas.tests.connect.computation.test_parity_stats", - "pyspark.pandas.tests.connect.indexes.test_parity_append", - "pyspark.pandas.tests.connect.indexes.test_parity_intersection", - "pyspark.pandas.tests.connect.indexes.test_parity_monotonic", - "pyspark.pandas.tests.connect.indexes.test_parity_union", + "pyspark.pandas.tests.connect.computation.test_parity_missing_data", "pyspark.pandas.tests.connect.frame.test_parity_interpolate", "pyspark.pandas.tests.connect.frame.test_parity_interpolate_error", - "pyspark.pandas.tests.connect.series.test_parity_interpolate", "pyspark.pandas.tests.connect.resample.test_parity_frame", "pyspark.pandas.tests.connect.resample.test_parity_series", + "pyspark.pandas.tests.connect.resample.test_parity_error", + 
"pyspark.pandas.tests.connect.resample.test_parity_missing", + "pyspark.pandas.tests.connect.resample.test_parity_on", + "pyspark.pandas.tests.connect.resample.test_parity_timezone", "pyspark.pandas.tests.connect.window.test_parity_ewm_error", "pyspark.pandas.tests.connect.window.test_parity_ewm_mean", "pyspark.pandas.tests.connect.window.test_parity_groupby_ewm_mean", @@ -1263,7 +1249,17 @@ pyspark_pandas_connect_part2 = Module( "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_rolling", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_rolling_adv", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_rolling_count", - "pyspark.pandas.tests.connect.computation.test_parity_missing_data", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_frame", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_dot_series", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_error", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_align", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic_slow", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_cov", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_corrwith", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_index", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_series", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_frame", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_setitem_series", "pyspark.pandas.tests.connect.groupby.test_parity_index", "pyspark.pandas.tests.connect.groupby.test_parity_describe", "pyspark.pandas.tests.connect.groupby.test_parity_head_tail", @@ -1301,6 +1297,10 @@ pyspark_pandas_connect_part3 = Module( "pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof", "pyspark.pandas.tests.connect.groupby.test_parity_stat_func", "pyspark.pandas.tests.connect.groupby.test_parity_stat_prod", + 
"pyspark.pandas.tests.connect.indexes.test_parity_append", + "pyspark.pandas.tests.connect.indexes.test_parity_intersection", + "pyspark.pandas.tests.connect.indexes.test_parity_monotonic", + "pyspark.pandas.tests.connect.indexes.test_parity_union", "pyspark.pandas.tests.connect.indexes.test_parity_datetime", "pyspark.pandas.tests.connect.indexes.test_parity_datetime_at", "pyspark.pandas.tests.connect.indexes.test_parity_datetime_between", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org