This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e1949f05b92 [SPARK-42996][CONNECT][PS][ML] Create & assign proper JIRA tickets for all failing tests e1949f05b92 is described below commit e1949f05b92bfce2b3254ed5d57665c6d29a282f Author: itholic <haejoon....@databricks.com> AuthorDate: Tue May 23 09:24:53 2023 +0800 [SPARK-42996][CONNECT][PS][ML] Create & assign proper JIRA tickets for all failing tests ### What changes were proposed in this pull request? This PR proposes to add comments for all pandas-on-Spark with Spark Connect failing tests, with related JIRA tickets. Created all tickets for pandas API on Spark with Spark Connect are here: SPARK-42497 ### Why are the changes needed? To assign appropriate tasks to efficiently fix the failing tests. ### Does this PR introduce _any_ user-facing change? No, it's dev-only. ### How was this patch tested? The existing CI should pass Closes #41255 from itholic/SPARK-42996. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/pandas/internal.py | 1 + .../data_type_ops/test_parity_binary_ops.py | 10 +-- .../data_type_ops/test_parity_boolean_ops.py | 18 +--- .../data_type_ops/test_parity_categorical_ops.py | 14 +-- .../connect/data_type_ops/test_parity_date_ops.py | 18 +--- .../data_type_ops/test_parity_datetime_ops.py | 10 +-- .../connect/data_type_ops/test_parity_null_ops.py | 14 +-- .../connect/data_type_ops/test_parity_num_ops.py | 18 ++-- .../data_type_ops/test_parity_string_ops.py | 18 ++-- .../data_type_ops/test_parity_timedelta_ops.py | 14 +-- .../connect/data_type_ops/test_parity_udt_ops.py | 32 +++++-- .../tests/connect/indexes/test_parity_base.py | 18 ++-- .../tests/connect/indexes/test_parity_category.py | 18 ++-- .../tests/connect/indexes/test_parity_datetime.py | 8 +- .../tests/connect/indexes/test_parity_timedelta.py | 2 +- .../tests/connect/plot/test_parity_frame_plot.py | 4 +- 
.../plot/test_parity_frame_plot_matplotlib.py | 4 +- .../connect/plot/test_parity_frame_plot_plotly.py | 6 +- .../plot/test_parity_series_plot_matplotlib.py | 12 +-- .../connect/plot/test_parity_series_plot_plotly.py | 4 +- .../tests/connect/test_parity_categorical.py | 16 ++-- .../pandas/tests/connect/test_parity_dataframe.py | 89 ++++++++++-------- .../tests/connect/test_parity_dataframe_slow.py | 66 ++++++++------ .../tests/connect/test_parity_default_index.py | 12 ++- .../pandas/tests/connect/test_parity_ewm.py | 4 +- .../pandas/tests/connect/test_parity_expanding.py | 80 ++++++++++++----- .../tests/connect/test_parity_frame_spark.py | 8 +- .../tests/connect/test_parity_generic_functions.py | 10 ++- .../pandas/tests/connect/test_parity_groupby.py | 50 ++++++++--- .../tests/connect/test_parity_groupby_slow.py | 14 ++- .../pandas/tests/connect/test_parity_internal.py | 2 +- .../pandas/tests/connect/test_parity_namespace.py | 10 ++- .../tests/connect/test_parity_numpy_compat.py | 8 +- .../test_parity_ops_on_diff_frames_groupby.py | 36 +++++--- ..._parity_ops_on_diff_frames_groupby_expanding.py | 28 ++++-- ...st_parity_ops_on_diff_frames_groupby_rolling.py | 28 ++++-- .../connect/test_parity_ops_on_diff_frames_slow.py | 26 ++++-- .../pandas/tests/connect/test_parity_resample.py | 4 +- .../pandas/tests/connect/test_parity_reshape.py | 4 +- .../pandas/tests/connect/test_parity_rolling.py | 80 ++++++++++++----- .../pandas/tests/connect/test_parity_series.py | 100 ++++++++++++++------- .../tests/connect/test_parity_series_string.py | 4 +- .../pandas/tests/connect/test_parity_sql.py | 8 +- .../pandas/tests/connect/test_parity_stats.py | 32 +++++-- 44 files changed, 618 insertions(+), 344 deletions(-) diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index d17d9be5114..1daae6bf07b 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -935,6 +935,7 @@ class InternalFrame: ) return 
sdf.select(sequential_index.alias(column_name), *scols) + # TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect. @staticmethod def attach_distributed_column(sdf: PySparkDataFrame, column_name: str) -> PySparkDataFrame: scols = [scol_for(sdf, column) for column in sdf.columns] diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py index 71bf32771e5..7d941c4c788 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py @@ -25,23 +25,23 @@ from pyspark.testing.connectutils import ReusedConnectTestCase class BinaryOpsParityTests( BinaryOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43666): Fix BinaryOps.ge to work with Spark Connect Column.") def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43667): Fix BinaryOps.gt to work with Spark Connect Column.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43667): Fix BinaryOps.le to work with Spark Connect Column.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43667): Fix BinaryOps.lt to work with Spark Connect Column.") def test_lt(self): super().test_lt() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py index 
5bd68ce683b..52d517967eb 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py @@ -30,26 +30,10 @@ class BooleanOpsParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_ge(self): - super().test_ge() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_gt(self): - super().test_gt() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_le(self): - super().test_le() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_lt(self): - super().test_lt() - if __name__ == "__main__": from pyspark.pandas.tests.connect.data_type_ops.test_parity_boolean_ops import * # noqa: F401 diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py index be418992d47..dc196060bfc 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py @@ -30,31 +30,31 @@ class CategoricalOpsParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43670): Enable CategoricalOps.eq to work with Spark Connect.") def test_eq(self): super().test_eq() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43671): Enable 
CategoricalOps.ge to work with Spark Connect.") def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43672): Enable CategoricalOps.gt to work with Spark Connect.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43673): Enable CategoricalOps.le to work with Spark Connect.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43674): Enable CategoricalOps.lt to work with Spark Connect.") def test_lt(self): super().test_lt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43675): Enable CategoricalOps.ne to work with Spark Connect.") def test_ne(self): super().test_ne() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py index 9e9020b2d06..e7b1c7de70d 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py @@ -30,26 +30,10 @@ class DateOpsParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_ge(self): - super().test_ge() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_gt(self): - super().test_gt() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_le(self): - super().test_le() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_lt(self): - super().test_lt() - if __name__ == "__main__": from pyspark.pandas.tests.connect.data_type_ops.test_parity_date_ops import 
* # noqa: F401 diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py index 4f5be453207..697c191b743 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py @@ -30,23 +30,23 @@ class DatetimeOpsParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43676): Fix DatetimeOps.ge to work with Spark Connect Column.") def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43677): Fix DatetimeOps.gt to work with Spark Connect Column.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43678): Fix DatetimeOps.le to work with Spark Connect Column.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43679): Fix DatetimeOps.lt to work with Spark Connect Column.") def test_lt(self): super().test_lt() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py index eb97e0f1cb0..00bfb75087a 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py @@ -25,31 +25,31 @@ from pyspark.testing.connectutils import ReusedConnectTestCase class NullOpsParityTests( NullOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - 
@unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43684): Fix NullOps.eq to work with Spark Connect Column.") def test_eq(self): super().test_eq() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43680): Fix NullOps.ge to work with Spark Connect Column.") def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43681): Fix NullOps.gt to work with Spark Connect Column.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43682): Fix NullOps.le to work with Spark Connect Column.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43683): Fix NullOps.lt to work with Spark Connect Column.") def test_lt(self): super().test_lt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43685): Fix NullOps.ne to work with Spark Connect Column.") def test_ne(self): super().test_ne() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py index 4ec71d2598e..1726b9a9e97 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py @@ -30,35 +30,37 @@ class NumOpsParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should 
enable.") + @unittest.skip("TODO(SPARK-43686): Enable NumOpsParityTests.test_eq.") def test_eq(self): super().test_eq() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43687): Enable NumOpsParityTests.test_ge.") def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43688): Enable NumOpsParityTests.test_gt.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43689): Enable NumOpsParityTests.test_le.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43690): Enable NumOpsParityTests.test_lt.") def test_lt(self): super().test_lt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43621): Enable pyspark.pandas.spark.functions.repeat in Spark Connect." + ) def test_mul(self): super().test_mul() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43691): Enable NumOpsParityTests.test_ne.") def test_ne(self): super().test_ne() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py index af63790b544..9abfe1d1e09 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py @@ -30,31 +30,35 @@ class StringOpsParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43692): Fix StringOps.ge to work with Spark Connect.") def test_ge(self): 
super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43693): Fix StringOps.gt to work with Spark Connect.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43694): Fix StringOps.le to work with Spark Connect.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43695): Fix StringOps.lt to work with Spark Connect.") def test_lt(self): super().test_lt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43621): Enable pyspark.pandas.spark.functions.repeat in Spark Connect." + ) def test_mul(self): super().test_mul() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43621): Enable pyspark.pandas.spark.functions.repeat in Spark Connect." + ) def test_rmul(self): super().test_rmul() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py index 1fdd80d783a..badb32f08b4 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py @@ -25,31 +25,31 @@ from pyspark.testing.connectutils import ReusedConnectTestCase class TimedeltaOpsParityTests( TimedeltaOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43696): Fix TimedeltaOps.ge to work with Spark Connect.") def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + 
@unittest.skip("TODO(SPARK-43697): Fix TimedeltaOps.gt to work with Spark Connect.") def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43698): Fix TimedeltaOps.le to work with Spark Connect.") def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43699): Fix TimedeltaOps.lt to work with Spark Connect.") def test_lt(self): super().test_lt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43700): Fix TimedeltaOps.rsub to work with Spark Connect.") def test_rsub(self): super().test_rsub() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43701): Fix TimedeltaOps.sub to work with Spark Connect.") def test_sub(self): super().test_sub() diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py index 6ea91ce8536..81511829c06 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py @@ -25,35 +25,51 @@ from pyspark.testing.connectutils import ReusedConnectTestCase class UDTOpsParityTests( UDTOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_eq(self): super().test_eq() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." 
+ ) def test_from_to_pandas(self): super().test_from_to_pandas() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_ge(self): super().test_ge() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_gt(self): super().test_gt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_isnull(self): super().test_isnull() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_le(self): super().test_le() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_lt(self): super().test_lt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43702): Fix pyspark.sql.pandas.types.to_arrow_type to work with Spark Connect." + ) def test_ne(self): super().test_ne() diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py index 0582412c87e..d5ae0a39b5b 100644 --- a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py @@ -29,27 +29,33 @@ class IndexesParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_append(self): super().test_append() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_drop_duplicates(self): super().test_drop_duplicates() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_factorize(self): super().test_factorize() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_index_drop_duplicates(self): super().test_index_drop_duplicates() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43703): Enable IndexesParityTests.test_monotonic.") def test_monotonic(self): super().test_monotonic() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43704): Enable IndexesParityTests.test_to_series.") def test_to_series(self): super().test_to_series() diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py index b61c531687f..d99d013306f 100644 --- a/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py @@ -24,39 +24,39 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class CategoricalIndexParityTests( CategoricalIndexTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_append(self): super().test_append() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for 
SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_factorize(self): super().test_factorize() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_intersection(self): super().test_intersection() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_remove_categories(self): super().test_remove_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_remove_unused_categories(self): super().test_remove_unused_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_reorder_categories(self): super().test_reorder_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_set_categories(self): super().test_set_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_union(self): super().test_union() diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py index 75649f0fb53..aa035d1d608 100644 --- a/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py @@ -24,11 +24,15 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils 
class DatetimeIndexParityTests( DatetimeIndexTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_indexer_at_time(self): super().test_indexer_at_time() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_indexer_between_time(self): super().test_indexer_between_time() diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py index 2289f24777b..9dadad756c4 100644 --- a/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py @@ -24,7 +24,7 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class TimedeltaIndexParityTests( TimedeltaIndexTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43710): Support functions.date_part for Spark Connect.") def test_properties(self): super().test_properties() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py index db004cd8d86..24392eaf27c 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py @@ -24,11 +24,11 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class DataFramePlotParityTests( DataFramePlotTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work 
with Spark Connect.") def test_compute_hist_multi_columns(self): super().test_compute_hist_multi_columns() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_compute_hist_single_column(self): super().test_compute_hist_single_column() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py index 0a6da179c1a..98da8858a03 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py @@ -24,11 +24,11 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class DataFramePlotMatplotlibParityTests( DataFramePlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_hist_plot(self): super().test_hist_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43629): Enable RDD with Spark Connect.") def test_kde_plot(self): super().test_kde_plot() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py index a7075b5ab15..7a3efee06df 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py @@ -24,15 +24,15 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class DataFramePlotPlotlyParityTests( DataFramePlotPlotlyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + 
@unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_hist_layout_kwargs(self): super().test_hist_layout_kwargs() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_hist_plot(self): super().test_hist_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43629): Enable RDD with Spark Connect.") def test_kde_plot(self): super().test_kde_plot() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py index 69b46ce2f6b..18cdac7a162 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py @@ -24,27 +24,27 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SeriesPlotMatplotlibParityTests( SeriesPlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_hist(self): super().test_hist() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_hist_plot(self): super().test_hist_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43629): Enable RDD with Spark Connect.") def test_kde_plot(self): super().test_kde_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43712): Enable SeriesPlotMatplotlibParityTests.test_line_plot.") def test_line_plot(self): super().test_line_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + 
@unittest.skip("TODO(SPARK-43713): Enable SeriesPlotMatplotlibParityTests.test_pie_plot.") def test_pie_plot(self): super().test_pie_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_single_value_hist(self): super().test_single_value_hist() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py index 256f1a555f0..5ced8cb226e 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py @@ -24,11 +24,11 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SeriesPlotPlotlyParityTests( SeriesPlotPlotlyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43711): Fix Transformer.transform to work with Spark Connect.") def test_hist_plot(self): super().test_hist_plot() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43629): Enable RDD with Spark Connect.") def test_kde_plot(self): super().test_kde_plot() diff --git a/python/pyspark/pandas/tests/connect/test_parity_categorical.py b/python/pyspark/pandas/tests/connect/test_parity_categorical.py index ef62440e597..3e05eb2c0f3 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_categorical.py +++ b/python/pyspark/pandas/tests/connect/test_parity_categorical.py @@ -29,31 +29,33 @@ class CategoricalParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_astype(self): super().test_astype() - @unittest.skip("Fails in Spark Connect, should enable.") + 
@unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_factorize(self): super().test_factorize() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_remove_categories(self): super().test_remove_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_remove_unused_categories(self): super().test_remove_unused_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_reorder_categories(self): super().test_reorder_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_set_categories(self): super().test_set_categories() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_unstack(self): super().test_unstack() diff --git a/python/pyspark/pandas/tests/connect/test_parity_dataframe.py b/python/pyspark/pandas/tests/connect/test_parity_dataframe.py index 63452f8bd12..c1b9ae2ee11 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_dataframe.py +++ b/python/pyspark/pandas/tests/connect/test_parity_dataframe.py @@ -27,98 +27,117 @@ class DataFrameParityTests(DataFrameTestsMixin, PandasOnSparkTestUtils, ReusedCo def psdf(self): return ps.from_pandas(self.pdf) - # "Spark Connect does not depend on JVM but the tests depend on SparkSession._jvm." - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." 
+ ) def test_aggregate(self): super().test_aggregate() - # TODO(SPARK-41876): Implement DataFrame `toLocalIterator` - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-41876): Implement DataFrame `toLocalIterator`") def test_iterrows(self): super().test_iterrows() - # TODO(SPARK-41876): Implement DataFrame `toLocalIterator` - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-41876): Implement DataFrame `toLocalIterator`") def test_itertuples(self): super().test_itertuples() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummax(self): super().test_cummax() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummax_multiindex_columns(self): super().test_cummax_multiindex_columns() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummin(self): super().test_cummin() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummin_multiindex_columns(self): super().test_cummin_multiindex_columns() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumprod(self): super().test_cumprod() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_cumprod_multiindex_columns(self): super().test_cumprod_multiindex_columns() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumsum(self): super().test_cumsum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumsum_multiindex_columns(self): super().test_cumsum_multiindex_columns() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_cummax(self): - super().test_cummax() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_cummax(self): - super().test_cummax() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_cummax(self): - super().test_cummax() - - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43616): Enable pyspark.pandas.spark.functions.repeat in Spark Connect." + ) def test_binary_operator_multiply(self): super().test_binary_operator_multiply() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43622): Enable pyspark.pandas.spark.functions.var in Spark Connect.") def test_dataframe(self): super().test_dataframe() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_fillna(self): return super().test_fillna() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_pivot_table(self): super().test_pivot_table() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_pivot_table_dtypes(self): super().test_pivot_table_dtypes() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_reset_index_with_default_index_types(self): super().test_reset_index_with_default_index_types() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_transpose(self): super().test_transpose() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_unstack(self): super().test_unstack() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_append(self): super().test_append() diff --git a/python/pyspark/pandas/tests/connect/test_parity_dataframe_slow.py b/python/pyspark/pandas/tests/connect/test_parity_dataframe_slow.py index 898247da6e3..5876be3b4d0 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_dataframe_slow.py +++ b/python/pyspark/pandas/tests/connect/test_parity_dataframe_slow.py @@ -29,75 +29,87 @@ class DataFrameSlowParityTests( def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_at_time(self): super().test_at_time() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_backfill(self): super().test_backfill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_between_time(self): super().test_between_time() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_bfill(self): super().test_bfill() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_cache(self): - super().test_cache() - - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43613): Enable pyspark.pandas.spark.functions.covar in Spark Connect." + ) def test_cov(self): super().test_cov() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_diff(self): super().test_diff() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43615): Enable DataFrameSlowParityTests.test_eval.") def test_eval(self): super().test_eval() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_ffill(self): super().test_ffill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43616): Enable pyspark.pandas.spark.functions.mode in Spark Connect." + ) def test_mode(self): super().test_mode() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_pad(self): super().test_pad() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_pct_change(self): super().test_pct_change() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_persist(self): - super().test_persist() - - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_product(self): super().test_product() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43618): Fix pyspark.sql.column._unary_op to work with Spark Connect.") def test_rank(self): super().test_rank() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_shift(self): super().test_shift() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_style(self): - super().test_style() - - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43619): Enable DataFrameSlowParityTests.test_udt.") def test_udt(self): super().test_udt() diff --git a/python/pyspark/pandas/tests/connect/test_parity_default_index.py b/python/pyspark/pandas/tests/connect/test_parity_default_index.py index 2cb5591c923..9d8ebc5eb33 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_default_index.py +++ b/python/pyspark/pandas/tests/connect/test_parity_default_index.py @@ -24,15 +24,21 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class DefaultIndexParityTests( DefaultIndexTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect."
+ ) def test_default_index_distributed(self): super().test_default_index_distributed() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_default_index_sequence(self): super().test_default_index_sequence() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43623): Enable DefaultIndexParityTests.test_index_distributed_sequence_cleanup." + ) def test_index_distributed_sequence_cleanup(self): super().test_index_distributed_sequence_cleanup() diff --git a/python/pyspark/pandas/tests/connect/test_parity_ewm.py b/python/pyspark/pandas/tests/connect/test_parity_ewm.py index 10686f3bdfc..0e13306fd79 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_ewm.py +++ b/python/pyspark/pandas/tests/connect/test_parity_ewm.py @@ -22,11 +22,11 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class EWMParityTests(EWMTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase, TestUtils): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43624): Enable ExponentialMovingLike.mean with Spark Connect.") def test_ewm_mean(self): super().test_ewm_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43624): Enable ExponentialMovingLike.mean with Spark Connect.") def test_groupby_ewm_func(self): super().test_groupby_ewm_func() diff --git a/python/pyspark/pandas/tests/connect/test_parity_expanding.py b/python/pyspark/pandas/tests/connect/test_parity_expanding.py index bdbc29e9e14..d970530db29 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_expanding.py +++ b/python/pyspark/pandas/tests/connect/test_parity_expanding.py @@ -24,83 +24,123 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class ExpandingParityTests( ExpandingTestsMixin, PandasOnSparkTestUtils, TestUtils, 
ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_count(self): super().test_expanding_count() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43626): Enable pyspark.pandas.spark.functions.kurt in Spark Connect." + ) def test_expanding_kurt(self): super().test_expanding_kurt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_max(self): super().test_expanding_max() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_mean(self): super().test_expanding_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_min(self): super().test_expanding_min() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_quantile(self): super().test_expanding_quantile() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43627): Enable pyspark.pandas.spark.functions.skew in Spark Connect." + ) def test_expanding_skew(self): super().test_expanding_skew() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_expanding_std(self): super().test_expanding_std() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_sum(self): super().test_expanding_sum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_expanding_var(self): super().test_expanding_var() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_count(self): super().test_groupby_expanding_count() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43626): Enable pyspark.pandas.spark.functions.kurt in Spark Connect." + ) def test_groupby_expanding_kurt(self): super().test_groupby_expanding_kurt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_max(self): super().test_groupby_expanding_max() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_mean(self): super().test_groupby_expanding_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_min(self): super().test_groupby_expanding_min() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_groupby_expanding_quantile(self): super().test_groupby_expanding_quantile() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43627): Enable pyspark.pandas.spark.functions.skew in Spark Connect." + ) def test_groupby_expanding_skew(self): super().test_groupby_expanding_skew() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_std(self): super().test_groupby_expanding_std() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_sum(self): super().test_groupby_expanding_sum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_var(self): super().test_groupby_expanding_var() diff --git a/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py b/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py index 0be30f43860..986ddde4db0 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +++ b/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py @@ -24,19 +24,19 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SparkFrameMethodsParityTests( SparkFrameMethodsTestsMixin, TestUtils, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43628): Enable SparkContext with Spark Connect.") def test_checkpoint(self): super().test_checkpoint() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43629): Enable RDD with Spark Connect.") def test_coalesce(self): super().test_coalesce() - @unittest.skip("Fails in Spark 
Connect, should enable.") + @unittest.skip("TODO(SPARK-43630): Implement `localCheckpoint` for Spark Connect DataFrame.") def test_local_checkpoint(self): super().test_local_checkpoint() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43629): Enable RDD with Spark Connect.") def test_repartition(self): super().test_repartition() diff --git a/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py b/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py index 669e078f23c..39934798533 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +++ b/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py @@ -24,15 +24,19 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class GenericFunctionsParityTests( GenericFunctionsTestsMixin, TestUtils, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43631): Enable Series.interpolate with Spark Connect.") def test_interpolate(self): super().test_interpolate() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_prod_precision(self): super().test_prod_precision() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." 
+ ) def test_stat_functions(self): super().test_stat_functions() diff --git a/python/pyspark/pandas/tests/connect/test_parity_groupby.py b/python/pyspark/pandas/tests/connect/test_parity_groupby.py index f6f9c1dac7c..b7a1cfa9e9f 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_groupby.py +++ b/python/pyspark/pandas/tests/connect/test_parity_groupby.py @@ -24,55 +24,79 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class GroupByParityTests( GroupByTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43628): Enable SparkContext with Spark Connect.") def test_apply_with_side_effect(self): super().test_apply_with_side_effect() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43613): Enable pyspark.pandas.spark.functions.covar in Spark Connect." + ) def test_basic_stat_funcs(self): super().test_basic_stat_funcs() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_bfill(self): super().test_bfill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumcount(self): super().test_cumcount() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummax(self): super().test_cummax() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client."
+ ) def test_cummin(self): super().test_cummin() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumprod(self): super().test_cumprod() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumsum(self): super().test_cumsum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43645): Enable pyspark.pandas.spark.functions.stddev in Spark Connect." + ) def test_ddof(self): super().test_ddof() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_ffill(self): super().test_ffill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_fillna(self): super().test_fillna() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_prod(self): super().test_prod() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_shift(self): super().test_shift() diff --git a/python/pyspark/pandas/tests/connect/test_parity_groupby_slow.py b/python/pyspark/pandas/tests/connect/test_parity_groupby_slow.py index 375dc703d95..797a9622874 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_groupby_slow.py +++ b/python/pyspark/pandas/tests/connect/test_parity_groupby_slow.py @@ -24,19 +24,25 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class GroupBySlowParityTests( GroupBySlowTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_diff(self): super().test_diff() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43645): Enable pyspark.pandas.spark.functions.stddev in Spark Connect." + ) def test_dropna(self): super().test_dropna() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43652): Enable GroupBy.rank with Spark Connect.") def test_rank(self): super().test_rank() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43653): Enable GroupBySlowParityTests.test_split_apply_combine_on_series." 
+ ) def test_split_apply_combine_on_series(self): super().test_split_apply_combine_on_series() diff --git a/python/pyspark/pandas/tests/connect/test_parity_internal.py b/python/pyspark/pandas/tests/connect/test_parity_internal.py index 65147bd3d44..d586fec57f7 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_internal.py +++ b/python/pyspark/pandas/tests/connect/test_parity_internal.py @@ -24,7 +24,7 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class InternalFrameParityTests( InternalFrameTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43654): Enable InternalFrameParityTests.test_from_pandas.") def test_from_pandas(self): super().test_from_pandas() diff --git a/python/pyspark/pandas/tests/connect/test_parity_namespace.py b/python/pyspark/pandas/tests/connect/test_parity_namespace.py index e056c7dee1e..72f638ca23c 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_namespace.py +++ b/python/pyspark/pandas/tests/connect/test_parity_namespace.py @@ -22,15 +22,19 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class NamespaceParityTests(NamespaceTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_concat_index_axis(self): super().test_concat_index_axis() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_concat_multiindex_sort(self): super().test_concat_multiindex_sort() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43655): Enable NamespaceParityTests.test_get_index_map.") def test_get_index_map(self): super().test_get_index_map() diff --git a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py index 5544866236d..1bfeda035a4 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +++ b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py @@ -35,11 +35,15 @@ class NumPyCompatParityTests(NumPyCompatTestsMixin, PandasOnSparkTestUtils, Reus def psdf(self): return ps.from_pandas(self.pdf) - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43656): Fix pyspark.sql.column._to_java_column to accept Connect Column." + ) def test_np_spark_compat_frame(self): super().test_np_spark_compat_frame() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43656): Fix pyspark.sql.column._to_java_column to accept Connect Column." + ) def test_np_spark_compat_series(self): super().test_np_spark_compat_series() diff --git a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py index daeeda53f52..5d6b6a80b9b 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +++ b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py @@ -24,39 +24,51 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class OpsOnDiffFramesGroupByParityTests( OpsOnDiffFramesGroupByTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_cumcount(self): super().test_cumcount() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummax(self): super().test_cummax() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummin(self): super().test_cummin() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumprod(self): super().test_cumprod() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumsum(self): super().test_cumsum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_diff(self): super().test_diff() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_fillna(self): super().test_fillna() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_fillna(self): - super().test_fillna() - - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_shift(self): super().test_shift() diff --git a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py index dbb5f00a0c6..90fa36f3b98 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +++ b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py @@ -29,31 +29,45 @@ class OpsOnDiffFramesGroupByExpandingParityTests( TestUtils, ReusedConnectTestCase, ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_count(self): super().test_groupby_expanding_count() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_min(self): super().test_groupby_expanding_min() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_max(self): super().test_groupby_expanding_max() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_mean(self): super().test_groupby_expanding_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_sum(self): super().test_groupby_expanding_sum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_groupby_expanding_std(self): super().test_groupby_expanding_std() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_expanding_var(self): super().test_groupby_expanding_var() diff --git a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py index 910ec2c8bd5..dd82e443256 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +++ b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py @@ -29,31 +29,45 @@ class OpsOnDiffFramesGroupByRollingParityTests( TestUtils, ReusedConnectTestCase, ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_count(self): super().test_groupby_rolling_count() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_min(self): super().test_groupby_rolling_min() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_max(self): super().test_groupby_rolling_max() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_mean(self): super().test_groupby_rolling_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_groupby_rolling_sum(self): super().test_groupby_rolling_sum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_std(self): super().test_groupby_rolling_std() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_var(self): super().test_groupby_rolling_var() diff --git a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_slow.py b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_slow.py index e14686adf30..c9713d1905d 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_slow.py +++ b/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_slow.py @@ -24,31 +24,43 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class OpsOnDiffFramesEnabledSlowParityTests( OpsOnDiffFramesEnabledSlowTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43613): Enable pyspark.pandas.spark.functions.covar in Spark Connect." + ) def test_cov(self): super().test_cov() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_diff(self): super().test_diff() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43658): Fix unexpected `SparkConnectGrpcException` from Spark Connect client." 
+ ) def test_frame_iloc_setitem(self): super().test_frame_iloc_setitem() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43652): Enable GroupBy.rank with Spark Connect.") def test_rank(self): super().test_rank() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43659): Enable OpsOnDiffFramesEnabledSlowParityTests.test_series_eq." + ) def test_series_eq(self): super().test_series_eq() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43658): Fix unexpected `SparkConnectGrpcException` from Spark Connect client." + ) def test_series_iloc_setitem(self): super().test_series_iloc_setitem() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_shift(self): super().test_shift() diff --git a/python/pyspark/pandas/tests/connect/test_parity_resample.py b/python/pyspark/pandas/tests/connect/test_parity_resample.py index cd4b125b1b4..ab3e7f4410b 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_resample.py +++ b/python/pyspark/pandas/tests/connect/test_parity_resample.py @@ -24,11 +24,11 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class ResampleTestsParityMixin( ResampleTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43660): Enable `resample` with Spark Connect.") def test_dataframe_resample(self): super().test_dataframe_resample() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43660): Enable `resample` with Spark Connect.") def test_series_resample(self): super().test_series_resample() diff --git a/python/pyspark/pandas/tests/connect/test_parity_reshape.py b/python/pyspark/pandas/tests/connect/test_parity_reshape.py index 
2d8f856e9ed..e221ff82fa4 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_reshape.py +++ b/python/pyspark/pandas/tests/connect/test_parity_reshape.py @@ -22,11 +22,11 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class ReshapeParityTests(ReshapeTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43661): Enable ReshapeParityTests.test_get_dummies_date_datetime.") def test_get_dummies_date_datetime(self): super().test_get_dummies_date_datetime() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43662): Enable ReshapeParityTests.test_merge_asof.") def test_merge_asof(self): super().test_merge_asof() diff --git a/python/pyspark/pandas/tests/connect/test_parity_rolling.py b/python/pyspark/pandas/tests/connect/test_parity_rolling.py index cb82b4d6dc9..77b56c50b91 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_rolling.py +++ b/python/pyspark/pandas/tests/connect/test_parity_rolling.py @@ -24,83 +24,123 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class RollingParityTests( RollingTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_count(self): super().test_groupby_rolling_count() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43626): Enable pyspark.pandas.spark.functions.kurt in Spark Connect." + ) def test_groupby_rolling_kurt(self): super().test_groupby_rolling_kurt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_groupby_rolling_max(self): super().test_groupby_rolling_max() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_mean(self): super().test_groupby_rolling_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_min(self): super().test_groupby_rolling_min() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_quantile(self): super().test_groupby_rolling_quantile() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43627): Enable pyspark.pandas.spark.functions.skew in Spark Connect." + ) def test_groupby_rolling_skew(self): super().test_groupby_rolling_skew() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_std(self): super().test_groupby_rolling_std() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_sum(self): super().test_groupby_rolling_sum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_groupby_rolling_var(self): super().test_groupby_rolling_var() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_rolling_count(self): super().test_rolling_count() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43626): Enable pyspark.pandas.spark.functions.kurt in Spark Connect." + ) def test_rolling_kurt(self): super().test_rolling_kurt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_rolling_max(self): super().test_rolling_max() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_rolling_mean(self): super().test_rolling_mean() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_rolling_min(self): super().test_rolling_min() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_rolling_quantile(self): super().test_rolling_quantile() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43627): Enable pyspark.pandas.spark.functions.skew in Spark Connect." + ) def test_rolling_skew(self): super().test_rolling_skew() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_rolling_std(self): super().test_rolling_std() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_rolling_sum(self): super().test_rolling_sum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_rolling_var(self): super().test_rolling_var() diff --git a/python/pyspark/pandas/tests/connect/test_parity_series.py b/python/pyspark/pandas/tests/connect/test_parity_series.py index b1b5da3f69f..35a9ba1fca4 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_series.py +++ b/python/pyspark/pandas/tests/connect/test_parity_series.py @@ -22,111 +22,151 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class SeriesParityTests(SeriesTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_argsort(self): super().test_argsort() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_asof(self): super().test_asof() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_at_time(self): super().test_at_time() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_backfill(self): super().test_backfill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43610): Enable `InternalFrame.attach_distributed_column` in Spark Connect." + ) def test_between_time(self): super().test_between_time() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_bfill(self): super().test_bfill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43663): Enable SeriesParityTests.test_compare.") def test_compare(self): super().test_compare() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43613): Enable pyspark.pandas.spark.functions.covar in Spark Connect." + ) def test_cov(self): super().test_cov() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummax(self): super().test_cummax() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cummin(self): super().test_cummin() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumprod(self): super().test_cumprod() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_cumsum(self): super().test_cumsum() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_diff(self): super().test_diff() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43620): Support `Column` for SparkConnectColumn.__getitem__.") def test_factorize(self): super().test_factorize() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_ffill(self): super().test_ffill() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_fillna(self): super().test_fillna() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_iteritems(self): - super().test_iteritems() - - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43616): Enable pyspark.pandas.spark.functions.mode in Spark Connect." + ) def test_mode(self): super().test_mode() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_pad(self): super().test_pad() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_pct_change(self): super().test_pct_change() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_product(self): super().test_product() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43618): Fix pyspark.sq.column._unary_op to work with Spark Connect.") def test_rank(self): super().test_rank() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_replace(self): super().test_replace() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." 
+ ) def test_reset_index_with_default_index_types(self): super().test_reset_index_with_default_index_types() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_shift(self): super().test_shift() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43619): Enable DataFrameSlowParityTests.test_udt.") def test_udt(self): super().test_udt() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43611): Fix unexpected `AnalysisException` from Spark Connect client." + ) def test_unstack(self): super().test_unstack() diff --git a/python/pyspark/pandas/tests/connect/test_parity_series_string.py b/python/pyspark/pandas/tests/connect/test_parity_series_string.py index 9f170a65494..c8e529d9df0 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_series_string.py +++ b/python/pyspark/pandas/tests/connect/test_parity_series_string.py @@ -24,7 +24,9 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class SeriesStringParityTests( SeriesStringTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43621): Enable pyspark.pandas.spark.functions.repeat in Spark Connect." 
+ ) def test_string_repeat(self): super().test_string_repeat() diff --git a/python/pyspark/pandas/tests/connect/test_parity_sql.py b/python/pyspark/pandas/tests/connect/test_parity_sql.py index 5afda98929f..6c2979f785a 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_sql.py +++ b/python/pyspark/pandas/tests/connect/test_parity_sql.py @@ -22,15 +22,17 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class SQLParityTests(SQLTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43664): Fix TABLE_OR_VIEW_NOT_FOUND from SQLParityTests.") def test_sql_with_index_col(self): super().test_sql_with_index_col() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip("TODO(SPARK-43664): Fix TABLE_OR_VIEW_NOT_FOUND from SQLParityTests.") def test_sql_with_pandas_on_spark_objects(self): super().test_sql_with_pandas_on_spark_objects() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43665): Enable PandasSQLStringFormatter.vformat to work with Spark Connect." + ) def test_sql_with_python_objects(self): super().test_sql_with_python_objects() diff --git a/python/pyspark/pandas/tests/connect/test_parity_stats.py b/python/pyspark/pandas/tests/connect/test_parity_stats.py index 0b354b58953..d9a8bf8e31a 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_stats.py +++ b/python/pyspark/pandas/tests/connect/test_parity_stats.py @@ -22,35 +22,51 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils class StatsParityTests(StatsTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43626): Enable pyspark.pandas.spark.functions.kurt in Spark Connect." 
+ ) def test_axis_on_dataframe(self): super().test_axis_on_dataframe() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_product(self): super().test_product() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43627): Enable pyspark.pandas.spark.functions.skew in Spark Connect." + ) def test_skew_kurt_numerical_stability(self): super().test_skew_kurt_numerical_stability() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43645): Enable pyspark.pandas.spark.functions.stddev in Spark Connect." + ) def test_stat_functions(self): super().test_stat_functions() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43645): Enable pyspark.pandas.spark.functions.stddev in Spark Connect." + ) def test_stat_functions_multiindex_column(self): super().test_stat_functions_multiindex_column() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_stats_on_boolean_dataframe(self): super().test_stats_on_boolean_dataframe() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." + ) def test_stats_on_boolean_series(self): super().test_stats_on_boolean_series() - @unittest.skip("Fails in Spark Connect, should enable.") + @unittest.skip( + "TODO(SPARK-43617): Enable pyspark.pandas.spark.functions.product in Spark Connect." 
+ ) def test_stats_on_non_numeric_columns_should_be_discarded_if_numeric_only_is_true(self): super().test_stats_on_non_numeric_columns_should_be_discarded_if_numeric_only_is_true() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org