This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new dab20b31388b [SPARK-48057][PYTHON][CONNECT][TESTS] Enable `GroupedApplyInPandasTests.test_grouped_with_empty_partition` dab20b31388b is described below commit dab20b31388ba7bcd2ab4d4424cbbd072bf84c30 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Tue Apr 30 12:19:18 2024 -0700 [SPARK-48057][PYTHON][CONNECT][TESTS] Enable `GroupedApplyInPandasTests.test_grouped_with_empty_partition` ### What changes were proposed in this pull request? Enable `GroupedApplyInPandasTests.test_grouped_with_empty_partition` ### Why are the changes needed? test coverage ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #46299 from zhengruifeng/fix_test_grouped_with_empty_partition. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py | 4 ---- python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py b/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py index 1cc4ce012623..8a1da440c799 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py @@ -38,10 +38,6 @@ class GroupedApplyInPandasTests(GroupedApplyInPandasTestsMixin, ReusedConnectTes def test_apply_in_pandas_returning_incompatible_type(self): super().test_apply_in_pandas_returning_incompatible_type() - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def test_grouped_with_empty_partition(self): - super().test_grouped_with_empty_partition() - if __name__ == "__main__": from pyspark.sql.tests.connect.test_parity_pandas_grouped_map import * # 
noqa: F401 diff --git a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py index f43dafc0a4a1..1e86e12eb74f 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py @@ -680,13 +680,13 @@ class GroupedApplyInPandasTestsMixin: data = [Row(id=1, x=2), Row(id=1, x=3), Row(id=2, x=4)] expected = [Row(id=1, x=5), Row(id=1, x=5), Row(id=2, x=4)] num_parts = len(data) + 1 - df = self.spark.createDataFrame(self.sc.parallelize(data, numSlices=num_parts)) + df = self.spark.createDataFrame(data).repartition(num_parts) f = pandas_udf( lambda pdf: pdf.assign(x=pdf["x"].sum()), "id long, x int", PandasUDFType.GROUPED_MAP ) - result = df.groupBy("id").apply(f).collect() + result = df.groupBy("id").apply(f).sort("id").collect() self.assertEqual(result, expected) def test_grouped_over_window(self): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org