This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e94cb2bcfde [SPARK-41927][CONNECT][PYTHON] Add the unsupported list for `GroupedData` e94cb2bcfde is described below commit e94cb2bcfde8b24d08be314dea73c435208adcd1 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Sat Jan 7 13:52:11 2023 +0900 [SPARK-41927][CONNECT][PYTHON] Add the unsupported list for `GroupedData` ### What changes were proposed in this pull request? Add the unsupported list for `GroupedData`; the listed methods come from [PandasGroupedOpsMixin](https://github.com/apache/spark/blob/0eaa8e1e76ab6ecdd3b51d751857e50530ccdeb6/python/pyspark/sql/pandas/group_ops.py#L37) ### Why are the changes needed? To explicitly tell users that these methods are not implemented. ### Does this PR introduce _any_ user-facing change? Yes, calling these methods now raises NotImplementedError. ### How was this patch tested? Added a unit test (UT). Closes #39437 from zhengruifeng/connect_group_unsupported. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/group.py | 12 ++++++++++++ python/pyspark/sql/tests/connect/test_connect_basic.py | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py index c65167742ec..df73f5b6fa2 100644 --- a/python/pyspark/sql/connect/group.py +++ b/python/pyspark/sql/connect/group.py @@ -206,6 +206,18 @@ class GroupedData: pivot.__doc__ = PySparkGroupedData.pivot.__doc__ + def apply(self, *args: Any, **kwargs: Any) -> None: + raise NotImplementedError("apply() is not implemented.") + + def applyInPandas(self, *args: Any, **kwargs: Any) -> None: + raise NotImplementedError("applyInPandas() is not implemented.") + + def applyInPandasWithState(self, *args: Any, **kwargs: Any) -> None: + raise NotImplementedError("applyInPandasWithState() is not implemented.") + + def cogroup(self, *args: Any, **kwargs: Any) -> None: + raise NotImplementedError("cogroup() is not implemented.") + 
GroupedData.__doc__ = PySparkGroupedData.__doc__ diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 31a7e6fdbad..8977c5ac310 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -2027,6 +2027,18 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase): with self.assertRaises(NotImplementedError): getattr(df, f)() + def test_unsupported_group_functions(self): + # SPARK-41927: Disable unsupported functions. + cg = self.connect.read.table(self.tbl_name).groupBy("id") + for f in ( + "apply", + "applyInPandas", + "applyInPandasWithState", + "cogroup", + ): + with self.assertRaises(NotImplementedError): + getattr(cg, f)() + @unittest.skipIf(not should_test_connect, connect_requirement_message) class ChannelBuilderTests(ReusedPySparkTestCase): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org