This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e94cb2bcfde [SPARK-41927][CONNECT][PYTHON] Add the unsupported list for `GroupedData`
e94cb2bcfde is described below

commit e94cb2bcfde8b24d08be314dea73c435208adcd1
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Sat Jan 7 13:52:11 2023 +0900

    [SPARK-41927][CONNECT][PYTHON] Add the unsupported list for `GroupedData`
    
    ### What changes were proposed in this pull request?
    Add the unsupported list for `GroupedData`; the methods come from [PandasGroupedOpsMixin](https://github.com/apache/spark/blob/0eaa8e1e76ab6ecdd3b51d751857e50530ccdeb6/python/pyspark/sql/pandas/group_ops.py#L37)
    
    ### Why are the changes needed?
    to explicitly tell users they are not implemented
    
    ### Does this PR introduce _any_ user-facing change?
    yes, NotImplementedError
    
    ### How was this patch tested?
    added UT
    
    Closes #39437 from zhengruifeng/connect_group_unsupported.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/connect/group.py                    | 12 ++++++++++++
 python/pyspark/sql/tests/connect/test_connect_basic.py | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py
index c65167742ec..df73f5b6fa2 100644
--- a/python/pyspark/sql/connect/group.py
+++ b/python/pyspark/sql/connect/group.py
@@ -206,6 +206,18 @@ class GroupedData:
 
     pivot.__doc__ = PySparkGroupedData.pivot.__doc__
 
+    def apply(self, *args: Any, **kwargs: Any) -> None:
+        raise NotImplementedError("apply() is not implemented.")
+
+    def applyInPandas(self, *args: Any, **kwargs: Any) -> None:
+        raise NotImplementedError("applyInPandas() is not implemented.")
+
+    def applyInPandasWithState(self, *args: Any, **kwargs: Any) -> None:
+        raise NotImplementedError("applyInPandasWithState() is not implemented.")
+
+    def cogroup(self, *args: Any, **kwargs: Any) -> None:
+        raise NotImplementedError("cogroup() is not implemented.")
+
 
 GroupedData.__doc__ = PySparkGroupedData.__doc__
 
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 31a7e6fdbad..8977c5ac310 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -2027,6 +2027,18 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
             with self.assertRaises(NotImplementedError):
                 getattr(df, f)()
 
+    def test_unsupported_group_functions(self):
+        # SPARK-41927: Disable unsupported functions.
+        cg = self.connect.read.table(self.tbl_name).groupBy("id")
+        for f in (
+            "apply",
+            "applyInPandas",
+            "applyInPandasWithState",
+            "cogroup",
+        ):
+            with self.assertRaises(NotImplementedError):
+                getattr(cg, f)()
+
 
 @unittest.skipIf(not should_test_connect, connect_requirement_message)
 class ChannelBuilderTests(ReusedPySparkTestCase):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to