This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 984e16b60862 [SPARK-53657][PYTHON][TESTS] Enable doctests for `GroupedData.agg`
984e16b60862 is described below

commit 984e16b60862a5498a1deefbf2a5514c68e3b29d
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Sep 22 18:59:20 2025 +0800

    [SPARK-53657][PYTHON][TESTS] Enable doctests for `GroupedData.agg`
    
    ### What changes were proposed in this pull request?
    Enable doctests for `GroupedData.agg`; some doctests were skipped due to dependency on pandas/pyarrow installations.
    
    ### Why are the changes needed?
    to improve test coverage
    
    ### Does this PR introduce _any_ user-facing change?
    doc-only changes
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #52404 from zhengruifeng/enable_group_agg_doctest.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 python/pyspark/sql/connect/group.py |  4 ++++
 python/pyspark/sql/group.py         | 13 +++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py
index b7b0473c13ce..04f8f26ecf38 100644
--- a/python/pyspark/sql/connect/group.py
+++ b/python/pyspark/sql/connect/group.py
@@ -583,9 +583,13 @@ def _test() -> None:
     import doctest
     from pyspark.sql import SparkSession as PySparkSession
     import pyspark.sql.connect.group
+    from pyspark.testing.utils import have_pandas, have_pyarrow
 
     globs = pyspark.sql.connect.group.__dict__.copy()
 
+    if not have_pandas or not have_pyarrow:
+        del pyspark.sql.connect.group.GroupedData.agg.__doc__
+
     globs["spark"] = (
         PySparkSession.builder.appName("sql.connect.group tests")
         .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]"))
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 2e6941e48541..05021aabb50f 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -126,9 +126,8 @@ class GroupedData(PandasGroupedOpsMixin):
 
         Examples
         --------
-        >>> import pandas as pd  # doctest: +SKIP
+        >>> import pandas as pd
         >>> from pyspark.sql import functions as sf
-        >>> from pyspark.sql.functions import pandas_udf
         >>> df = spark.createDataFrame(
         ...      [(2, "Alice"), (3, "Alice"), (5, "Bob"), (10, "Bob")], ["age", "name"])
         >>> df.show()
@@ -166,11 +165,12 @@ class GroupedData(PandasGroupedOpsMixin):
 
         Same as above but uses pandas UDF.
 
-        >>> @pandas_udf('int')  # doctest: +SKIP
+        >>> from pyspark.sql.functions import pandas_udf
+        >>> @pandas_udf('int')
         ... def min_udf(v: pd.Series) -> int:
         ...     return v.min()
         ...
-        >>> df.groupBy(df.name).agg(min_udf(df.age)).sort("name").show()  # doctest: +SKIP
+        >>> df.groupBy(df.name).agg(min_udf(df.age)).sort("name").show()
         +-----+------------+
         | name|min_udf(age)|
         +-----+------------+
@@ -533,8 +533,13 @@ def _test() -> None:
     import doctest
     from pyspark.sql import SparkSession
     import pyspark.sql.group
+    from pyspark.testing.utils import have_pandas, have_pyarrow
 
     globs = pyspark.sql.group.__dict__.copy()
+
+    if not have_pandas or not have_pyarrow:
+        del pyspark.sql.group.GroupedData.agg.__doc__
+
     spark = SparkSession.builder.master("local[4]").appName("sql.group tests").getOrCreate()
     globs["spark"] = spark
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to