This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 984e16b60862 [SPARK-53657][PYTHON][TESTS] Enable doctests for `GroupedData.agg`
984e16b60862 is described below
commit 984e16b60862a5498a1deefbf2a5514c68e3b29d
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Sep 22 18:59:20 2025 +0800
[SPARK-53657][PYTHON][TESTS] Enable doctests for `GroupedData.agg`
### What changes were proposed in this pull request?
Enable doctests for `GroupedData.agg`, some doctests were skipped due to
dependency on pandas/pyarrow installations.
### Why are the changes needed?
to improve test coverage
### Does this PR introduce _any_ user-facing change?
doc-only changes
### How was this patch tested?
ci
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #52404 from zhengruifeng/enable_group_agg_doctest.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/connect/group.py | 4 ++++
python/pyspark/sql/group.py | 13 +++++++++----
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py
index b7b0473c13ce..04f8f26ecf38 100644
--- a/python/pyspark/sql/connect/group.py
+++ b/python/pyspark/sql/connect/group.py
@@ -583,9 +583,13 @@ def _test() -> None:
import doctest
from pyspark.sql import SparkSession as PySparkSession
import pyspark.sql.connect.group
+ from pyspark.testing.utils import have_pandas, have_pyarrow
globs = pyspark.sql.connect.group.__dict__.copy()
+ if not have_pandas or not have_pyarrow:
+ del pyspark.sql.connect.group.GroupedData.agg.__doc__
+
globs["spark"] = (
PySparkSession.builder.appName("sql.connect.group tests")
.remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]"))
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 2e6941e48541..05021aabb50f 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -126,9 +126,8 @@ class GroupedData(PandasGroupedOpsMixin):
Examples
--------
- >>> import pandas as pd # doctest: +SKIP
+ >>> import pandas as pd
>>> from pyspark.sql import functions as sf
- >>> from pyspark.sql.functions import pandas_udf
>>> df = spark.createDataFrame(
... [(2, "Alice"), (3, "Alice"), (5, "Bob"), (10, "Bob")], ["age", "name"])
>>> df.show()
@@ -166,11 +165,12 @@ class GroupedData(PandasGroupedOpsMixin):
Same as above but uses pandas UDF.
- >>> @pandas_udf('int') # doctest: +SKIP
+ >>> from pyspark.sql.functions import pandas_udf
+ >>> @pandas_udf('int')
... def min_udf(v: pd.Series) -> int:
... return v.min()
...
- >>> df.groupBy(df.name).agg(min_udf(df.age)).sort("name").show() # doctest: +SKIP
+ >>> df.groupBy(df.name).agg(min_udf(df.age)).sort("name").show()
+-----+------------+
| name|min_udf(age)|
+-----+------------+
@@ -533,8 +533,13 @@ def _test() -> None:
import doctest
from pyspark.sql import SparkSession
import pyspark.sql.group
+ from pyspark.testing.utils import have_pandas, have_pyarrow
globs = pyspark.sql.group.__dict__.copy()
+
+ if not have_pandas or not have_pyarrow:
+ del pyspark.sql.group.GroupedData.agg.__doc__
+
spark = SparkSession.builder.master("local[4]").appName("sql.group tests").getOrCreate()
globs["spark"] = spark
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]