This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 563b3cab749f [SPARK-45259][PYTHON][DOCS] Refine docstring of `count`
563b3cab749f is described below

commit 563b3cab749f0104ef399730fe69fa4efd14be84
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue Nov 7 09:52:51 2023 -0800

    [SPARK-45259][PYTHON][DOCS] Refine docstring of `count`

    ### What changes were proposed in this pull request?

    This PR proposes to improve the docstring of `count`.

    ### Why are the changes needed?

    For end users, and better usability of PySpark.

    ### Does this PR introduce _any_ user-facing change?

    Yes, it fixes the user-facing documentation.

    ### How was this patch tested?

    Manually tested.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #43685 from HyukjinKwon/SPARK-45259.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/functions.py | 47 +++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index a32f04164f31..81d120e2ff49 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1162,15 +1162,48 @@ def count(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    Count by all columns (start), and by a column that does not count ``None``.
+    Example 1: Count all rows in a DataFrame
 
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(None,), ("a",), ("b",), ("c",)], schema=["alphabets"])
-    >>> df.select(count(expr("*")), count(df.alphabets)).show()
-    +--------+----------------+
-    |count(1)|count(alphabets)|
-    +--------+----------------+
-    |       4|               3|
-    +--------+----------------+
+    >>> df.select(sf.count(sf.expr("*"))).show()
+    +--------+
+    |count(1)|
+    +--------+
+    |       4|
+    +--------+
+
+    Example 2: Count non-null values in a specific column
+
+    >>> from pyspark.sql import functions as sf
+    >>> df.select(sf.count(df.alphabets)).show()
+    +----------------+
+    |count(alphabets)|
+    +----------------+
+    |               3|
+    +----------------+
+
+    Example 3: Count all rows in a DataFrame with multiple columns
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame(
+    ...     [(1, "apple"), (2, "banana"), (3, None)], schema=["id", "fruit"])
+    >>> df.select(sf.count(sf.expr("*"))).show()
+    +--------+
+    |count(1)|
+    +--------+
+    |       3|
+    +--------+
+
+    Example 4: Count non-null values in multiple columns
+
+    >>> from pyspark.sql import functions as sf
+    >>> df.select(sf.count(df.id), sf.count(df.fruit)).show()
+    +---------+------------+
+    |count(id)|count(fruit)|
+    +---------+------------+
+    |        3|           2|
+    +---------+------------+
     """
     return _invoke_function_over_columns("count", col)
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org