This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 68c47d5e74b [SPARK-40010][PYTHON][DOCS][FOLLOWUP] Make pyspark.sql.window examples self-contained (part 2) 68c47d5e74b is described below commit 68c47d5e74b8be481318388b3cc8b40ead35beea Author: Qian.Sun <qian.sun2...@gmail.com> AuthorDate: Fri Aug 26 14:17:07 2022 +0900 [SPARK-40010][PYTHON][DOCS][FOLLOWUP] Make pyspark.sql.window examples self-contained (part 2) ### What changes were proposed in this pull request? As mentioned [here](https://issues.apache.org/jira/browse/SPARK-40148), we need to have examples for several APIs, such as orderBy, in `pyspark.sql.window`. ### Why are the changes needed? To make the documentation more readable and the examples easy to copy and paste directly into the PySpark shell. ### Does this PR introduce _any_ user-facing change? Yes, documentation changes only. ### How was this patch tested? Tested by the Python doctests run in GitHub Actions. Closes #37657 from dcoliversun/SPARK-40010. Authored-by: Qian.Sun <qian.sun2...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/window.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 7bb59f36289..898cdfec14a 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -79,6 +79,44 @@ class Window: ---------- cols : str, :class:`Column` or list names of columns or expressions + + Returns + ------- + :class:`WindowSpec` + A :class:`WindowSpec` with the partitioning defined. + + Examples + -------- + >>> from pyspark.sql import Window + >>> from pyspark.sql.functions import row_number + >>> df = spark.createDataFrame( + ... 
[(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"]) + >>> df.show() + +---+--------+ + | id|category| + +---+--------+ + | 1| a| + | 1| a| + | 2| a| + | 1| b| + | 2| b| + | 3| b| + +---+--------+ + + Show row numbers ordered by ``id`` within each ``category`` partition. + + >>> window = Window.partitionBy("category").orderBy("id") + >>> df.withColumn("row_number", row_number().over(window)).show() + +---+--------+----------+ + | id|category|row_number| + +---+--------+----------+ + | 1| a| 1| + | 1| a| 2| + | 2| a| 3| + | 1| b| 1| + | 2| b| 2| + | 3| b| 3| + +---+--------+----------+ """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None @@ -95,6 +133,44 @@ class Window: ---------- cols : str, :class:`Column` or list names of columns or expressions + + Returns + ------- + :class:`WindowSpec` + A :class:`WindowSpec` with the ordering defined. + + Examples + -------- + >>> from pyspark.sql import Window + >>> from pyspark.sql.functions import row_number + >>> df = spark.createDataFrame( + ... [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"]) + >>> df.show() + +---+--------+ + | id|category| + +---+--------+ + | 1| a| + | 1| a| + | 2| a| + | 1| b| + | 2| b| + | 3| b| + +---+--------+ + + Show row numbers ordered by ``category`` within each ``id`` partition. + + >>> window = Window.partitionBy("id").orderBy("category") + >>> df.withColumn("row_number", row_number().over(window)).show() + +---+--------+----------+ + | id|category|row_number| + +---+--------+----------+ + | 1| a| 1| + | 1| a| 2| + | 1| b| 3| + | 2| a| 1| + | 2| b| 2| + | 3| b| 1| + +---+--------+----------+ """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None @@ -134,6 +210,12 @@ class Window: The frame is unbounded if this is ``Window.unboundedFollowing``, or any value greater than or equal to 9223372036854775807. 
+ Returns + ------- + :class:`WindowSpec` + A :class:`WindowSpec` with the frame boundaries defined, + from `start` (inclusive) to `end` (inclusive). + Examples -------- >>> from pyspark.sql import Window @@ -214,6 +296,12 @@ class Window: The frame is unbounded if this is ``Window.unboundedFollowing``, or any value greater than or equal to min(sys.maxsize, 9223372036854775807). + Returns + ------- + :class:`WindowSpec` + A :class:`WindowSpec` with the frame boundaries defined, + from `start` (inclusive) to `end` (inclusive). + Examples -------- >>> from pyspark.sql import Window --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org