This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 3f1a0a50452 [SPARK-44853][PYTHON][DOCS] Refine docstring of DataFrame.columns property
3f1a0a50452 is described below

commit 3f1a0a504524b52d499e4b428617b43ff49f9d3b
Author: allisonwang-db <allison.w...@databricks.com>
AuthorDate: Fri Aug 18 17:31:20 2023 +0800

    [SPARK-44853][PYTHON][DOCS] Refine docstring of DataFrame.columns property

    ### What changes were proposed in this pull request?

    This PR refines the docstring of `df.columns` and adds more examples.

    ### Why are the changes needed?

    To make PySpark documentation better.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    doctest

    Closes #42540 from allisonwang-db/spark-44853-refine-df-columns.

    Authored-by: allisonwang-db <allison.w...@databricks.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
    (cherry picked from commit fc0be7ebace3aaf22954f1311532db5c33f4d8fa)
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/dataframe.py | 62 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 932c29910bb..03aaee8f2ec 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -2084,7 +2084,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
 
     @property
     def columns(self) -> List[str]:
-        """Returns all column names as a list.
+        """
+        Retrieves the names of all columns in the :class:`DataFrame` as a list.
+
+        The order of the column names in the list reflects their order in the DataFrame.
 
         .. versionadded:: 1.3.0
 
@@ -2094,14 +2097,65 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         Returns
         -------
         list
-            List of column names.
+            List of column names in the DataFrame.
 
         Examples
         --------
+        Example 1: Retrieve column names of a DataFrame
+
         >>> df = spark.createDataFrame(
-        ...     [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])
+        ...     [(14, "Tom", "CA"), (23, "Alice", "NY"), (16, "Bob", "TX")],
+        ...     ["age", "name", "state"]
+        ... )
         >>> df.columns
-        ['age', 'name']
+        ['age', 'name', 'state']
+
+        Example 2: Using column names to project specific columns
+
+        >>> selected_cols = [col for col in df.columns if col != "age"]
+        >>> df.select(selected_cols).show()
+        +-----+-----+
+        | name|state|
+        +-----+-----+
+        |  Tom|   CA|
+        |Alice|   NY|
+        |  Bob|   TX|
+        +-----+-----+
+
+        Example 3: Checking if a specific column exists in a DataFrame
+
+        >>> "state" in df.columns
+        True
+        >>> "salary" in df.columns
+        False
+
+        Example 4: Iterating over columns to apply a transformation
+
+        >>> import pyspark.sql.functions as f
+        >>> for col_name in df.columns:
+        ...     df = df.withColumn(col_name, f.upper(f.col(col_name)))
+        >>> df.show()
+        +---+-----+-----+
+        |age| name|state|
+        +---+-----+-----+
+        | 14|  TOM|   CA|
+        | 23|ALICE|   NY|
+        | 16|  BOB|   TX|
+        +---+-----+-----+
+
+        Example 5: Renaming columns and checking the updated column names
+
+        >>> df = df.withColumnRenamed("name", "first_name")
+        >>> df.columns
+        ['age', 'first_name', 'state']
+
+        Example 6: Using the `columns` property to ensure two DataFrames have the
+        same columns before a union
+
+        >>> df2 = spark.createDataFrame(
+        ...     [(30, "Eve", "FL"), (40, "Sam", "WA")], ["age", "name", "location"])
+        >>> df.columns == df2.columns
+        False
         """
         return [f.name for f in self.schema.fields]
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org