This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 361072dad35c [SPARK-45983][PYTHON][DOCS] Refine docstring of `DataFrame.subtract` 361072dad35c is described below commit 361072dad35c2700e4b2b3661193ac02ee754ea1 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Sat Nov 18 10:13:42 2023 -0800 [SPARK-45983][PYTHON][DOCS] Refine docstring of `DataFrame.subtract` ### What changes were proposed in this pull request? This PR proposes to improve the docstring of `DataFrame.subtract`. ### Why are the changes needed? For end users, and better usability of PySpark. ### Does this PR introduce _any_ user-facing change? Yes, it fixes the user facing documentation. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43882 from HyukjinKwon/SPARK-45983. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/dataframe.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 3dc95fd80655..f83f75a47896 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -4829,14 +4829,40 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): Examples -------- + Example 1: Subtracting two DataFrames with the same schema + >>> df1 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"]) >>> df2 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"]) - >>> df1.subtract(df2).show() + >>> result_df = df1.subtract(df2) + >>> result_df.show() +---+---+ | C1| C2| +---+---+ | c| 4| +---+---+ + + Example 2: Subtracting two DataFrames with different schemas + + >>> df1 = spark.createDataFrame([(1, "A"), (2, "B")], ["id", "value"]) + >>> df2 = spark.createDataFrame([(2, "B"), (3, "C")], ["id", "value"]) + >>> result_df = df1.subtract(df2) + >>> 
result_df.show() + +---+-----+ + | id|value| + +---+-----+ + | 1| A| + +---+-----+ + + Example 3: Subtracting two DataFrames with mismatched columns + + >>> df1 = spark.createDataFrame([(1, 2)], ["A", "B"]) + >>> df2 = spark.createDataFrame([(1, 2)], ["C", "D"]) + >>> result_df = df1.subtract(df2) + >>> result_df.show() + +---+---+ + | A| B| + +---+---+ + +---+---+ """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sparkSession) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org