Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21857#discussion_r205561601 --- Diff: python/pyspark/sql/dataframe.py --- @@ -293,6 +293,28 @@ def explain(self, extended=False): else: print(self._jdf.queryExecution().simpleString()) + @since(2.4) + def exceptAll(self, other): + """Return a new :class:`DataFrame` containing rows in this :class:`DataFrame` but + not in another :class:`DataFrame` while preserving duplicates. + + This is equivalent to `EXCEPT ALL` in SQL. + + >>> df1 = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3), ("c", 4)], ["C1", "C2"]) --- End diff -- Maybe it would be better to add one more duplicate row to the example, to demonstrate the `preserving duplicates` behavior.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org