This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 0df4c01b7c4  [SPARK-43502][PYTHON][CONNECT] `DataFrame.drop` should accept empty column
0df4c01b7c4 is described below

commit 0df4c01b7c4d4476fe0de9dccb3425cc1295fc85
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Tue May 16 12:38:08 2023 +0800

    [SPARK-43502][PYTHON][CONNECT] `DataFrame.drop` should accept empty column

    ### What changes were proposed in this pull request?
    Make `DataFrame.drop` accept an empty column list.

    ### Why are the changes needed?
    To be consistent with vanilla PySpark.

    ### Does this PR introduce _any_ user-facing change?
    Yes.

    ```
    In [1]: df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))

    In [2]: df.drop()
    ```

    before:
    ```
    In [2]: df.drop()
    ---------------------------------------------------------------------------
    PySparkValueError                         Traceback (most recent call last)
    Cell In[2], line 1
    ----> 1 df.drop()

    File ~/Dev/spark/python/pyspark/sql/connect/dataframe.py:449, in DataFrame.drop(self, *cols)
        444     raise PySparkTypeError(
        445         error_class="NOT_COLUMN_OR_STR",
        446         message_parameters={"arg_name": "cols", "arg_type": type(cols).__name__},
        447     )
        448 if len(_cols) == 0:
    --> 449     raise PySparkValueError(
        450         error_class="CANNOT_BE_EMPTY",
        451         message_parameters={"item": "cols"},
        452     )
        454 return DataFrame.withPlan(
        455     plan.Drop(
        456         child=self._plan,
       (...)
        459     session=self._session,
        460 )

    PySparkValueError: [CANNOT_BE_EMPTY] At least one cols must be specified.
    ```

    after:
    ```
    In [2]: df.drop()
    Out[2]: DataFrame[id: bigint, age: bigint]
    ```

    ### How was this patch tested?
    Enabled the previously skipped unit test.

    Closes #41180 from zhengruifeng/connect_drop_empty_col.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
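For context, the Spark Connect behavior after this change matches what vanilla PySpark already does: `drop()` with no arguments is a no-op that returns the DataFrame with its schema unchanged. A minimal sketch of that baseline behavior (the local-mode session bootstrap below is illustrative and not part of this patch):

```
from pyspark.sql import SparkSession

# Illustrative local session; any SparkSession behaves the same here.
spark = SparkSession.builder.master("local[1]").getOrCreate()

df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))

# drop() with no columns removes nothing: the schema is unchanged.
assert df.drop().columns == ["id", "age"]
```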
---
 python/pyspark/sql/connect/dataframe.py                   | 5 -----
 python/pyspark/sql/connect/plan.py                        | 3 ++-
 python/pyspark/sql/tests/connect/test_parity_dataframe.py | 5 -----
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index ffd52cf0cec..7a5ba50b3c6 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -445,11 +445,6 @@ class DataFrame:
                 error_class="NOT_COLUMN_OR_STR",
                 message_parameters={"arg_name": "cols", "arg_type": type(cols).__name__},
             )
-        if len(_cols) == 0:
-            raise PySparkValueError(
-                error_class="CANNOT_BE_EMPTY",
-                message_parameters={"item": "cols"},
-            )
 
         return DataFrame.withPlan(
             plan.Drop(

diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 03aca4896be..eb4765cbd4b 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -664,7 +664,8 @@ class Drop(LogicalPlan):
         columns: List[Union[Column, str]],
     ) -> None:
         super().__init__(child)
-        assert len(columns) > 0 and all(isinstance(c, (Column, str)) for c in columns)
+        if len(columns) > 0:
+            assert all(isinstance(c, (Column, str)) for c in columns)
         self._columns = columns
 
     def plan(self, session: "SparkConnectClient") -> proto.Relation:

diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe.py b/python/pyspark/sql/tests/connect/test_parity_dataframe.py
index 34f63c1410e..a74afc4d504 100644
--- a/python/pyspark/sql/tests/connect/test_parity_dataframe.py
+++ b/python/pyspark/sql/tests/connect/test_parity_dataframe.py
@@ -84,11 +84,6 @@ class DataFrameParityTests(DataFrameTestsMixin, ReusedConnectTestCase):
     def test_to_pandas_from_mixed_dataframe(self):
         self.check_to_pandas_from_mixed_dataframe()
 
-    # TODO(SPARK-43502): DataFrame.drop should support empty column
-    @unittest.skip("Fails in Spark Connect, should enable.")
-    def test_drop_empty_column(self):
-        super().test_drop_empty_column()
-
 
 if __name__ == "__main__":
     import unittest
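The re-enabled `test_drop_empty_column` is inherited from `DataFrameTestsMixin`, whose body is defined outside this patch. A self-contained sketch of the check it implies (the class name and method body here are assumptions, not copied from the test suite):

```
import unittest

from pyspark.sql import SparkSession


class DropEmptyColumnSketch(unittest.TestCase):
    # Hypothetical standalone version of the parity check; the real
    # test lives in DataFrameTestsMixin and also runs against Spark Connect.
    @classmethod
    def setUpClass(cls):
        cls.spark = SparkSession.builder.master("local[1]").getOrCreate()

    @classmethod
    def tearDownClass(cls):
        cls.spark.stop()

    def test_drop_empty_column(self):
        df = self.spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))
        # drop() with no arguments should leave the schema untouched.
        self.assertEqual(df.drop().columns, df.columns)


if __name__ == "__main__":
    unittest.main()
```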