This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ffd64a32eb2 [SPARK-43500][PYTHON][TESTS] Test `DataFrame.drop` with empty column list and names containing dot ffd64a32eb2 is described below commit ffd64a32eb2609ecfb68d252723671ec5cca3ffb Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Mon May 15 16:47:15 2023 +0800 [SPARK-43500][PYTHON][TESTS] Test `DataFrame.drop` with empty column list and names containing dot ### What changes were proposed in this pull request? Add tests for: 1. `DataFrame.drop` with an empty column list; 2. `DataFrame.drop` with column names containing a dot. ### Why are the changes needed? For better test coverage: the two UTs were once broken in [SPARK-39895](https://issues.apache.org/jira/browse/SPARK-39895), and then fixed in [SPARK-42444](https://issues.apache.org/jira/browse/SPARK-42444). ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Added UTs. Closes #41167 from zhengruifeng/py_test_drop_more. 
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../sql/tests/connect/test_parity_dataframe.py | 5 +++++ python/pyspark/sql/tests/test_dataframe.py | 24 ++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe.py b/python/pyspark/sql/tests/connect/test_parity_dataframe.py index a74afc4d504..34f63c1410e 100644 --- a/python/pyspark/sql/tests/connect/test_parity_dataframe.py +++ b/python/pyspark/sql/tests/connect/test_parity_dataframe.py @@ -84,6 +84,11 @@ class DataFrameParityTests(DataFrameTestsMixin, ReusedConnectTestCase): def test_to_pandas_from_mixed_dataframe(self): self.check_to_pandas_from_mixed_dataframe() + # TODO(SPARK-43502): DataFrame.drop should support empty column + @unittest.skip("Fails in Spark Connect, should enable.") + def test_drop_empty_column(self): + super().test_drop_empty_column() + if __name__ == "__main__": import unittest diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 715cd1d142c..527a51cc239 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -156,6 +156,30 @@ class DataFrameTestsMixin: self.assertEqual(df3.drop("name", df3.age, "unknown").columns, ["height"]) self.assertEqual(df3.drop("name", "age", df3.height).columns, []) + def test_drop_empty_column(self): + df = self.spark.createDataFrame([(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"]) + + self.assertEqual(df.drop().columns, ["age", "name"]) + self.assertEqual(df.drop(*[]).columns, ["age", "name"]) + + def test_drop_column_name_with_dot(self): + df = ( + self.spark.range(1, 3) + .withColumn("first.name", lit("Peter")) + .withColumn("city.name", lit("raleigh")) + .withColumn("state", lit("nc")) + ) + + self.assertEqual(df.drop("first.name").columns, ["id", "city.name", "state"]) + self.assertEqual(df.drop("city.name").columns, ["id", 
"first.name", "state"]) + self.assertEqual(df.drop("first.name", "city.name").columns, ["id", "state"]) + self.assertEqual( + df.drop("first.name", "city.name", "unknown.unknown").columns, ["id", "state"] + ) + self.assertEqual( + df.drop("unknown.unknown").columns, ["id", "first.name", "city.name", "state"] + ) + def test_dropna(self): schema = StructType( [ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org