This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 961d5bd9097 [SPARK-42002][CONNECT][PYTHON][FOLLOWUP] Enable tests in ReadwriterV2ParityTests 961d5bd9097 is described below commit 961d5bd909744fea24e2391cd1a7aea3c96c418d Author: Takuya UESHIN <ues...@databricks.com> AuthorDate: Fri Feb 17 09:26:12 2023 +0800 [SPARK-42002][CONNECT][PYTHON][FOLLOWUP] Enable tests in ReadwriterV2ParityTests ### What changes were proposed in this pull request? Enables tests in `ReadwriterV2ParityTests`. ### Why are the changes needed? Now that `DataFrameWriterV2` for Spark Connect is implemented, we can enable tests in `ReadwriterV2ParityTests`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Enabled tests. Closes #40060 from ueshin/issues/SPARK-42002/tests. Authored-by: Takuya UESHIN <ues...@databricks.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../sql/tests/connect/test_parity_readwriter.py | 9 ++---- python/pyspark/sql/tests/test_readwriter.py | 36 ++++++++++++---------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_parity_readwriter.py b/python/pyspark/sql/tests/connect/test_parity_readwriter.py index 61f542385ec..bf77043ef38 100644 --- a/python/pyspark/sql/tests/connect/test_parity_readwriter.py +++ b/python/pyspark/sql/tests/connect/test_parity_readwriter.py @@ -16,6 +16,7 @@ # import unittest +from pyspark.sql.connect.readwriter import DataFrameWriterV2 from pyspark.sql.tests.test_readwriter import ReadwriterTestsMixin, ReadwriterV2TestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase @@ -33,15 +34,11 @@ class ReadwriterParityTests(ReadwriterTestsMixin, ReusedConnectTestCase): class ReadwriterV2ParityTests(ReadwriterV2TestsMixin, ReusedConnectTestCase): - # TODO(SPARK-42002): Implement writeTo() - @unittest.skip("Fails in Spark Connect, should enable.") def test_api(self): - super().test_api() + self.check_api(DataFrameWriterV2) - # TODO(SPARK-42002): Implement writeTo() - @unittest.skip("Fails in Spark Connect, should enable.") def test_partitioning_functions(self): - super().test_partitioning_functions() + self.check_partitioning_functions(DataFrameWriterV2) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py index 23b607fe98e..9cd3e613667 100644 --- a/python/pyspark/sql/tests/test_readwriter.py +++ b/python/pyspark/sql/tests/test_readwriter.py @@ -181,17 +181,23 @@ class ReadwriterTestsMixin: class ReadwriterV2TestsMixin: def test_api(self): + self.check_api(DataFrameWriterV2) + + def check_api(self, tpe): df = self.df writer = df.writeTo("testcat.t") - self.assertIsInstance(writer, DataFrameWriterV2) - self.assertIsInstance(writer.option("property", "value"), DataFrameWriterV2) - self.assertIsInstance(writer.options(property="value"), DataFrameWriterV2) - self.assertIsInstance(writer.using("source"), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy("id"), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy(col("id")), DataFrameWriterV2) - self.assertIsInstance(writer.tableProperty("foo", "bar"), DataFrameWriterV2) + self.assertIsInstance(writer, tpe) + self.assertIsInstance(writer.option("property", "value"), tpe) + self.assertIsInstance(writer.options(property="value"), tpe) + self.assertIsInstance(writer.using("source"), tpe) + self.assertIsInstance(writer.partitionedBy("id"), tpe) + self.assertIsInstance(writer.partitionedBy(col("id")), tpe) + self.assertIsInstance(writer.tableProperty("foo", "bar"), tpe) def test_partitioning_functions(self): + self.check_partitioning_functions(DataFrameWriterV2) + + def check_partitioning_functions(self, tpe): import datetime from pyspark.sql.functions import years, months, days, hours, bucket @@ -201,15 +207,13 @@ class ReadwriterV2TestsMixin: writer = df.writeTo("testcat.t") - self.assertIsInstance(writer.partitionedBy(years("ts")), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy(months("ts")), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy(days("ts")), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy(hours("ts")), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy(bucket(11, "id")), DataFrameWriterV2) - self.assertIsInstance(writer.partitionedBy(bucket(11, col("id"))), DataFrameWriterV2) - self.assertIsInstance( - writer.partitionedBy(bucket(3, "id"), hours(col("ts"))), DataFrameWriterV2 - ) + self.assertIsInstance(writer.partitionedBy(years("ts")), tpe) + self.assertIsInstance(writer.partitionedBy(months("ts")), tpe) + self.assertIsInstance(writer.partitionedBy(days("ts")), tpe) + self.assertIsInstance(writer.partitionedBy(hours("ts")), tpe) + self.assertIsInstance(writer.partitionedBy(bucket(11, "id")), tpe) + self.assertIsInstance(writer.partitionedBy(bucket(11, col("id"))), tpe) + self.assertIsInstance(writer.partitionedBy(bucket(3, "id"), hours(col("ts"))), tpe) class ReadwriterTests(ReadwriterTestsMixin, ReusedSQLTestCase): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org