This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 961d5bd9097 [SPARK-42002][CONNECT][PYTHON][FOLLOWUP] Enable tests in ReadwriterV2ParityTests
961d5bd9097 is described below

commit 961d5bd909744fea24e2391cd1a7aea3c96c418d
Author: Takuya UESHIN <ues...@databricks.com>
AuthorDate: Fri Feb 17 09:26:12 2023 +0800

    [SPARK-42002][CONNECT][PYTHON][FOLLOWUP] Enable tests in ReadwriterV2ParityTests
    
    ### What changes were proposed in this pull request?
    
    Enables tests in `ReadwriterV2ParityTests`.
    
    ### Why are the changes needed?
    
    Now that `DataFrameWriterV2` for Spark Connect is implemented, we can enable tests in `ReadwriterV2ParityTests`.
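    
    For reference, the pattern that makes this possible: each `test_*` method in the shared mixin is factored into a `check_*` helper that takes the expected writer class, so the Connect suite can reuse the same assertions against its own `DataFrameWriterV2`. A condensed sketch (simplified from the diff below, not the verbatim source):
    
    ```python
    # Condensed sketch of the parity-test pattern; see the diff for the
    # actual code in python/pyspark/sql/tests/test_readwriter.py.
    from pyspark.sql.readwriter import DataFrameWriterV2
    from pyspark.sql.connect.readwriter import DataFrameWriterV2 as ConnectWriterV2
    from pyspark.testing.connectutils import ReusedConnectTestCase
    
    
    class ReadwriterV2TestsMixin:
        # `self.df` and the assert helpers come from the concrete TestCase
        # this mixin is combined with (ReusedSQLTestCase / ReusedConnectTestCase).
        def test_api(self):
            self.check_api(DataFrameWriterV2)
    
        def check_api(self, tpe):
            # Shared assertions, parameterized on the expected writer class.
            writer = self.df.writeTo("testcat.t")
            self.assertIsInstance(writer, tpe)
    
    
    class ReadwriterV2ParityTests(ReadwriterV2TestsMixin, ReusedConnectTestCase):
        def test_api(self):
            # Same checks, but expect the Spark Connect writer class.
            self.check_api(ConnectWriterV2)
    ```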
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Enabled tests.
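    
    One way to run the newly enabled suite locally from a Spark checkout (assuming the Spark Connect Python dependencies are installed) is the standard PySpark test runner:
    
    ```
    python/run-tests --testnames 'pyspark.sql.tests.connect.test_parity_readwriter'
    ```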
    
    Closes #40060 from ueshin/issues/SPARK-42002/tests.
    
    Authored-by: Takuya UESHIN <ues...@databricks.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 .../sql/tests/connect/test_parity_readwriter.py    |  9 ++----
 python/pyspark/sql/tests/test_readwriter.py        | 36 ++++++++++++----------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_readwriter.py b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
index 61f542385ec..bf77043ef38 100644
--- a/python/pyspark/sql/tests/connect/test_parity_readwriter.py
+++ b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
@@ -16,6 +16,7 @@
 #
 import unittest
 
+from pyspark.sql.connect.readwriter import DataFrameWriterV2
 from pyspark.sql.tests.test_readwriter import ReadwriterTestsMixin, ReadwriterV2TestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 
@@ -33,15 +34,11 @@ class ReadwriterParityTests(ReadwriterTestsMixin, ReusedConnectTestCase):
 
 
 class ReadwriterV2ParityTests(ReadwriterV2TestsMixin, ReusedConnectTestCase):
-    # TODO(SPARK-42002): Implement writeTo()
-    @unittest.skip("Fails in Spark Connect, should enable.")
     def test_api(self):
-        super().test_api()
+        self.check_api(DataFrameWriterV2)
 
-    # TODO(SPARK-42002): Implement writeTo()
-    @unittest.skip("Fails in Spark Connect, should enable.")
     def test_partitioning_functions(self):
-        super().test_partitioning_functions()
+        self.check_partitioning_functions(DataFrameWriterV2)
 
 
 if __name__ == "__main__":
diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py
index 23b607fe98e..9cd3e613667 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -181,17 +181,23 @@ class ReadwriterTestsMixin:
 
 class ReadwriterV2TestsMixin:
     def test_api(self):
+        self.check_api(DataFrameWriterV2)
+
+    def check_api(self, tpe):
         df = self.df
         writer = df.writeTo("testcat.t")
-        self.assertIsInstance(writer, DataFrameWriterV2)
-        self.assertIsInstance(writer.option("property", "value"), DataFrameWriterV2)
-        self.assertIsInstance(writer.options(property="value"), DataFrameWriterV2)
-        self.assertIsInstance(writer.using("source"), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy("id"), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy(col("id")), DataFrameWriterV2)
-        self.assertIsInstance(writer.tableProperty("foo", "bar"), DataFrameWriterV2)
+        self.assertIsInstance(writer, tpe)
+        self.assertIsInstance(writer.option("property", "value"), tpe)
+        self.assertIsInstance(writer.options(property="value"), tpe)
+        self.assertIsInstance(writer.using("source"), tpe)
+        self.assertIsInstance(writer.partitionedBy("id"), tpe)
+        self.assertIsInstance(writer.partitionedBy(col("id")), tpe)
+        self.assertIsInstance(writer.tableProperty("foo", "bar"), tpe)
 
     def test_partitioning_functions(self):
+        self.check_partitioning_functions(DataFrameWriterV2)
+
+    def check_partitioning_functions(self, tpe):
         import datetime
         from pyspark.sql.functions import years, months, days, hours, bucket
 
@@ -201,15 +207,13 @@ class ReadwriterV2TestsMixin:
 
         writer = df.writeTo("testcat.t")
 
-        self.assertIsInstance(writer.partitionedBy(years("ts")), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy(months("ts")), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy(days("ts")), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy(hours("ts")), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy(bucket(11, "id")), DataFrameWriterV2)
-        self.assertIsInstance(writer.partitionedBy(bucket(11, col("id"))), DataFrameWriterV2)
-        self.assertIsInstance(
-            writer.partitionedBy(bucket(3, "id"), hours(col("ts"))), DataFrameWriterV2
-        )
+        self.assertIsInstance(writer.partitionedBy(years("ts")), tpe)
+        self.assertIsInstance(writer.partitionedBy(months("ts")), tpe)
+        self.assertIsInstance(writer.partitionedBy(days("ts")), tpe)
+        self.assertIsInstance(writer.partitionedBy(hours("ts")), tpe)
+        self.assertIsInstance(writer.partitionedBy(bucket(11, "id")), tpe)
+        self.assertIsInstance(writer.partitionedBy(bucket(11, col("id"))), tpe)
+        self.assertIsInstance(writer.partitionedBy(bucket(3, "id"), hours(col("ts"))), tpe)
 
 
 class ReadwriterTests(ReadwriterTestsMixin, ReusedSQLTestCase):


