(spark) branch master updated: [SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at test_parity_types.py

gurwls223 Mon, 13 May 2024 07:23:13 -0700

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 27048702830d [SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at test_parity_types.py
27048702830d is described below

commit 27048702830d42864ecd7cbb10da600277887fbe
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Mon May 13 23:22:56 2024 +0900

    [SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at test_parity_types.py
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to enable some array inference tests at test_parity_types.py
    
    ### Why are the changes needed?
    
    For better test coverage for Spark Connect.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, test-only.
    
    ### How was this patch tested?
    
    CI in this PR should verify them.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #46550 from HyukjinKwon/SPARK-48250.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/connect/test_parity_types.py |  8 ++------
 python/pyspark/sql/tests/test_types.py                | 19 +++++++++++--------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_types.py b/python/pyspark/sql/tests/connect/test_parity_types.py
index 82a677574b45..55acb4b1a381 100644
--- a/python/pyspark/sql/tests/connect/test_parity_types.py
+++ b/python/pyspark/sql/tests/connect/test_parity_types.py
@@ -39,12 +39,8 @@ class TypesParityTests(TypesTestsMixin, ReusedConnectTestCase):
         super().test_create_dataframe_schema_mismatch()
 
     @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
-    def test_infer_array_element_type_empty(self):
-        super().test_infer_array_element_type_empty()
-
-    @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
-    def test_infer_array_element_type_with_struct(self):
-        super().test_infer_array_element_type_with_struct()
+    def test_infer_array_element_type_empty_rdd(self):
+        super().test_infer_array_element_type_empty_rdd()
 
     @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
     def test_infer_array_merge_element_types_with_rdd(self):
diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 159678937683..84d89b544f15 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -366,7 +366,7 @@ class TypesTestsMixin:
         df = self.spark.createDataFrame(rdd)
         self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first())
 
-    def test_infer_array_element_type_empty(self):
+    def test_infer_array_element_type_empty_rdd(self):
         # SPARK-39168: Test inferring array element type from all rows
         ArrayRow = Row("f1")
 
@@ -379,6 +379,12 @@ class TypesTestsMixin:
         self.assertEqual(Row(f1=[None]), rows[1])
         self.assertEqual(Row(f1=[1]), rows[2])
 
+    def test_infer_array_element_type_empty(self):
+        # SPARK-39168: Test inferring array element type from all rows
+        ArrayRow = Row("f1")
+
+        data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])]
+
         df = self.spark.createDataFrame(data)
         rows = df.collect()
         self.assertEqual(Row(f1=[]), rows[0])
@@ -392,12 +398,6 @@ class TypesTestsMixin:
         with self.sql_conf({"spark.sql.pyspark.inferNestedDictAsStruct.enabled": True}):
             data = [NestedRow([{"payment": 200.5}, {"name": "A"}])]
 
-            nestedRdd = self.sc.parallelize(data)
-            df = self.spark.createDataFrame(nestedRdd)
-            self.assertEqual(
-                Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first()
-            )
-
             df = self.spark.createDataFrame(data)
             self.assertEqual(
                 Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first()
@@ -1626,7 +1626,10 @@ class TypesTestsMixin:
         with self.sql_conf(
             {"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True}
         ):
-            self.assertEqual([[1, None]], self.spark.createDataFrame([[[[1, "a"]]]]).first()[0])
+            self.assertEqual(
+                ArrayType(ArrayType(LongType())),
+                self.spark.createDataFrame([[[[1, 1.0]]]]).schema.fields[0].dataType,
+            )
 
 
 class DataTypeTests(unittest.TestCase):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at test_parity_types.py

Reply via email to