This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 27048702830d [SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at test_parity_types.py 27048702830d is described below commit 27048702830d42864ecd7cbb10da600277887fbe Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Mon May 13 23:22:56 2024 +0900 [SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at test_parity_types.py ### What changes were proposed in this pull request? This PR proposes to enable some array inference tests at test_parity_types.py ### Why are the changes needed? For better test coverage for Spark Connect. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PR should verify them. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46550 from HyukjinKwon/SPARK-48250. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/tests/connect/test_parity_types.py | 8 ++------ python/pyspark/sql/tests/test_types.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_parity_types.py b/python/pyspark/sql/tests/connect/test_parity_types.py index 82a677574b45..55acb4b1a381 100644 --- a/python/pyspark/sql/tests/connect/test_parity_types.py +++ b/python/pyspark/sql/tests/connect/test_parity_types.py @@ -39,12 +39,8 @@ class TypesParityTests(TypesTestsMixin, ReusedConnectTestCase): super().test_create_dataframe_schema_mismatch() @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") - def test_infer_array_element_type_empty(self): - super().test_infer_array_element_type_empty() - - @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") - def test_infer_array_element_type_with_struct(self): - super().test_infer_array_element_type_with_struct() + def test_infer_array_element_type_empty_rdd(self): + super().test_infer_array_element_type_empty_rdd() @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") def test_infer_array_merge_element_types_with_rdd(self): diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 159678937683..84d89b544f15 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -366,7 +366,7 @@ class TypesTestsMixin: df = self.spark.createDataFrame(rdd) self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first()) - def test_infer_array_element_type_empty(self): + def test_infer_array_element_type_empty_rdd(self): # SPARK-39168: Test inferring array element type from all rows ArrayRow = Row("f1") @@ -379,6 +379,12 @@ class TypesTestsMixin: self.assertEqual(Row(f1=[None]), rows[1]) self.assertEqual(Row(f1=[1]), rows[2]) + def test_infer_array_element_type_empty(self): + # SPARK-39168: Test inferring array element type from all rows + ArrayRow = Row("f1") + + data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])] + df = self.spark.createDataFrame(data) rows = df.collect() self.assertEqual(Row(f1=[]), rows[0]) @@ -392,12 +398,6 @@ class TypesTestsMixin: with self.sql_conf({"spark.sql.pyspark.inferNestedDictAsStruct.enabled": True}): data = [NestedRow([{"payment": 200.5}, {"name": "A"}])] - nestedRdd = self.sc.parallelize(data) - df = self.spark.createDataFrame(nestedRdd) - self.assertEqual( - Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first() - ) - df = self.spark.createDataFrame(data) self.assertEqual( Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first() @@ -1626,7 +1626,10 @@ class TypesTestsMixin: with self.sql_conf( {"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True} ): - self.assertEqual([[1, None]], self.spark.createDataFrame([[[[1, "a"]]]]).first()[0]) + self.assertEqual( + ArrayType(ArrayType(LongType())), + self.spark.createDataFrame([[[[1, 1.0]]]]).schema.fields[0].dataType, + ) class DataTypeTests(unittest.TestCase): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org