This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 6e22f1108bf4 [SPARK-48248][PYTHON] Fix nested array to respect legacy conf of inferArrayTypeFromFirstElement
6e22f1108bf4 is described below

commit 6e22f1108bf4c0d28b03f2618e308cde6fc7faa0
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Mon May 13 17:15:28 2024 +0900

    [SPARK-48248][PYTHON] Fix nested array to respect legacy conf of inferArrayTypeFromFirstElement
    
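    This PR fixes a bug, introduced by https://github.com/apache/spark/pull/36545,
    where `spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled` was
    not respected for nested arrays. The recursive `_infer_type` calls made for
    array elements now pass `infer_array_from_first_element` through.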
    
    This gives users a way to restore the original behaviour for nested arrays as well.
    
    Yes, it fixes the regression when 
`spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled` is set to 
`True`.
    
    A unit test was added (`test_infer_array_element_type_with_struct` in `python/pyspark/sql/tests/test_types.py`).
    
    No.
    
    Closes #46548 from HyukjinKwon/SPARK-48248.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit b2140d0f25d81e64a968df83c5da5089051acaac)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/test_types.py |  7 +++++++
 python/pyspark/sql/types.py            | 18 ++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/tests/test_types.py 
b/python/pyspark/sql/tests/test_types.py
index c5a6a1756976..02e3476ffd3b 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -1192,6 +1192,13 @@ class TypesTestsMixin:
         for n, (a, e) in enumerate(zip(actual, expected)):
             self.assertEqual(a, e, "%s does not match with %s" % (exprs[n], 
expected[n]))
 
+    def test_infer_array_element_type_with_struct(self):
+        # SPARK-48248: Nested array to respect legacy conf of 
inferArrayTypeFromFirstElement
+        with self.sql_conf(
+            
{"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True}
+        ):
+            self.assertEqual([[1, None]], self.spark.createDataFrame([[[[1, 
"a"]]]]).first()[0])
+
 
 class DataTypeTests(unittest.TestCase):
     # regression test for SPARK-6055
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 21fef0a2c08a..c668c5c59eca 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1508,13 +1508,27 @@ def _infer_type(
         if len(obj) > 0:
             if infer_array_from_first_element:
                 return ArrayType(
-                    _infer_type(obj[0], infer_dict_as_struct, 
prefer_timestamp_ntz), True
+                    _infer_type(
+                        obj[0],
+                        infer_dict_as_struct,
+                        infer_array_from_first_element,
+                        prefer_timestamp_ntz,
+                    ),
+                    True,
                 )
             else:
                 return ArrayType(
                     reduce(
                         _merge_type,
-                        (_infer_type(v, infer_dict_as_struct, 
prefer_timestamp_ntz) for v in obj),
+                        (
+                            _infer_type(
+                                v,
+                                infer_dict_as_struct,
+                                infer_array_from_first_element,
+                                prefer_timestamp_ntz,
+                            )
+                            for v in obj
+                        ),
                     ),
                     True,
                 )


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to