This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d3c79006f2 fix: handle Null type in try_merge for Struct, List, LargeList, and Union (#9524)
d3c79006f2 is described below

commit d3c79006f2595e144d539f56b3054fe916ab184b
Author: Qi Zhu <[email protected]>
AuthorDate: Wed Mar 11 18:37:37 2026 +0800

    fix: handle Null type in try_merge for Struct, List, LargeList, and Union (#9524)
    
    # Which issue does this PR close?
    
    `Field::try_merge` correctly handles `DataType::Null` for primitive types
    and when `self` is Null, but fails when `self` is a compound type (Struct,
    List, LargeList, Union) and `from` is Null. This causes `Schema::try_merge`
    to error when merging schemas where one has a Null field and another has
    a concrete compound type for the same field.
    
    This is common in JSON inference where some files have null values for
    fields that are structs/lists in other files.
    
    - Closes [#9523](https://github.com/apache/arrow-rs/issues/9523)
    
    # Rationale for this change
    
    Add `DataType::Null` arms to the Struct, List, LargeList, and Union
    branches in `Field::try_merge`, consistent with how primitive types
    already handle it.
    
    # What changes are included in this PR?
    
    Add `DataType::Null` arms to the Struct, List, LargeList, and Union
    branches in `Field::try_merge`, consistent with how primitive types
    already handle it.
    
    # Are these changes tested?
    
    - Added test `test_merge_compound_with_null` covering Struct, List,
      LargeList, and Union fields each merging a Null field into them.
    - Existing tests continue to pass.
    
    # Are there any user-facing changes?
    
    No
---
 arrow-schema/src/field.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index a1c509abf2..1f2b57564d 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -830,6 +830,9 @@ impl Field {
                         .try_for_each(|f| builder.try_merge(f))?;
                     *nested_fields = builder.finish().fields;
                 }
+                DataType::Null => {
+                    self.nullable = true;
+                }
                 _ => {
                     return Err(ArrowError::SchemaError(format!(
                         "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
@@ -841,6 +844,9 @@ impl Field {
                 DataType::Union(from_nested_fields, _) => {
                     nested_fields.try_merge(from_nested_fields)?
                 }
+                DataType::Null => {
+                    self.nullable = true;
+                }
                 _ => {
                     return Err(ArrowError::SchemaError(format!(
                         "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
@@ -854,6 +860,9 @@ impl Field {
                     f.try_merge(from_field)?;
                     (*field) = Arc::new(f);
                 }
+                DataType::Null => {
+                    self.nullable = true;
+                }
                 _ => {
                     return Err(ArrowError::SchemaError(format!(
                         "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
@@ -867,6 +876,9 @@ impl Field {
                     f.try_merge(from_field)?;
                     (*field) = Arc::new(f);
                 }
+                DataType::Null => {
+                    self.nullable = true;
+                }
                 _ => {
                     return Err(ArrowError::SchemaError(format!(
                         "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
@@ -1461,4 +1473,58 @@ mod test {
 
         assert_binary_serde_round_trip(field)
     }
+
+    #[test]
+    fn test_merge_compound_with_null() {
+        // Struct + Null
+        let mut field = Field::new(
+            "s",
+            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])),
+            false,
+        );
+        field
+            .try_merge(&Field::new("s", DataType::Null, true))
+            .expect("Struct should merge with Null");
+        assert!(field.is_nullable());
+        assert!(matches!(field.data_type(), DataType::Struct(_)));
+
+        // List + Null
+        let mut field = Field::new(
+            "l",
+            DataType::List(Field::new("item", DataType::Utf8, false).into()),
+            false,
+        );
+        field
+            .try_merge(&Field::new("l", DataType::Null, true))
+            .expect("List should merge with Null");
+        assert!(field.is_nullable());
+        assert!(matches!(field.data_type(), DataType::List(_)));
+
+        // LargeList + Null
+        let mut field = Field::new(
+            "ll",
+            DataType::LargeList(Field::new("item", DataType::Utf8, false).into()),
+            false,
+        );
+        field
+            .try_merge(&Field::new("ll", DataType::Null, true))
+            .expect("LargeList should merge with Null");
+        assert!(field.is_nullable());
+        assert!(matches!(field.data_type(), DataType::LargeList(_)));
+
+        // Union + Null
+        let mut field = Field::new(
+            "u",
+            DataType::Union(
+                UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)])
+                    .unwrap(),
+                UnionMode::Dense,
+            ),
+            false,
+        );
+        field
+            .try_merge(&Field::new("u", DataType::Null, true))
+            .expect("Union should merge with Null");
+        assert!(matches!(field.data_type(), DataType::Union(_, _)));
+    }
 }

Reply via email to