This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new d3c79006f2 fix: handle Null type in try_merge for Struct, List,
LargeList, and Union (#9524)
d3c79006f2 is described below
commit d3c79006f2595e144d539f56b3054fe916ab184b
Author: Qi Zhu <[email protected]>
AuthorDate: Wed Mar 11 18:37:37 2026 +0800
fix: handle Null type in try_merge for Struct, List, LargeList, and Union
(#9524)
# Which issue does this PR close?
Field::try_merge correctly handles DataType::Null for primitive types
and when self is Null, but fails when self is a compound type (Struct,
List, LargeList, Union) and from is Null. This causes Schema::try_merge
to error when merging schemas where one has a Null field and another has
a concrete compound type for the same field.
This is common in JSON inference where some files have null values for
fields that are structs/lists in other files.
- Closes [#9523](https://github.com/apache/arrow-rs/issues/9523)
# Rationale for this change
Add `DataType::Null` arms to the Struct, List, LargeList, and Union
branches in `Field::try_merge`, consistent with how primitive types
already handle it.
# What changes are included in this PR?
Add `DataType::Null` arms to the Struct, List, LargeList, and Union
branches in `Field::try_merge`, consistent with how primitive types
already handle it.
# Are these changes tested?
- Added test `test_merge_compound_with_null` covering Struct, List,
LargeList, and Union merging with Null in both directions.
- Existing tests continue to pass.
# Are there any user-facing changes?
No
---
arrow-schema/src/field.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index a1c509abf2..1f2b57564d 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -830,6 +830,9 @@ impl Field {
.try_for_each(|f| builder.try_merge(f))?;
*nested_fields = builder.finish().fields;
}
+ DataType::Null => {
+ self.nullable = true;
+ }
_ => {
return Err(ArrowError::SchemaError(format!(
"Fail to merge schema field '{}' because the from
data_type = {} is not DataType::Struct",
@@ -841,6 +844,9 @@ impl Field {
DataType::Union(from_nested_fields, _) => {
nested_fields.try_merge(from_nested_fields)?
}
+ DataType::Null => {
+ self.nullable = true;
+ }
_ => {
return Err(ArrowError::SchemaError(format!(
"Fail to merge schema field '{}' because the from
data_type = {} is not DataType::Union",
@@ -854,6 +860,9 @@ impl Field {
f.try_merge(from_field)?;
(*field) = Arc::new(f);
}
+ DataType::Null => {
+ self.nullable = true;
+ }
_ => {
return Err(ArrowError::SchemaError(format!(
"Fail to merge schema field '{}' because the from
data_type = {} is not DataType::List",
@@ -867,6 +876,9 @@ impl Field {
f.try_merge(from_field)?;
(*field) = Arc::new(f);
}
+ DataType::Null => {
+ self.nullable = true;
+ }
_ => {
return Err(ArrowError::SchemaError(format!(
"Fail to merge schema field '{}' because the from
data_type = {} is not DataType::LargeList",
@@ -1461,4 +1473,58 @@ mod test {
assert_binary_serde_round_trip(field)
}
+
+ #[test]
+ fn test_merge_compound_with_null() {
+ // Struct + Null
+ let mut field = Field::new(
+ "s",
+ DataType::Struct(Fields::from(vec![Field::new("a",
DataType::Int32, false)])),
+ false,
+ );
+ field
+ .try_merge(&Field::new("s", DataType::Null, true))
+ .expect("Struct should merge with Null");
+ assert!(field.is_nullable());
+ assert!(matches!(field.data_type(), DataType::Struct(_)));
+
+ // List + Null
+ let mut field = Field::new(
+ "l",
+ DataType::List(Field::new("item", DataType::Utf8, false).into()),
+ false,
+ );
+ field
+ .try_merge(&Field::new("l", DataType::Null, true))
+ .expect("List should merge with Null");
+ assert!(field.is_nullable());
+ assert!(matches!(field.data_type(), DataType::List(_)));
+
+ // LargeList + Null
+ let mut field = Field::new(
+ "ll",
+ DataType::LargeList(Field::new("item", DataType::Utf8,
false).into()),
+ false,
+ );
+ field
+ .try_merge(&Field::new("ll", DataType::Null, true))
+ .expect("LargeList should merge with Null");
+ assert!(field.is_nullable());
+ assert!(matches!(field.data_type(), DataType::LargeList(_)));
+
+ // Union + Null
+ let mut field = Field::new(
+ "u",
+ DataType::Union(
+ UnionFields::try_new(vec![0], vec![Field::new("f",
DataType::Int32, false)])
+ .unwrap(),
+ UnionMode::Dense,
+ ),
+ false,
+ );
+ field
+ .try_merge(&Field::new("u", DataType::Null, true))
+ .expect("Union should merge with Null");
+ assert!(matches!(field.data_type(), DataType::Union(_, _)));
+ }
}