This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 83946594d6 [Variant] Fix NULL handling for shredded object fields 
(#8395)
83946594d6 is described below

commit 83946594d6ebfb4f1d62353e6caa7bd5566c01b4
Author: Ryan Johnson <[email protected]>
AuthorDate: Fri Sep 19 18:18:32 2025 -0600

    [Variant] Fix NULL handling for shredded object fields (#8395)
    
    # Which issue does this PR close?
    
    - Fast-follow for https://github.com/apache/arrow-rs/pull/8366
    - Related to https://github.com/apache/arrow-rs/pull/8392
    
    # Rationale for this change
    
    Somehow, https://github.com/apache/arrow-rs/pull/8392 exposes a latent
    bug in https://github.com/apache/arrow-rs/pull/8366, which has improper
    NULL handling for shredded object fields. The shredding PR originally
    attempted to handle this case, but somehow the test did not trigger the
    bug and so the (admittedly incomplete) code was removed. See
    https://github.com/apache/arrow-rs/pull/8366#discussion_r2357552451. To
    be honest, I have no idea how the original ever worked correctly, nor
    why the new PR is able to expose the problem.
    
    # What changes are included in this PR?
    
    When used as a top-level builder,
    `VariantToShreddedVariantRowBuilder::append_null` must append NULL to
    its own `NullBufferBuilder`; but when used as a shredded object field
    builder, it must append non-NULL. Plumb a new `top_level` parameter
    through the various functions and into the two sub-builders it relies
    on, so they can implement the correct semantics.
    
    # Are these changes tested?
    
    In theory, yes (I don't know how the object shredding test ever passed).
    And it fixes the breakage in
    https://github.com/apache/arrow-rs/pull/8392.
    
    # Are there any user-facing changes?
    
    No
---
 parquet-variant-compute/src/shred_variant.rs | 40 ++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/parquet-variant-compute/src/shred_variant.rs 
b/parquet-variant-compute/src/shred_variant.rs
index 9b517c0346..aea36266e8 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -76,8 +76,12 @@ pub fn shred_variant(array: &VariantArray, as_type: 
&DataType) -> Result<Variant
     };
 
     let cast_options = CastOptions::default();
-    let mut builder =
-        make_variant_to_shredded_variant_arrow_row_builder(as_type, 
&cast_options, array.len())?;
+    let mut builder = make_variant_to_shredded_variant_arrow_row_builder(
+        as_type,
+        &cast_options,
+        array.len(),
+        true,
+    )?;
     for i in 0..array.len() {
         if array.is_null(i) {
             builder.append_null()?;
@@ -98,11 +102,16 @@ pub(crate) fn 
make_variant_to_shredded_variant_arrow_row_builder<'a>(
     data_type: &'a DataType,
     cast_options: &'a CastOptions,
     capacity: usize,
+    top_level: bool,
 ) -> Result<VariantToShreddedVariantRowBuilder<'a>> {
     let builder = match data_type {
         DataType::Struct(fields) => {
-            let typed_value_builder =
-                VariantToShreddedObjectVariantRowBuilder::try_new(fields, 
cast_options, capacity)?;
+            let typed_value_builder = 
VariantToShreddedObjectVariantRowBuilder::try_new(
+                fields,
+                cast_options,
+                capacity,
+                top_level,
+            )?;
             VariantToShreddedVariantRowBuilder::Object(typed_value_builder)
         }
         DataType::List(_)
@@ -118,7 +127,7 @@ pub(crate) fn 
make_variant_to_shredded_variant_arrow_row_builder<'a>(
             let builder =
                 make_primitive_variant_to_arrow_row_builder(data_type, 
cast_options, capacity)?;
             let typed_value_builder =
-                VariantToShreddedPrimitiveVariantRowBuilder::new(builder, 
capacity);
+                VariantToShreddedPrimitiveVariantRowBuilder::new(builder, 
capacity, top_level);
             VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder)
         }
     };
@@ -160,21 +169,26 @@ pub(crate) struct 
VariantToShreddedPrimitiveVariantRowBuilder<'a> {
     value_builder: VariantValueArrayBuilder,
     typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
     nulls: NullBufferBuilder,
+    top_level: bool,
 }
 
 impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
     pub(crate) fn new(
         typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
         capacity: usize,
+        top_level: bool,
     ) -> Self {
         Self {
             value_builder: VariantValueArrayBuilder::new(capacity),
             typed_value_builder,
             nulls: NullBufferBuilder::new(capacity),
+            top_level,
         }
     }
     fn append_null(&mut self) -> Result<()> {
-        self.nulls.append_null();
+        // Only the top-level struct that represents the variant can be 
nullable; object fields and
+        // array elements are non-nullable.
+        self.nulls.append(!self.top_level);
         self.value_builder.append_null();
         self.typed_value_builder.append_null()
     }
@@ -201,15 +215,22 @@ pub(crate) struct 
VariantToShreddedObjectVariantRowBuilder<'a> {
     typed_value_builders: IndexMap<&'a str, 
VariantToShreddedVariantRowBuilder<'a>>,
     typed_value_nulls: NullBufferBuilder,
     nulls: NullBufferBuilder,
+    top_level: bool,
 }
 
 impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
-    fn try_new(fields: &'a Fields, cast_options: &'a CastOptions, capacity: 
usize) -> Result<Self> {
+    fn try_new(
+        fields: &'a Fields,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+        top_level: bool,
+    ) -> Result<Self> {
         let typed_value_builders = fields.iter().map(|field| {
             let builder = make_variant_to_shredded_variant_arrow_row_builder(
                 field.data_type(),
                 cast_options,
                 capacity,
+                false,
             )?;
             Ok((field.name().as_str(), builder))
         });
@@ -218,11 +239,14 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
             typed_value_builders: typed_value_builders.collect::<Result<_>>()?,
             typed_value_nulls: NullBufferBuilder::new(capacity),
             nulls: NullBufferBuilder::new(capacity),
+            top_level,
         })
     }
 
     fn append_null(&mut self) -> Result<()> {
-        self.nulls.append_null();
+        // Only the top-level struct that represents the variant can be 
nullable; object fields and
+        // array elements are non-nullable.
+        self.nulls.append(!self.top_level);
         self.value_builder.append_null();
         self.typed_value_nulls.append_null();
         for (_, typed_value_builder) in &mut self.typed_value_builders {

Reply via email to