This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 0936b38d3d [Variant] Fix `variant_get` to return `List<T>` instead of 
`List<Struct>` (#9631)
0936b38d3d is described below

commit 0936b38d3d3db962af782b368b88ee81fe8e49d5
Author: Liam Bao <[email protected]>
AuthorDate: Mon Apr 6 15:34:39 2026 -0400

    [Variant] Fix `variant_get` to return `List<T>` instead of `List<Struct>` 
(#9631)
    
    # Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    -->
    
    - Closes #9615.
    
    # Rationale for this change
    
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    `variant_get(..., List<T>)` was returning `List<Struct<value,
    typed_value>>` instead of `List<T>`. This happened because
    `VariantToListArrowRowBuilder` unconditionally used
    `make_variant_to_shredded_variant_arrow_row_builder` for its element
    builder, which produces the shredded representation (a struct with
    `value` and `typed_value` fields). This is correct for `shred_variant`,
    but `variant_get` should produce strongly typed arrays directly.
    
    
    # What changes are included in this PR?
    
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    - Introduced a `ListElementBuilder` enum with `Typed` and `Shredded`
    variants to abstract over the two element output modes.
    - `Typed` wraps `VariantToArrowRowBuilder` and produces the target type
    directly (e.g., `Int64Array`).
    - `Shredded` wraps `VariantToShreddedVariantRowBuilder` and produces the
    `Struct<value, typed_value>` used by shredding.
    - Added a `shredded: bool` parameter to
    `ArrayVariantToArrowRowBuilder::try_new` and
    `VariantToListArrowRowBuilder::try_new` to select the appropriate mode.
    
    
    # Are these changes tested?
    
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
    
    Yes
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
    
    Yes, the `variant_get(..., List<T>)` should have correct behavior now
---
 parquet-variant-compute/src/shred_variant.rs    |   1 +
 parquet-variant-compute/src/variant_get.rs      | 137 ++++++++++++------------
 parquet-variant-compute/src/variant_to_arrow.rs |  75 ++++++++++---
 3 files changed, 130 insertions(+), 83 deletions(-)

diff --git a/parquet-variant-compute/src/shred_variant.rs 
b/parquet-variant-compute/src/shred_variant.rs
index d80d2f9863..d2021e12f6 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -303,6 +303,7 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> {
                 data_type,
                 cast_options,
                 capacity,
+                true,
             )?,
             nulls: NullBufferBuilder::new(capacity),
             null_value,
diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index 73906f70eb..896900cc5d 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -365,8 +365,8 @@ mod test {
     use arrow_schema::{DataType, Field, FieldRef, Fields, IntervalUnit, 
TimeUnit};
     use chrono::DateTime;
     use parquet_variant::{
-        EMPTY_VARIANT_METADATA_BYTES, Variant, VariantBuilder, 
VariantDecimal4, VariantDecimal8,
-        VariantDecimal16, VariantDecimalType, VariantPath,
+        EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, 
VariantDecimal8, VariantDecimal16,
+        VariantDecimalType, VariantPath,
     };
 
     fn single_variant_get_test(input_json: &str, path: VariantPath, 
expected_json: &str) {
@@ -4131,69 +4131,45 @@ mod test {
         ]));
         let variant_array = 
ArrayRef::from(json_to_variant(&string_array).unwrap());
 
-        let value_array: ArrayRef = {
-            let mut builder = VariantBuilder::new();
-            builder.append_value("two");
-            let (_, value_bytes) = builder.finish();
-            Arc::new(BinaryViewArray::from(vec![
-                None,
-                Some(value_bytes.as_slice()),
-                None,
-            ]))
-        };
-        let typed_value_array: ArrayRef = 
Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
-        let struct_fields = Fields::from(vec![
-            Field::new("value", DataType::BinaryView, true),
-            Field::new("typed_value", DataType::Int64, true),
-        ]);
-        let struct_array: ArrayRef = Arc::new(
-            StructArray::try_new(
-                struct_fields.clone(),
-                vec![value_array.clone(), typed_value_array.clone()],
-                None,
-            )
-            .unwrap(),
-        );
-
-        let request_field = Arc::new(Field::new("item", DataType::Int64, 
true));
-        let result_field = Arc::new(Field::new("item", 
DataType::Struct(struct_fields), true));
+        let element_array: ArrayRef = Arc::new(Int64Array::from(vec![Some(1), 
None, Some(3)]));
+        let field = Arc::new(Field::new("item", Int64, true));
 
         let expectations = vec![
             (
-                DataType::List(request_field.clone()),
+                DataType::List(field.clone()),
                 Arc::new(ListArray::new(
-                    result_field.clone(),
+                    field.clone(),
                     OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 3])),
-                    struct_array.clone(),
+                    element_array.clone(),
                     Some(NullBuffer::from(vec![true, false])),
                 )) as ArrayRef,
             ),
             (
-                DataType::LargeList(request_field.clone()),
+                DataType::LargeList(field.clone()),
                 Arc::new(LargeListArray::new(
-                    result_field.clone(),
+                    field.clone(),
                     OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 3])),
-                    struct_array.clone(),
+                    element_array.clone(),
                     Some(NullBuffer::from(vec![true, false])),
                 )) as ArrayRef,
             ),
             (
-                DataType::ListView(request_field.clone()),
+                DataType::ListView(field.clone()),
                 Arc::new(ListViewArray::new(
-                    result_field.clone(),
+                    field.clone(),
                     ScalarBuffer::from(vec![0, 3]),
                     ScalarBuffer::from(vec![3, 0]),
-                    struct_array.clone(),
+                    element_array.clone(),
                     Some(NullBuffer::from(vec![true, false])),
                 )) as ArrayRef,
             ),
             (
-                DataType::LargeListView(request_field),
+                DataType::LargeListView(field.clone()),
                 Arc::new(LargeListViewArray::new(
-                    result_field,
+                    field,
                     ScalarBuffer::from(vec![0, 3]),
                     ScalarBuffer::from(vec![3, 0]),
-                    struct_array,
+                    element_array,
                     Some(NullBuffer::from(vec![true, false])),
                 )) as ArrayRef,
             ),
@@ -4235,6 +4211,52 @@ mod test {
         }
     }
 
+    #[test]
+    fn test_variant_get_nested_list() {
+        use arrow::datatypes::Int64Type;
+
+        let string_array: ArrayRef = Arc::new(StringArray::from(vec![
+            r#"[[1, 2], [3]]"#,
+            r#"[[4], "not a list", [5, 6]]"#,
+        ]));
+        let variant_array = 
ArrayRef::from(json_to_variant(&string_array).unwrap());
+
+        let inner_field = Arc::new(Field::new("item", Int64, true));
+        let outer_field = Arc::new(Field::new(
+            "item",
+            DataType::List(inner_field.clone()),
+            true,
+        ));
+        let request_type = DataType::List(outer_field.clone());
+
+        let options = 
GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
+            "result",
+            request_type,
+            true,
+        ))));
+        let result = variant_get(&variant_array, options).unwrap();
+        let outer = result.as_list::<i32>();
+
+        // Row 0: [[1, 2], [3]]
+        let row0 = outer.value(0);
+        let row0 = row0.as_list::<i32>();
+        assert_eq!(row0.len(), 2);
+        let elem0 = row0.value(0);
+        assert_eq!(elem0.as_primitive::<Int64Type>().values(), &[1, 2]);
+        let elem1 = row0.value(1);
+        assert_eq!(elem1.as_primitive::<Int64Type>().values(), &[3]);
+
+        // Row 1: [[4], null, [5, 6]] — "not a list" becomes null inner list
+        let row1 = outer.value(1);
+        let row1 = row1.as_list::<i32>();
+        assert_eq!(row1.len(), 3);
+        let elem0 = row1.value(0);
+        assert_eq!(elem0.as_primitive::<Int64Type>().values(), &[4]);
+        assert!(row1.is_null(1));
+        let elem2 = row1.value(2);
+        assert_eq!(elem2.as_primitive::<Int64Type>().values(), &[5, 6]);
+    }
+
     #[test]
     fn test_variant_get_list_like_unsafe_cast_errors_on_element_mismatch() {
         let string_array: ArrayRef =
@@ -4287,40 +4309,17 @@ mod test {
             .with_cast_options(cast_options);
 
         let result = variant_get(&variant_array, options).unwrap();
-        let element_struct = result
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .unwrap()
+        let list_array = result.as_any().downcast_ref::<ListArray>().unwrap();
+        let values = list_array
             .values()
             .as_any()
-            .downcast_ref::<StructArray>()
-            .unwrap();
-
-        let value = element_struct
-            .column_by_name("value")
-            .unwrap()
-            .as_any()
-            .downcast_ref::<BinaryViewArray>()
-            .unwrap();
-        let typed_value = element_struct
-            .column_by_name("typed_value")
-            .unwrap()
-            .as_any()
             .downcast_ref::<Int64Array>()
             .unwrap();
 
-        assert_eq!(typed_value.len(), 3);
-        assert_eq!(typed_value.value(0), 1);
-        assert!(typed_value.is_null(1));
-        assert_eq!(typed_value.value(2), 3);
-
-        assert!(value.is_null(0));
-        assert!(value.is_valid(1));
-        assert_eq!(
-            Variant::new(EMPTY_VARIANT_METADATA_BYTES, value.value(1)),
-            Variant::Null
-        );
-        assert!(value.is_null(2));
+        assert_eq!(values.len(), 3);
+        assert_eq!(values.value(0), 1);
+        assert!(values.is_null(1));
+        assert_eq!(values.value(2), 3);
     }
 
     #[test]
diff --git a/parquet-variant-compute/src/variant_to_arrow.rs 
b/parquet-variant-compute/src/variant_to_arrow.rs
index dd396117d2..dd054a5f7d 100644
--- a/parquet-variant-compute/src/variant_to_arrow.rs
+++ b/parquet-variant-compute/src/variant_to_arrow.rs
@@ -107,7 +107,7 @@ fn make_typed_variant_to_arrow_row_builder<'a>(
         | DataType::LargeListView(_)
         | DataType::FixedSizeList(..)) => {
             let builder =
-                ArrayVariantToArrowRowBuilder::try_new(data_type, 
cast_options, capacity)?;
+                ArrayVariantToArrowRowBuilder::try_new(data_type, 
cast_options, capacity, false)?;
             Ok(Array(builder))
         }
         data_type => {
@@ -587,10 +587,17 @@ impl<'a> StructVariantToArrowRowBuilder<'a> {
 }
 
 impl<'a> ArrayVariantToArrowRowBuilder<'a> {
+    /// Creates a new list builder for the given data type.
+    ///
+    /// # Arguments
+    /// * `shredded` - If true, element builders produce shredded structs with 
`value`/`typed_value`
+    ///   fields (for [`crate::shred_variant()`]). If false, element builders 
produce strongly typed
+    ///   arrays directly (for [`crate::variant_get()`]).
     pub(crate) fn try_new(
         data_type: &'a DataType,
         cast_options: &'a CastOptions,
         capacity: usize,
+        shredded: bool,
     ) -> Result<Self> {
         use ArrayVariantToArrowRowBuilder::*;
 
@@ -602,6 +609,7 @@ impl<'a> ArrayVariantToArrowRowBuilder<'a> {
                     $field.data_type(),
                     cast_options,
                     capacity,
+                    shredded,
                 )?)
             };
         }
@@ -893,13 +901,45 @@ impl<'a> VariantToUuidArrowRowBuilder<'a> {
     }
 }
 
+/// Element builder for list variants, supporting both typed (for 
[`crate::variant_get()`])
+/// and shredded (for [`crate::shred_variant()`]) output modes.
+enum ListElementBuilder<'a> {
+    /// Produces the target array type directly.
+    Typed(Box<VariantToArrowRowBuilder<'a>>),
+    /// Produces a shredded struct with `value` and `typed_value` fields.
+    Shredded(Box<VariantToShreddedVariantRowBuilder<'a>>),
+}
+
+impl<'a> ListElementBuilder<'a> {
+    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        match self {
+            Self::Typed(b) => b.append_value(value),
+            Self::Shredded(b) => b.append_value(value),
+        }
+    }
+
+    fn finish(self) -> Result<ArrayRef> {
+        match self {
+            Self::Typed(b) => b.finish(),
+            Self::Shredded(b) => {
+                let (value, typed_value, nulls) = b.finish()?;
+                Ok(ArrayRef::from(ShreddedVariantFieldArray::from_parts(
+                    Some(value),
+                    Some(typed_value),
+                    nulls,
+                )))
+            }
+        }
+    }
+}
+
 pub(crate) struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
 where
     O: OffsetSizeTrait + ArrowNativeTypeOp,
 {
     field: FieldRef,
     offsets: Vec<O>,
-    element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>,
+    element_builder: ListElementBuilder<'a>,
     nulls: NullBufferBuilder,
     current_offset: O,
     cast_options: &'a CastOptions<'a>,
@@ -914,6 +954,7 @@ where
         element_data_type: &'a DataType,
         cast_options: &'a CastOptions,
         capacity: usize,
+        shredded: bool,
     ) -> Result<Self> {
         if capacity >= isize::MAX as usize {
             return Err(ArrowError::ComputeError(
@@ -922,16 +963,24 @@ where
         }
         let mut offsets = Vec::with_capacity(capacity + 1);
         offsets.push(O::ZERO);
-        let element_builder = 
make_variant_to_shredded_variant_arrow_row_builder(
-            element_data_type,
-            cast_options,
-            capacity,
-            NullValue::ArrayElement,
-        )?;
+        let element_builder = if shredded {
+            let builder = make_variant_to_shredded_variant_arrow_row_builder(
+                element_data_type,
+                cast_options,
+                capacity,
+                NullValue::ArrayElement,
+            )?;
+            ListElementBuilder::Shredded(Box::new(builder))
+        } else {
+            let builder =
+                make_typed_variant_to_arrow_row_builder(element_data_type, 
cast_options, capacity)?;
+            ListElementBuilder::Typed(Box::new(builder))
+        };
+
         Ok(Self {
             field,
             offsets,
-            element_builder: Box::new(element_builder),
+            element_builder,
             nulls: NullBufferBuilder::new(capacity),
             current_offset: O::ZERO,
             cast_options,
@@ -966,9 +1015,7 @@ where
     }
 
     fn finish(mut self) -> Result<ArrayRef> {
-        let (value, typed_value, nulls) = self.element_builder.finish()?;
-        let element_array =
-            ShreddedVariantFieldArray::from_parts(Some(value), 
Some(typed_value), nulls);
+        let element_array: ArrayRef = self.element_builder.finish()?;
         let field = Arc::new(
             self.field
                 .as_ref()
@@ -987,7 +1034,7 @@ where
                 field,
                 ScalarBuffer::from(self.offsets),
                 ScalarBuffer::from(sizes),
-                ArrayRef::from(element_array),
+                element_array,
                 self.nulls.finish(),
             );
             Ok(Arc::new(list_view_array))
@@ -995,7 +1042,7 @@ where
             let list_array = GenericListArray::<O>::new(
                 field,
                 OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets)),
-                ArrayRef::from(element_array),
+                element_array,
                 self.nulls.finish(),
             );
             Ok(Arc::new(list_array))

Reply via email to