This is an automated email from the ASF dual-hosted git repository.

mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new bed9ed8ffb Variant integration fixes (#8438)
bed9ed8ffb is described below

commit bed9ed8ffb5c78f906225bdb24031ec06c219b86
Author: Ryan Johnson <[email protected]>
AuthorDate: Thu Sep 25 10:13:36 2025 -0600

    Variant integration fixes (#8438)
    
    # Which issue does this PR close?
    
    Closes
    - https://github.com/apache/arrow-rs/issues/8435
    - https://github.com/apache/arrow-rs/issues/8420
    
    # Rationale for this change
    
    It turns out we were too permissive in our handling of `typed_value`
    columns and certain other exceptional cases that parquet's variant
    integration tests specifically expect readers to reject.
    
    # What changes are included in this PR?
    
    * Simplify `VariantArray::value` to work directly with (optional)
    `value` and `typed_value` columns instead of the `ShreddingState` enum
    * Rename `rewrite_to_view_types` as `canonicalize_and_verify_data_type`
    and expand it to also reject all illegal column types (= any that don't
    map directly to a variant subtype)
    * Fix several broken integration tests
    * Remove several illegal unit tests (that were exercising invalid
    shredding scenarios)
    
    # Are these changes tested?
    
    Yes.
    
    # Are there any user-facing changes?
    
    Behavior change: We no longer tolerate invalid-type `typed_value`
    columns when reading shredded variant data. At least, not in code paths
    that go through `VariantArray::value`. There may still be some leakage
    in the shredded path step handling of `variant_get`.
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 parquet-variant-compute/src/variant_array.rs | 207 +++++++++++++++++---------
 parquet-variant-compute/src/variant_get.rs   | 210 +--------------------------
 parquet/tests/variant_integration.rs         |  16 +-
 3 files changed, 145 insertions(+), 288 deletions(-)

diff --git a/parquet-variant-compute/src/variant_array.rs 
b/parquet-variant-compute/src/variant_array.rs
index dbed1a4fbb..16dbff4c34 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -23,12 +23,13 @@ use arrow::buffer::NullBuffer;
 use arrow::compute::cast;
 use arrow::datatypes::{
     Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type, Int8Type,
-    UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
 use arrow_schema::extension::ExtensionType;
-use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
+use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
 use parquet_variant::Uuid;
 use parquet_variant::Variant;
+
+use std::borrow::Cow;
 use std::sync::Arc;
 
 /// Arrow Variant [`ExtensionType`].
@@ -353,37 +354,18 @@ impl VariantArray {
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant<'_, '_> {
-        match &self.shredding_state {
-            ShreddingState::Unshredded { value, .. } => {
-                // Unshredded case
-                Variant::new(self.metadata.value(index), value.value(index))
-            }
-            ShreddingState::Typed { typed_value, .. } => {
-                // Typed case (formerly PerfectlyShredded)
-                if typed_value.is_null(index) {
-                    Variant::Null
-                } else {
-                    typed_value_to_variant(typed_value, index)
-                }
-            }
-            ShreddingState::PartiallyShredded {
-                value, typed_value, ..
-            } => {
-                // PartiallyShredded case (formerly ImperfectlyShredded)
-                if typed_value.is_null(index) {
-                    Variant::new(self.metadata.value(index), 
value.value(index))
-                } else {
-                    typed_value_to_variant(typed_value, index)
-                }
+        match (self.typed_value_field(), self.value_field()) {
+            // Always prefer typed_value, if available
+            (Some(typed_value), value) if typed_value.is_valid(index) => {
+                typed_value_to_variant(typed_value, value, index)
             }
-            ShreddingState::AllNull => {
-                // AllNull case: neither value nor typed_value fields exist
-                // NOTE: This handles the case where neither value nor 
typed_value fields exist.
-                // For top-level variants, this returns Variant::Null (JSON 
null).
-                // For shredded object fields, this technically should 
indicate SQL NULL,
-                // but the current API cannot distinguish these contexts.
-                Variant::Null
+            // Otherwise fall back to value, if available
+            (_, Some(value)) if value.is_valid(index) => {
+                Variant::new(self.metadata.value(index), value.value(index))
             }
+            // It is technically invalid for neither value nor typed_value 
fields to be available,
+            // but the spec specifically requires readers to return 
Variant::Null in this case.
+            _ => Variant::Null,
         }
     }
 
@@ -796,8 +778,17 @@ impl StructArrayBuilder {
 }
 
 /// returns the non-null element at index as a Variant
-fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, 
'_> {
-    match typed_value.data_type() {
+fn typed_value_to_variant<'a>(
+    typed_value: &'a ArrayRef,
+    value: Option<&BinaryViewArray>,
+    index: usize,
+) -> Variant<'a, 'a> {
+    let data_type = typed_value.data_type();
+    if value.is_some_and(|v| !matches!(data_type, DataType::Struct(_)) && 
v.is_valid(index)) {
+        // Only a partially shredded struct is allowed to have values for both 
columns
+        panic!("Invalid variant, conflicting value and typed_value");
+    }
+    match data_type {
         DataType::Boolean => {
             let boolean_array = typed_value.as_boolean();
             let value = boolean_array.value(index);
@@ -809,17 +800,11 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: 
usize) -> Variant<'_, '
             let date = Date32Type::to_naive_date(value);
             Variant::from(date)
         }
-        DataType::FixedSizeBinary(binary_len) => {
+        // 16-byte FixedSizeBinary alway corresponds to a UUID; all other 
sizes are illegal.
+        DataType::FixedSizeBinary(16) => {
             let array = typed_value.as_fixed_size_binary();
-            // Try to treat 16 byte FixedSizeBinary as UUID
-            let value = array.value(index);
-            if *binary_len == 16 {
-                if let Ok(uuid) = Uuid::from_slice(value) {
-                    return Variant::from(uuid);
-                }
-            }
             let value = array.value(index);
-            Variant::from(value)
+            Uuid::from_slice(value).unwrap().into() // unwrap is safe: slice 
is always 16 bytes
         }
         DataType::BinaryView => {
             let array = typed_value.as_binary_view();
@@ -843,18 +828,6 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: 
usize) -> Variant<'_, '
         DataType::Int64 => {
             primitive_conversion_single_value!(Int64Type, typed_value, index)
         }
-        DataType::UInt8 => {
-            primitive_conversion_single_value!(UInt8Type, typed_value, index)
-        }
-        DataType::UInt16 => {
-            primitive_conversion_single_value!(UInt16Type, typed_value, index)
-        }
-        DataType::UInt32 => {
-            primitive_conversion_single_value!(UInt32Type, typed_value, index)
-        }
-        DataType::UInt64 => {
-            primitive_conversion_single_value!(UInt64Type, typed_value, index)
-        }
         DataType::Float16 => {
             primitive_conversion_single_value!(Float16Type, typed_value, index)
         }
@@ -891,28 +864,120 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: 
usize) -> Variant<'_, '
 ///
 /// So cast them to get the right type.
 fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
-    let new_type = rewrite_to_view_types(array.data_type());
-    cast(array, &new_type)
+    let new_type = canonicalize_and_verify_data_type(array.data_type())?;
+    cast(array, new_type.as_ref())
 }
 
-/// replaces all instances of Binary with BinaryView in a DataType
-fn rewrite_to_view_types(data_type: &DataType) -> DataType {
-    match data_type {
-        DataType::Binary => DataType::BinaryView,
-        DataType::List(field) => DataType::List(rewrite_field_type(field)),
-        DataType::Struct(fields) => {
-            DataType::Struct(fields.iter().map(rewrite_field_type).collect())
-        }
-        _ => data_type.clone(),
+/// Validates whether a given arrow decimal is a valid variant decimal
+///
+/// NOTE: By a strict reading of the "decimal table" in the [shredding spec], 
each decimal type
+/// should have a width-dependent lower bound on precision as well as an upper 
bound (i.e. Decimal16
+/// with precision 5 is invalid because Decimal4 "covers" it). But the variant 
shredding integration
+/// tests specifically expect such cases to succeed, so we only enforce the 
upper bound here.
+///
+/// [shredding spec]: 
https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#encoding-types
+fn is_valid_variant_decimal(p: &u8, s: &i8, max_precision: u8) -> bool {
+    (1..=max_precision).contains(p) && (0..=*p as i8).contains(s)
+}
+
+/// Recursively visits a data type, ensuring that it only contains data types 
that can legally
+/// appear in a (possibly shredded) variant array. It also replaces Binary 
fields with BinaryView,
+/// since that's what comes back from the parquet reader and what the variant 
code expects to find.
+fn canonicalize_and_verify_data_type(
+    data_type: &DataType,
+) -> Result<Cow<'_, DataType>, ArrowError> {
+    use DataType::*;
+
+    // helper macros
+    macro_rules! fail {
+        () => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Illegal shredded value type: {data_type}"
+            )))
+        };
     }
+    macro_rules! borrow {
+        () => {
+            Cow::Borrowed(data_type)
+        };
+    }
+
+    let new_data_type = match data_type {
+        // Primitive arrow types that have a direct variant counterpart are 
allowed
+        Null | Boolean => borrow!(),
+        Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => borrow!(),
+
+        // Unsigned integers and half-float are not allowed
+        UInt8 | UInt16 | UInt32 | UInt64 | Float16 => fail!(),
+
+        // Most decimal types are allowed, with restrictions on precision and 
scale
+        Decimal32(p, s) if is_valid_variant_decimal(p, s, 9) => borrow!(),
+        Decimal64(p, s) if is_valid_variant_decimal(p, s, 18) => borrow!(),
+        Decimal128(p, s) if is_valid_variant_decimal(p, s, 38) => borrow!(),
+        Decimal32(..) | Decimal64(..) | Decimal128(..) | Decimal256(..) => 
fail!(),
+
+        // Only micro and nano timestamps are allowed
+        Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) => 
borrow!(),
+        Timestamp(TimeUnit::Millisecond | TimeUnit::Second, _) => fail!(),
+
+        // Only 32-bit dates and 64-bit microsecond time are allowed.
+        Date32 | Time64(TimeUnit::Microsecond) => borrow!(),
+        Date64 | Time32(_) | Time64(_) | Duration(_) | Interval(_) => fail!(),
+
+        // Binary and string are allowed. Force Binary to BinaryView because 
that's what the parquet
+        // reader returns and what the rest of the variant code expects.
+        Binary => Cow::Owned(DataType::BinaryView),
+        BinaryView | Utf8 => borrow!(),
+
+        // UUID maps to 16-byte fixed-size binary; no other width is allowed
+        FixedSizeBinary(16) => borrow!(),
+        FixedSizeBinary(_) | FixedSizeList(..) => fail!(),
+
+        // We can _possibly_ allow (some of) these some day?
+        LargeBinary | LargeUtf8 | Utf8View | ListView(_) | LargeList(_) | 
LargeListView(_) => {
+            fail!()
+        }
+
+        // Lists and struct are allowed, maps and unions are not
+        List(field) => match canonicalize_and_verify_field(field)? {
+            Cow::Borrowed(_) => borrow!(),
+            Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)),
+        },
+        // Struct is used by the internal layout, and can also represent a 
shredded variant object.
+        Struct(fields) => {
+            // Avoid allocation unless at least one field changes, to avoid 
unnecessary deep cloning
+            // of the data type. Even if some fields change, the others are 
shallow arc clones.
+            let mut new_fields = std::collections::HashMap::new();
+            for (i, field) in fields.iter().enumerate() {
+                if let Cow::Owned(new_field) = 
canonicalize_and_verify_field(field)? {
+                    new_fields.insert(i, new_field);
+                }
+            }
+
+            if new_fields.is_empty() {
+                borrow!()
+            } else {
+                let new_fields = fields
+                    .iter()
+                    .enumerate()
+                    .map(|(i, field)| new_fields.remove(&i).unwrap_or_else(|| 
field.clone()));
+                Cow::Owned(DataType::Struct(new_fields.collect()))
+            }
+        }
+        Map(..) | Union(..) => fail!(),
+
+        // We can _possibly_ support (some of) these some day?
+        Dictionary(..) | RunEndEncoded(..) => fail!(),
+    };
+    Ok(new_data_type)
 }
 
-fn rewrite_field_type(field: impl AsRef<Field>) -> Arc<Field> {
-    let field = field.as_ref();
-    let new_field = field
-        .clone()
-        .with_data_type(rewrite_to_view_types(field.data_type()));
-    Arc::new(new_field)
+fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, 
Arc<Field>>, ArrowError> {
+    let Cow::Owned(new_data_type) = 
canonicalize_and_verify_data_type(field.data_type())? else {
+        return Ok(Cow::Borrowed(field));
+    };
+    let new_field = field.as_ref().clone().with_data_type(new_data_type);
+    Ok(Cow::Owned(Arc::new(new_field)))
 }
 
 #[cfg(test)]
diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index 5adb3c0d31..49f56af573 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -297,13 +297,12 @@ mod test {
     use std::sync::Arc;
 
     use arrow::array::{
-        Array, ArrayRef, AsArray, BinaryViewArray, BooleanArray, Date32Array, 
FixedSizeBinaryArray,
-        Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, 
Int64Array, Int8Array,
-        StringArray, StructArray, UInt16Array, UInt32Array, UInt64Array, 
UInt8Array,
+        Array, ArrayRef, AsArray, BinaryViewArray, BooleanArray, Date32Array, 
Float32Array,
+        Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, 
StringArray, StructArray,
     };
     use arrow::buffer::NullBuffer;
     use arrow::compute::CastOptions;
-    use arrow::datatypes::DataType::{Int16, Int32, Int64, UInt16, UInt32, 
UInt64, UInt8};
+    use arrow::datatypes::DataType::{Int16, Int32, Int64};
     use arrow_schema::{DataType, Field, FieldRef, Fields};
     use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
 
@@ -438,31 +437,6 @@ mod test {
         numeric_partially_shredded_test!(i64, 
partially_shredded_int64_variant_array);
     }
 
-    #[test]
-    fn get_variant_partially_shredded_uint8_as_variant() {
-        numeric_partially_shredded_test!(u8, 
partially_shredded_uint8_variant_array);
-    }
-
-    #[test]
-    fn get_variant_partially_shredded_uint16_as_variant() {
-        numeric_partially_shredded_test!(u16, 
partially_shredded_uint16_variant_array);
-    }
-
-    #[test]
-    fn get_variant_partially_shredded_uint32_as_variant() {
-        numeric_partially_shredded_test!(u32, 
partially_shredded_uint32_variant_array);
-    }
-
-    #[test]
-    fn get_variant_partially_shredded_uint64_as_variant() {
-        numeric_partially_shredded_test!(u64, 
partially_shredded_uint64_variant_array);
-    }
-
-    #[test]
-    fn get_variant_partially_shredded_float16_as_variant() {
-        numeric_partially_shredded_test!(half::f16, 
partially_shredded_float16_variant_array);
-    }
-
     #[test]
     fn get_variant_partially_shredded_float32_as_variant() {
         numeric_partially_shredded_test!(f32, 
partially_shredded_float32_variant_array);
@@ -490,23 +464,6 @@ mod test {
         assert_eq!(result.value(3), Variant::from(false));
     }
 
-    #[test]
-    fn get_variant_partially_shredded_fixed_size_binary_as_variant() {
-        let array = partially_shredded_fixed_size_binary_variant_array();
-        let options = GetOptions::new();
-        let result = variant_get(&array, options).unwrap();
-
-        // expect the result is a VariantArray
-        let result = VariantArray::try_new(&result).unwrap();
-        assert_eq!(result.len(), 4);
-
-        // Expect the values are the same as the original values
-        assert_eq!(result.value(0), Variant::from(&[1u8, 2u8, 3u8][..]));
-        assert!(!result.is_valid(1));
-        assert_eq!(result.value(2), Variant::from("n/a"));
-        assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
-    }
-
     #[test]
     fn get_variant_partially_shredded_utf8_as_variant() {
         let array = partially_shredded_utf8_variant_array();
@@ -645,31 +602,6 @@ mod test {
         numeric_perfectly_shredded_test!(i64, 
perfectly_shredded_int64_variant_array);
     }
 
-    #[test]
-    fn get_variant_perfectly_shredded_uint8_as_variant() {
-        numeric_perfectly_shredded_test!(u8, 
perfectly_shredded_uint8_variant_array);
-    }
-
-    #[test]
-    fn get_variant_perfectly_shredded_uint16_as_variant() {
-        numeric_perfectly_shredded_test!(u16, 
perfectly_shredded_uint16_variant_array);
-    }
-
-    #[test]
-    fn get_variant_perfectly_shredded_uint32_as_variant() {
-        numeric_perfectly_shredded_test!(u32, 
perfectly_shredded_uint32_variant_array);
-    }
-
-    #[test]
-    fn get_variant_perfectly_shredded_uint64_as_variant() {
-        numeric_perfectly_shredded_test!(u64, 
perfectly_shredded_uint64_variant_array);
-    }
-
-    #[test]
-    fn get_variant_perfectly_shredded_float16_as_variant() {
-        numeric_perfectly_shredded_test!(half::f16, 
perfectly_shredded_float16_variant_array);
-    }
-
     #[test]
     fn get_variant_perfectly_shredded_float32_as_variant() {
         numeric_perfectly_shredded_test!(f32, 
perfectly_shredded_float32_variant_array);
@@ -749,34 +681,6 @@ mod test {
         Int64Array::from(vec![Some(1), Some(2), Some(3)])
     );
 
-    perfectly_shredded_to_arrow_primitive_test!(
-        get_variant_perfectly_shredded_uint8_as_int8,
-        UInt8,
-        perfectly_shredded_uint8_variant_array,
-        UInt8Array::from(vec![Some(1), Some(2), Some(3)])
-    );
-
-    perfectly_shredded_to_arrow_primitive_test!(
-        get_variant_perfectly_shredded_uint16_as_uint16,
-        UInt16,
-        perfectly_shredded_uint16_variant_array,
-        UInt16Array::from(vec![Some(1), Some(2), Some(3)])
-    );
-
-    perfectly_shredded_to_arrow_primitive_test!(
-        get_variant_perfectly_shredded_uint32_as_uint32,
-        UInt32,
-        perfectly_shredded_uint32_variant_array,
-        UInt32Array::from(vec![Some(1), Some(2), Some(3)])
-    );
-
-    perfectly_shredded_to_arrow_primitive_test!(
-        get_variant_perfectly_shredded_uint64_as_uint64,
-        UInt64,
-        perfectly_shredded_uint64_variant_array,
-        UInt64Array::from(vec![Some(1), Some(2), Some(3)])
-    );
-
     /// Return a VariantArray that represents a perfectly "shredded" variant
     /// for the given typed value.
     ///
@@ -835,31 +739,6 @@ mod test {
         Int64Array,
         i64
     );
-    numeric_perfectly_shredded_variant_array_fn!(
-        perfectly_shredded_uint8_variant_array,
-        UInt8Array,
-        u8
-    );
-    numeric_perfectly_shredded_variant_array_fn!(
-        perfectly_shredded_uint16_variant_array,
-        UInt16Array,
-        u16
-    );
-    numeric_perfectly_shredded_variant_array_fn!(
-        perfectly_shredded_uint32_variant_array,
-        UInt32Array,
-        u32
-    );
-    numeric_perfectly_shredded_variant_array_fn!(
-        perfectly_shredded_uint64_variant_array,
-        UInt64Array,
-        u64
-    );
-    numeric_perfectly_shredded_variant_array_fn!(
-        perfectly_shredded_float16_variant_array,
-        Float16Array,
-        half::f16
-    );
     numeric_perfectly_shredded_variant_array_fn!(
         perfectly_shredded_float32_variant_array,
         Float32Array,
@@ -963,31 +842,6 @@ mod test {
         Int64Array,
         i64
     );
-    numeric_partially_shredded_variant_array_fn!(
-        partially_shredded_uint8_variant_array,
-        UInt8Array,
-        u8
-    );
-    numeric_partially_shredded_variant_array_fn!(
-        partially_shredded_uint16_variant_array,
-        UInt16Array,
-        u16
-    );
-    numeric_partially_shredded_variant_array_fn!(
-        partially_shredded_uint32_variant_array,
-        UInt32Array,
-        u32
-    );
-    numeric_partially_shredded_variant_array_fn!(
-        partially_shredded_uint64_variant_array,
-        UInt64Array,
-        u64
-    );
-    numeric_partially_shredded_variant_array_fn!(
-        partially_shredded_float16_variant_array,
-        Float16Array,
-        half::f16
-    );
     numeric_partially_shredded_variant_array_fn!(
         partially_shredded_float32_variant_array,
         Float32Array,
@@ -1043,64 +897,6 @@ mod test {
         Arc::new(struct_array)
     }
 
-    /// Return a VariantArray that represents a partially "shredded" variant 
for fixed size binary
-    fn partially_shredded_fixed_size_binary_variant_array() -> ArrayRef {
-        let (metadata, string_value) = {
-            let mut builder = parquet_variant::VariantBuilder::new();
-            builder.append_value("n/a");
-            builder.finish()
-        };
-
-        // Create the null buffer for the overall array
-        let nulls = NullBuffer::from(vec![
-            true,  // row 0 non null
-            false, // row 1 is null
-            true,  // row 2 non null
-            true,  // row 3 non null
-        ]);
-
-        // metadata is the same for all rows
-        let metadata = 
BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
-
-        // See 
https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
-        // about why row1 is an empty but non null, value.
-        let values = BinaryViewArray::from(vec![
-            None,                // row 0 is shredded, so no value
-            Some(b"" as &[u8]),  // row 1 is null, so empty value
-            Some(&string_value), // copy the string value "N/A"
-            None,                // row 3 is shredded, so no value
-        ]);
-
-        // Create fixed size binary array with 3-byte values
-        let data = vec![
-            1u8, 2u8, 3u8, // row 0 is shredded
-            0u8, 0u8, 0u8, // row 1 is null (value doesn't matter)
-            0u8, 0u8, 0u8, // row 2 is a string (value doesn't matter)
-            4u8, 5u8, 6u8, // row 3 is shredded
-        ];
-        let typed_value_nulls = arrow::buffer::NullBuffer::from(vec![
-            true,  // row 0 has value
-            false, // row 1 is null
-            false, // row 2 is string
-            true,  // row 3 has value
-        ]);
-        let typed_value = FixedSizeBinaryArray::try_new(
-            3, // byte width
-            arrow::buffer::Buffer::from(data),
-            Some(typed_value_nulls),
-        )
-        .expect("should create fixed size binary array");
-
-        let struct_array = StructArrayBuilder::new()
-            .with_field("metadata", Arc::new(metadata), false)
-            .with_field("typed_value", Arc::new(typed_value), true)
-            .with_field("value", Arc::new(values), true)
-            .with_nulls(nulls)
-            .build();
-
-        Arc::new(struct_array)
-    }
-
     /// Return a VariantArray that represents a partially "shredded" variant 
for UTF8
     fn partially_shredded_utf8_variant_array() -> ArrayRef {
         let (metadata, string_value) = {
diff --git a/parquet/tests/variant_integration.rs 
b/parquet/tests/variant_integration.rs
index 01ae4175c4..9f202f4db8 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -120,10 +120,7 @@ variant_test_case!(39);
 variant_test_case!(40, "Unsupported typed_value type: List(");
 variant_test_case!(41, "Unsupported typed_value type: List(");
 // Is an error case (should be failing as the expected error message indicates)
-variant_test_case!(
-    42,
-    "Expected an error 'Invalid variant, conflicting value and typed_value`, 
but got no error"
-);
+variant_test_case!(42, "Invalid variant, conflicting value and typed_value");
 // https://github.com/apache/arrow-rs/issues/8336
 variant_test_case!(43, "Unsupported typed_value type: Struct(");
 variant_test_case!(44, "Unsupported typed_value type: Struct(");
@@ -173,6 +170,7 @@ variant_test_case!(84, "Unsupported typed_value type: 
Struct(");
 variant_test_case!(85, "Unsupported typed_value type: List(");
 variant_test_case!(86, "Unsupported typed_value type: List(");
 // Is an error case (should be failing as the expected error message indicates)
+// TODO: Once structs are supported, expect "Invalid variant, non-object value 
with shredded fields"
 variant_test_case!(87, "Unsupported typed_value type: Struct(");
 variant_test_case!(88, "Unsupported typed_value type: List(");
 variant_test_case!(89);
@@ -214,13 +212,11 @@ variant_test_case!(124);
 variant_test_case!(125, "Unsupported typed_value type: Struct");
 variant_test_case!(126, "Unsupported typed_value type: List(");
 // Is an error case (should be failing as the expected error message indicates)
-variant_test_case!(
-    127,
-    "Invalid variant data: InvalidArgumentError(\"Received empty bytes\")"
-);
+variant_test_case!(127, "Illegal shredded value type: UInt32");
 // Is an error case (should be failing as the expected error message indicates)
+// TODO: Once structs are supported, expect "Invalid variant, non-object value 
with shredded fields"
 variant_test_case!(128, "Unsupported typed_value type: Struct(");
-variant_test_case!(129, "Invalid variant data: InvalidArgumentError(");
+variant_test_case!(129);
 variant_test_case!(130, "Unsupported typed_value type: Struct(");
 variant_test_case!(131);
 variant_test_case!(132, "Unsupported typed_value type: Struct(");
@@ -228,7 +224,7 @@ variant_test_case!(133, "Unsupported typed_value type: 
Struct(");
 variant_test_case!(134, "Unsupported typed_value type: Struct(");
 variant_test_case!(135);
 variant_test_case!(136, "Unsupported typed_value type: List(");
-variant_test_case!(137, "Invalid variant data: InvalidArgumentError(");
+variant_test_case!(137, "Illegal shredded value type: FixedSizeBinary(4)");
 variant_test_case!(138, "Unsupported typed_value type: Struct(");
 
 /// Test case definition structure matching the format from

Reply via email to