This is an automated email from the ASF dual-hosted git repository.
etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0b044835a8 support string view unshred variant (#9514)
0b044835a8 is described below
commit 0b044835a8180100c89b60d856e9f67634b5d5e7
Author: Matthew Kim <[email protected]>
AuthorDate: Mon Mar 9 14:41:30 2026 -0400
support string view unshred variant (#9514)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/9512
# Rationale for this change
You can build a Variant with a StringView type shredded out, but calling
`unshred_variant` on it fails with a "not yet implemented" error.
---
parquet-variant-compute/src/unshred_variant.rs | 51 +++++++++++++++++---------
1 file changed, 33 insertions(+), 18 deletions(-)
diff --git a/parquet-variant-compute/src/unshred_variant.rs
b/parquet-variant-compute/src/unshred_variant.rs
index 0fba53b315..cfe4134600 100644
--- a/parquet-variant-compute/src/unshred_variant.rs
+++ b/parquet-variant-compute/src/unshred_variant.rs
@@ -21,7 +21,7 @@ use crate::{BorrowedShreddingState, VariantArray,
VariantValueArrayBuilder};
use arrow::array::{
Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray,
FixedSizeListArray,
GenericListArray, GenericListViewArray, LargeStringArray, ListLikeArray,
PrimitiveArray,
- StringArray, StructArray,
+ StringArray, StringViewArray, StructArray,
};
use arrow::buffer::NullBuffer;
use arrow::datatypes::{
@@ -105,6 +105,7 @@ enum UnshredVariantRowBuilder<'a> {
TimestampNanosecond(TimestampUnshredRowBuilder<'a,
TimestampNanosecondType>),
PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>),
PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
+ PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>),
PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>),
PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>),
@@ -147,6 +148,7 @@ impl<'a> UnshredVariantRowBuilder<'a> {
Self::TimestampNanosecond(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveBoolean(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
+ Self::PrimitiveStringView(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveLargeString(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
@@ -228,6 +230,7 @@ impl<'a> UnshredVariantRowBuilder<'a> {
}
DataType::Boolean => primitive_builder!(PrimitiveBoolean,
as_boolean),
DataType::Utf8 => primitive_builder!(PrimitiveString, as_string),
+ DataType::Utf8View => primitive_builder!(PrimitiveStringView,
as_string_view),
DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString,
as_string),
DataType::BinaryView => primitive_builder!(PrimitiveBinaryView,
as_binary_view),
DataType::FixedSizeBinary(16) => {
@@ -408,6 +411,7 @@ macro_rules! impl_append_to_variant_builder {
impl_append_to_variant_builder!(BooleanArray);
impl_append_to_variant_builder!(StringArray);
+impl_append_to_variant_builder!(StringViewArray);
impl_append_to_variant_builder!(LargeStringArray);
impl_append_to_variant_builder!(BinaryViewArray);
impl_append_to_variant_builder!(PrimitiveArray<Int8Type>);
@@ -668,35 +672,46 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a,
L> {
}
}
-// TODO: This code is covered by tests in
`parquet/tests/variant_integration.rs`. Does that suffice?
-// Or do we also need targeted stand-alone unit tests for full coverage?
-
#[cfg(test)]
mod tests {
use crate::VariantArray;
- use arrow::array::{BinaryViewArray, LargeStringArray};
+ use arrow::array::{BinaryViewArray, LargeStringArray, StringViewArray};
use parquet_variant::Variant;
+ #[test]
+ fn test_unshred_utf8view_typed_value() {
+ let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00];
+ let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes;
3]);
+
+ let typed_value: arrow::array::ArrayRef =
std::sync::Arc::new(StringViewArray::from(vec![
+ Some("hello"),
+ Some("middle"),
+ Some("world"),
+ ]));
+
+ let variant_array = VariantArray::from_parts(metadata, None,
Some(typed_value), None);
+
+ let result = crate::unshred_variant(&variant_array).unwrap();
+
+ assert_eq!(result.len(), 3);
+ assert_eq!(result.value(0), Variant::from("hello"));
+ assert_eq!(result.value(1), Variant::from("middle"));
+ assert_eq!(result.value(2), Variant::from("world"));
+ }
+
#[test]
fn test_unshred_largeutf8_typed_value() {
let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00];
- let metadata =
- BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]);
+ let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes;
3]);
- let typed_value: arrow::array::ArrayRef = std::sync::Arc::new(
- LargeStringArray::from(vec![
+ let typed_value: arrow::array::ArrayRef =
+ std::sync::Arc::new(LargeStringArray::from(vec![
Some("hello"),
Some("middle"),
Some("world"),
- ]),
- );
-
- let variant_array = VariantArray::from_parts(
- metadata,
- None,
- Some(typed_value),
- None,
- );
+ ]));
+
+ let variant_array = VariantArray::from_parts(metadata, None,
Some(typed_value), None);
let result = crate::unshred_variant(&variant_array).unwrap();