This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 322f9ce681 [Variant] Add unshred_variant support for Binary and
LargeBinary types (#9576)
322f9ce681 is described below
commit 322f9ce681ed51aa0c99b6517d5f43b7279ecc52
Author: Kunal <[email protected]>
AuthorDate: Fri Mar 20 19:26:43 2026 +0530
[Variant] Add unshred_variant support for Binary and LargeBinary types
(#9576)
# Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
-->
- Closes #9526
# Rationale for this change
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
`shred_variant` already supports Binary and LargeBinary types (#9525,
#9554), but unshred_variant does not handle these types. This means
shredded Binary/LargeBinary columns cannot be converted back to
unshredded VariantArrays.
# What changes are included in this PR?
Adds unshred_variant support for DataType::Binary and
DataType::LargeBinary in parquet-variant-compute/src/unshred_variant.rs:
- New enum variants PrimitiveBinary and PrimitiveLargeBinary
- Match arms in append_row and try_new_opt
- AppendToVariantBuilder impls for BinaryArray and LargeBinaryArray
# Are these changes tested?
Yes
# Are there any user-facing changes?
No breaking changes
---------
Signed-off-by: Kunal Singh Dadhwal <[email protected]>
---
parquet-variant-compute/src/unshred_variant.rs | 62 ++++++++++++++++++++++++--
1 file changed, 58 insertions(+), 4 deletions(-)
diff --git a/parquet-variant-compute/src/unshred_variant.rs
b/parquet-variant-compute/src/unshred_variant.rs
index cfe4134600..2df36fa63f 100644
--- a/parquet-variant-compute/src/unshred_variant.rs
+++ b/parquet-variant-compute/src/unshred_variant.rs
@@ -19,9 +19,9 @@
use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
use arrow::array::{
- Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray,
FixedSizeListArray,
- GenericListArray, GenericListViewArray, LargeStringArray, ListLikeArray,
PrimitiveArray,
- StringArray, StringViewArray, StructArray,
+ Array, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray,
FixedSizeBinaryArray,
+ FixedSizeListArray, GenericListArray, GenericListViewArray,
LargeBinaryArray, LargeStringArray,
+ ListLikeArray, PrimitiveArray, StringArray, StringViewArray, StructArray,
};
use arrow::buffer::NullBuffer;
use arrow::datatypes::{
@@ -107,7 +107,9 @@ enum UnshredVariantRowBuilder<'a> {
PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>),
PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>),
+ PrimitiveBinary(UnshredPrimitiveRowBuilder<'a, BinaryArray>),
PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
+ PrimitiveLargeBinary(UnshredPrimitiveRowBuilder<'a, LargeBinaryArray>),
PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>),
List(ListUnshredVariantBuilder<'a, GenericListArray<i32>>),
LargeList(ListUnshredVariantBuilder<'a, GenericListArray<i64>>),
@@ -150,7 +152,9 @@ impl<'a> UnshredVariantRowBuilder<'a> {
Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
Self::PrimitiveStringView(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveLargeString(b) => b.append_row(builder, metadata,
index),
+ Self::PrimitiveBinary(b) => b.append_row(builder, metadata, index),
Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata,
index),
+ Self::PrimitiveLargeBinary(b) => b.append_row(builder, metadata,
index),
Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
Self::List(b) => b.append_row(builder, metadata, index),
Self::LargeList(b) => b.append_row(builder, metadata, index),
@@ -232,7 +236,9 @@ impl<'a> UnshredVariantRowBuilder<'a> {
DataType::Utf8 => primitive_builder!(PrimitiveString, as_string),
DataType::Utf8View => primitive_builder!(PrimitiveStringView,
as_string_view),
DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString,
as_string),
+ DataType::Binary => primitive_builder!(PrimitiveBinary, as_binary),
DataType::BinaryView => primitive_builder!(PrimitiveBinaryView,
as_binary_view),
+ DataType::LargeBinary => primitive_builder!(PrimitiveLargeBinary,
as_binary),
DataType::FixedSizeBinary(16) => {
primitive_builder!(PrimitiveUuid, as_fixed_size_binary)
}
@@ -413,7 +419,9 @@ impl_append_to_variant_builder!(BooleanArray);
impl_append_to_variant_builder!(StringArray);
impl_append_to_variant_builder!(StringViewArray);
impl_append_to_variant_builder!(LargeStringArray);
+impl_append_to_variant_builder!(BinaryArray);
impl_append_to_variant_builder!(BinaryViewArray);
+impl_append_to_variant_builder!(LargeBinaryArray);
impl_append_to_variant_builder!(PrimitiveArray<Int8Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int16Type>);
impl_append_to_variant_builder!(PrimitiveArray<Int32Type>);
@@ -675,7 +683,9 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L>
{
#[cfg(test)]
mod tests {
use crate::VariantArray;
- use arrow::array::{BinaryViewArray, LargeStringArray, StringViewArray};
+ use arrow::array::{
+ BinaryArray, BinaryViewArray, LargeBinaryArray, LargeStringArray,
StringViewArray,
+ };
use parquet_variant::Variant;
#[test]
@@ -720,4 +730,48 @@ mod tests {
assert_eq!(result.value(1), Variant::from("middle"));
assert_eq!(result.value(2), Variant::from("world"));
}
+
+ #[test]
+ fn test_unshred_binary_typed_value() {
+ let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00];
+ let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes;
3]);
+
+ let typed_value: arrow::array::ArrayRef =
+ std::sync::Arc::new(BinaryArray::from_iter_values(vec![
+ &b"\x00\x01\x02"[..],
+ &b"\xff\xaa"[..],
+ &b"\xde\xad\xbe\xef"[..],
+ ]));
+
+ let variant_array = VariantArray::from_parts(metadata, None,
Some(typed_value), None);
+
+ let result = crate::unshred_variant(&variant_array).unwrap();
+
+ assert_eq!(result.len(), 3);
+ assert_eq!(result.value(0), Variant::from(&b"\x00\x01\x02"[..]));
+ assert_eq!(result.value(1), Variant::from(&b"\xff\xaa"[..]));
+ assert_eq!(result.value(2), Variant::from(&b"\xde\xad\xbe\xef"[..]));
+ }
+
+ #[test]
+ fn test_unshred_largebinary_typed_value() {
+ let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00];
+ let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes;
3]);
+
+ let typed_value: arrow::array::ArrayRef =
+ std::sync::Arc::new(LargeBinaryArray::from_iter_values(vec![
+ &b"\x00\x01\x02"[..],
+ &b"\xff\xaa"[..],
+ &b"\xde\xad\xbe\xef"[..],
+ ]));
+
+ let variant_array = VariantArray::from_parts(metadata, None,
Some(typed_value), None);
+
+ let result = crate::unshred_variant(&variant_array).unwrap();
+
+ assert_eq!(result.len(), 3);
+ assert_eq!(result.value(0), Variant::from(&b"\x00\x01\x02"[..]));
+ assert_eq!(result.value(1), Variant::from(&b"\xff\xaa"[..]));
+ assert_eq!(result.value(2), Variant::from(&b"\xde\xad\xbe\xef"[..]));
+ }
}