This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 1f77ac51c7 [Variant] Support Variant to PrimitiveArrow for unsigned
integer (#8369)
1f77ac51c7 is described below
commit 1f77ac51c760108ec6263c30666d6581955da336
Author: Congxian Qiu <[email protected]>
AuthorDate: Thu Sep 18 03:02:16 2025 +0800
[Variant] Support Variant to PrimitiveArrow for unsigned integer (#8369)
# Which issue does this PR close?
- Closes #8368.
# Rationale for this change
- Add support for converting Variant values to Arrow primitive arrays of unsigned integer types (UInt8, UInt16, UInt32, UInt64)
- Add tests covering both signed and unsigned integers for the
Variant-to-Arrow primitive conversion
# Are these changes tested?
Yes, they are covered by the added unit tests.
# Are there any user-facing changes?
No
If there are any breaking changes to public APIs, please call them out.
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet-variant-compute/src/type_conversion.rs | 24 +++++++
parquet-variant-compute/src/variant_get.rs | 85 ++++++++++++++++++-------
parquet-variant-compute/src/variant_to_arrow.rs | 32 ++++++++++
3 files changed, 118 insertions(+), 23 deletions(-)
diff --git a/parquet-variant-compute/src/type_conversion.rs
b/parquet-variant-compute/src/type_conversion.rs
index 74a17b4685..ccecd510f6 100644
--- a/parquet-variant-compute/src/type_conversion.rs
+++ b/parquet-variant-compute/src/type_conversion.rs
@@ -74,6 +74,30 @@ impl VariantAsPrimitive<datatypes::Float64Type> for
Variant<'_, '_> {
}
}
+impl VariantAsPrimitive<datatypes::UInt8Type> for Variant<'_, '_> {
+ fn as_primitive(&self) -> Option<u8> {
+ self.as_u8()
+ }
+}
+
+impl VariantAsPrimitive<datatypes::UInt16Type> for Variant<'_, '_> {
+ fn as_primitive(&self) -> Option<u16> {
+ self.as_u16()
+ }
+}
+
+impl VariantAsPrimitive<datatypes::UInt32Type> for Variant<'_, '_> {
+ fn as_primitive(&self) -> Option<u32> {
+ self.as_u32()
+ }
+}
+
+impl VariantAsPrimitive<datatypes::UInt64Type> for Variant<'_, '_> {
+ fn as_primitive(&self) -> Option<u64> {
+ self.as_u64()
+ }
+}
+
/// Convert the value at a specific index in the given array into a `Variant`.
macro_rules! non_generic_conversion_single_value {
($array:expr, $cast_fn:expr, $index:expr) => {{
diff --git a/parquet-variant-compute/src/variant_get.rs
b/parquet-variant-compute/src/variant_get.rs
index 5cd3c094e2..9d32c7f5a6 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -304,6 +304,7 @@ mod test {
};
use arrow::buffer::NullBuffer;
use arrow::compute::CastOptions;
+ use arrow::datatypes::DataType::{Int16, Int32, Int64, UInt16, UInt32,
UInt64, UInt8};
use arrow_schema::{DataType, Field, FieldRef, Fields};
use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
@@ -661,19 +662,6 @@ mod test {
numeric_perfectly_shredded_test!(f64,
perfectly_shredded_float64_variant_array);
}
- /// Shredding: Extract the typed value as Int32Array
- #[test]
- fn get_variant_perfectly_shredded_int32_as_int32() {
- // Extract the typed value as Int32Array
- let array = perfectly_shredded_int32_variant_array();
- // specify we want the typed value as Int32
- let field = Field::new("typed_value", DataType::Int32, true);
- let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
- let result = variant_get(&array, options).unwrap();
- let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1),
Some(2), Some(3)]));
- assert_eq!(&result, &expected)
- }
-
/// AllNull: extract a value as a VariantArray
#[test]
fn get_variant_all_null_as_variant() {
@@ -708,18 +696,69 @@ mod test {
assert_eq!(&result, &expected)
}
- #[test]
- fn get_variant_perfectly_shredded_int16_as_int16() {
- // Extract the typed value as Int16Array
- let array = perfectly_shredded_int16_variant_array();
- // specify we want the typed value as Int16
- let field = Field::new("typed_value", DataType::Int16, true);
- let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
- let result = variant_get(&array, options).unwrap();
- let expected: ArrayRef = Arc::new(Int16Array::from(vec![Some(1),
Some(2), Some(3)]));
- assert_eq!(&result, &expected)
+ macro_rules! perfectly_shredded_to_arrow_primitive_test {
+ ($name:ident, $primitive_type:ident,
$perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
+ #[test]
+ fn $name() {
+ let array = $perfectly_shredded_array_gen_fun();
+ let field = Field::new("typed_value", $primitive_type, true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&array, options).unwrap();
+ let expected_array: ArrayRef = Arc::new($expected_array);
+ assert_eq!(&result, &expected_array);
+ }
+ };
}
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_int16_as_int16,
+ Int16,
+ perfectly_shredded_int16_variant_array,
+ Int16Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_int32_as_int32,
+ Int32,
+ perfectly_shredded_int32_variant_array,
+ Int32Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_int64_as_int64,
+ Int64,
+ perfectly_shredded_int64_variant_array,
+ Int64Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_uint8_as_int8,
+ UInt8,
+ perfectly_shredded_uint8_variant_array,
+ UInt8Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_uint16_as_uint16,
+ UInt16,
+ perfectly_shredded_uint16_variant_array,
+ UInt16Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_uint32_as_uint32,
+ UInt32,
+ perfectly_shredded_uint32_variant_array,
+ UInt32Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
+ perfectly_shredded_to_arrow_primitive_test!(
+ get_variant_perfectly_shredded_uint64_as_uint64,
+ UInt64,
+ perfectly_shredded_uint64_variant_array,
+ UInt64Array::from(vec![Some(1), Some(2), Some(3)])
+ );
+
/// Return a VariantArray that represents a perfectly "shredded" variant
/// for the given typed value.
///
diff --git a/parquet-variant-compute/src/variant_to_arrow.rs
b/parquet-variant-compute/src/variant_to_arrow.rs
index 60f74e365d..115a6a42be 100644
--- a/parquet-variant-compute/src/variant_to_arrow.rs
+++ b/parquet-variant-compute/src/variant_to_arrow.rs
@@ -39,6 +39,10 @@ pub(crate) enum VariantToArrowRowBuilder<'a> {
Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
+ UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
+ UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
+ UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
+ UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
BinaryVariant(VariantToBinaryVariantArrowRowBuilder),
// Path extraction wrapper - contains a boxed enum for any of the above
@@ -53,6 +57,10 @@ impl<'a> VariantToArrowRowBuilder<'a> {
Int16(b) => b.append_null(),
Int32(b) => b.append_null(),
Int64(b) => b.append_null(),
+ UInt8(b) => b.append_null(),
+ UInt16(b) => b.append_null(),
+ UInt32(b) => b.append_null(),
+ UInt64(b) => b.append_null(),
Float16(b) => b.append_null(),
Float32(b) => b.append_null(),
Float64(b) => b.append_null(),
@@ -68,6 +76,10 @@ impl<'a> VariantToArrowRowBuilder<'a> {
Int16(b) => b.append_value(value),
Int32(b) => b.append_value(value),
Int64(b) => b.append_value(value),
+ UInt8(b) => b.append_value(value),
+ UInt16(b) => b.append_value(value),
+ UInt32(b) => b.append_value(value),
+ UInt64(b) => b.append_value(value),
Float16(b) => b.append_value(value),
Float32(b) => b.append_value(value),
Float64(b) => b.append_value(value),
@@ -83,6 +95,10 @@ impl<'a> VariantToArrowRowBuilder<'a> {
Int16(b) => b.finish(),
Int32(b) => b.finish(),
Int64(b) => b.finish(),
+ UInt8(b) => b.finish(),
+ UInt16(b) => b.finish(),
+ UInt32(b) => b.finish(),
+ UInt64(b) => b.finish(),
Float16(b) => b.finish(),
Float32(b) => b.finish(),
Float64(b) => b.finish(),
@@ -132,6 +148,22 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>(
cast_options,
capacity,
)),
+ Some(DataType::UInt8) => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ Some(DataType::UInt16) =>
UInt16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ Some(DataType::UInt32) =>
UInt32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ Some(DataType::UInt64) =>
UInt64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
_ => {
return Err(ArrowError::NotYetImplemented(format!(
"variant_get with path={:?} and data_type={:?} not yet
implemented",