klion26 commented on code in PR #9518:
URL: https://github.com/apache/arrow-rs/pull/9518#discussion_r2916360301
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -463,51 +465,87 @@ mod test {
};
}
+ /// Build the mixed input [typed, null, "n/a", typed] and let shred_variant
+ /// generate the shredded fixture for the requested type.
macro_rules! partially_shredded_variant_array_gen {
($func_name:ident, $typed_value_array_gen: expr) => {
fn $func_name() -> ArrayRef {
- // At the time of writing, the `VariantArrayBuilder` does not
support shredding.
- // so we must construct the array manually. see
https://github.com/apache/arrow-rs/issues/7895
- let (metadata, string_value) = {
- let mut builder = parquet_variant::VariantBuilder::new();
- builder.append_value("n/a");
- builder.finish()
- };
-
- let nulls = NullBuffer::from(vec![
- true, // row 0 non null
- false, // row 1 is null
- true, // row 2 non null
- true, // row 3 non null
- ]);
-
- // metadata is the same for all rows
- let metadata =
BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
-
- // See
https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
- // about why row1 is an empty but non null, value.
- let values = BinaryViewArray::from(vec![
- None, // row 0 is shredded, so no value
- Some(b"" as &[u8]), // row 1 is null, so empty value
(why?)
- Some(&string_value), // copy the string value "N/A"
- None, // row 3 is shredded, so no value
- ]);
-
- let typed_value = $typed_value_array_gen();
-
- let struct_array = StructArrayBuilder::new()
- .with_field("metadata", Arc::new(metadata), false)
- .with_field("typed_value", Arc::new(typed_value), true)
- .with_field("value", Arc::new(values), true)
- .with_nulls(nulls)
- .build();
- ArrayRef::from(
- VariantArray::try_new(&struct_array).expect("should create
variant array"),
- )
+ let typed_value: ArrayRef = Arc::new($typed_value_array_gen());
+ let typed_as_variant = cast_to_variant(typed_value.as_ref())
+ .expect("should cast typed array to variant");
+ let mut input_builder =
VariantArrayBuilder::new(typed_as_variant.len());
+ input_builder.append_variant(typed_as_variant.value(0));
+ input_builder.append_null();
+ input_builder.append_variant(Variant::from("n/a"));
+ input_builder.append_variant(typed_as_variant.value(3));
+
+ let variant_array = shred_variant(&input_builder.build(),
typed_value.data_type())
+ .expect("should shred variant array");
+ ArrayRef::from(variant_array)
}
};
}
+ // Fixture definitions grouped with the partially-shredded tests.
+ macro_rules! numeric_partially_shredded_variant_array_fn {
+ ($func:ident, $array_type:ident, $primitive_type:ty) => {
+ partially_shredded_variant_array_gen!($func, ||
$array_type::from(vec![
+ Some(<$primitive_type>::try_from(34u8).unwrap()),
+ None,
+ None,
+ Some(<$primitive_type>::try_from(100u8).unwrap()),
+ ]));
+ };
+ }
+
+ numeric_partially_shredded_variant_array_fn!(
+ partially_shredded_int8_variant_array,
+ Int8Array,
+ i8
+ );
+ numeric_partially_shredded_variant_array_fn!(
+ partially_shredded_int16_variant_array,
+ Int16Array,
+ i16
+ );
+ numeric_partially_shredded_variant_array_fn!(
+ partially_shredded_int32_variant_array,
+ Int32Array,
+ i32
+ );
+ numeric_partially_shredded_variant_array_fn!(
+ partially_shredded_int64_variant_array,
+ Int64Array,
+ i64
+ );
+ numeric_partially_shredded_variant_array_fn!(
+ partially_shredded_float32_variant_array,
+ Float32Array,
+ f32
+ );
+ numeric_partially_shredded_variant_array_fn!(
+ partially_shredded_float64_variant_array,
+ Float64Array,
+ f64
+ );
+
+
partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, ||
{
+ arrow::array::BooleanArray::from(vec![Some(true), None, None,
Some(false)])
+ });
+
+
partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, ||
{
Review Comment:
IIUC, this will generate data in which (with the second row being invalid,
i.e. null) `typed_value` contains `hello`, `n/a`, and `world`: the
`Variant::from("n/a")` appended in the macro will also be shredded into
`typed_value`, so no item ends up in `value`. Maybe we can improve this case
so that some items are located in `typed_value` and some in `value`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]