This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 7ac9db7f62 [Variant] [Shredding] feat: Support typed_access for Date32 (#8379)
7ac9db7f62 is described below
commit 7ac9db7f62946bda26344dc2cdb0292a31996abd
Author: Li Jiaying <[email protected]>
AuthorDate: Fri Sep 19 15:04:12 2025 -0400
[Variant] [Shredding] feat: Support typed_access for Date32 (#8379)
# Which issue does this PR close?
- Closes #8330.
# Rationale for this change
# What changes are included in this PR?
# Are these changes tested?
Yes
# Are there any user-facing changes?
N/A
---
parquet-variant-compute/src/variant_array.rs | 10 +++-
parquet-variant-compute/src/variant_get.rs | 73 ++++++++++++++++++++++++++--
parquet/tests/variant_integration.rs | 5 +-
3 files changed, 80 insertions(+), 8 deletions(-)
diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs
index 4abffa65c2..faaa1611ef 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -21,8 +21,8 @@ use crate::type_conversion::primitive_conversion_single_value;
use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray};
use arrow::buffer::NullBuffer;
use arrow::datatypes::{
- Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
- UInt32Type, UInt64Type, UInt8Type,
+ Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
+ UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
use parquet_variant::Uuid;
@@ -556,6 +556,12 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
let value = boolean_array.value(index);
Variant::from(value)
}
+ DataType::Date32 => {
+ let array = typed_value.as_primitive::<Date32Type>();
+ let value = array.value(index);
+ let date = Date32Type::to_naive_date(value);
+ Variant::from(date)
+ }
DataType::FixedSizeBinary(binary_len) => {
let array = typed_value.as_fixed_size_binary();
// Try to treat 16 byte FixedSizeBinary as UUID
diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs
index 0e11168516..8bb34166ae 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -303,9 +303,9 @@ mod test {
use std::sync::Arc;
use arrow::array::{
- Array, ArrayRef, BinaryViewArray, Float16Array, Float32Array, Float64Array, Int16Array,
- Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, UInt32Array,
- UInt64Array, UInt8Array,
+ Array, ArrayRef, BinaryViewArray, Date32Array, Float16Array, Float32Array, Float64Array,
+ Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array,
+ UInt32Array, UInt64Array, UInt8Array,
};
use arrow::buffer::NullBuffer;
use arrow::compute::CastOptions;
@@ -531,6 +531,26 @@ mod test {
assert_eq!(result.value(3), Variant::from("world"));
}
+ #[test]
+ fn get_variant_partially_shredded_date32_as_variant() {
+ let array = partially_shredded_date32_variant_array();
+ let options = GetOptions::new();
+ let result = variant_get(&array, options).unwrap();
+
+ // expect the result is a VariantArray
+ let result: &VariantArray = result.as_any().downcast_ref().unwrap();
+ assert_eq!(result.len(), 4);
+
+ // Expect the values are the same as the original values
+ use chrono::NaiveDate;
+ let date1 = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap();
+ let date2 = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap();
+ assert_eq!(result.value(0), Variant::from(date1));
+ assert!(!result.is_valid(1));
+ assert_eq!(result.value(2), Variant::from("n/a"));
+ assert_eq!(result.value(3), Variant::from(date2));
+ }
+
#[test]
fn get_variant_partially_shredded_binary_view_as_variant() {
let array = partially_shredded_binary_view_variant_array();
@@ -1143,6 +1163,53 @@ mod test {
)
}
+ /// Return a VariantArray that represents a partially "shredded" variant for Date32
+ fn partially_shredded_date32_variant_array() -> ArrayRef {
+ let (metadata, string_value) = {
+ let mut builder = parquet_variant::VariantBuilder::new();
+ builder.append_value("n/a");
+ builder.finish()
+ };
+
+ // Create the null buffer for the overall array
+ let nulls = NullBuffer::from(vec![
+ true, // row 0 non null
+ false, // row 1 is null
+ true, // row 2 non null
+ true, // row 3 non null
+ ]);
+
+ // metadata is the same for all rows
+ let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
+
+ // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
+ // about why row 1 is an empty, but non-null, value.
+ let values = BinaryViewArray::from(vec![
+ None, // row 0 is shredded, so no value
+ Some(b"" as &[u8]), // row 1 is null, so empty value
+ Some(&string_value), // copy the string value "n/a"
+ None, // row 3 is shredded, so no value
+ ]);
+
+ let typed_value = Date32Array::from(vec![
+ Some(20348), // row 0 is shredded, 2025-09-17
+ None, // row 1 is null
+ None, // row 2 is a string, not a date
+ Some(20340), // row 3 is shredded, 2025-09-09
+ ]);
+
+ let struct_array = StructArrayBuilder::new()
+ .with_field("metadata", Arc::new(metadata), true)
+ .with_field("typed_value", Arc::new(typed_value), true)
+ .with_field("value", Arc::new(values), true)
+ .with_nulls(nulls)
+ .build();
+
+ Arc::new(
+ VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"),
+ )
+ }
+
/// Return a VariantArray that represents a partially "shredded" variant for BinaryView
fn partially_shredded_binary_view_variant_array() -> ArrayRef {
let (metadata, string_value) = {
diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index 97fb6b8801..ebce056cc4 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -92,9 +92,8 @@ variant_test_case!(14);
variant_test_case!(15);
variant_test_case!(16);
variant_test_case!(17);
-// https://github.com/apache/arrow-rs/issues/8330
-variant_test_case!(18, "Unsupported typed_value type: Date32");
-variant_test_case!(19, "Unsupported typed_value type: Date32");
+variant_test_case!(18);
+variant_test_case!(19);
// https://github.com/apache/arrow-rs/issues/8331
variant_test_case!(
20,