This is an automated email from the ASF dual-hosted git repository.
mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 477d69e799 [Variant] Remove unused metadata from variant
ShreddingState (#8355)
477d69e799 is described below
commit 477d69e799d4c0e465432e2b653adeae70f25a62
Author: Ryan Johnson <[email protected]>
AuthorDate: Tue Sep 16 07:34:30 2025 -0600
[Variant] Remove unused metadata from variant ShreddingState (#8355)
# Which issue does this PR close?
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
- Closes #NNN.
# Rationale for this change
Somehow (maybe due to a bad merge?), `ShreddingState` ended up with a
`metadata` field, even though nobody uses it. The top-level variant
array stores metadata directly, and shredded object fields don't have
metadata in the first place.
# What changes are included in this PR?
Remove the redundant arg.
# Are these changes tested?
Existing tests cover this code removal.
# Are there any user-facing changes?
No
---
parquet-variant-compute/src/variant_array.rs | 100 ++++++++-------------------
1 file changed, 27 insertions(+), 73 deletions(-)
diff --git a/parquet-variant-compute/src/variant_array.rs
b/parquet-variant-compute/src/variant_array.rs
index 050ba053cb..c3f3ad5413 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -129,7 +129,7 @@ impl VariantArray {
Ok(Self {
inner: inner.clone(),
metadata: metadata.clone(),
- shredding_state: ShreddingState::try_new(metadata.clone(), value,
typed_value)?,
+ shredding_state: ShreddingState::try_new(value, typed_value)?,
})
}
@@ -154,8 +154,7 @@ impl VariantArray {
// This would be a lot simpler if ShreddingState were just a pair of
Option... we already
// have everything we need.
let inner = builder.build();
- let shredding_state =
- ShreddingState::try_new(metadata.clone(), value,
typed_value).unwrap(); // valid by construction
+ let shredding_state = ShreddingState::try_new(value,
typed_value).unwrap(); // valid by construction
Self {
inner,
metadata,
@@ -222,7 +221,7 @@ impl VariantArray {
typed_value_to_variant(typed_value, index)
}
}
- ShreddingState::AllNull { .. } => {
+ ShreddingState::AllNull => {
// AllNull case: neither value nor typed_value fields exist
// NOTE: This handles the case where neither value nor
typed_value fields exist.
// For top-level variants, this returns Variant::Null (JSON
null).
@@ -325,14 +324,11 @@ impl ShreddedVariantFieldArray {
.and_then(|col| col.as_binary_view_opt().cloned());
let typed_value = inner_struct.column_by_name("typed_value").cloned();
- // Use a dummy metadata for the constructor (ShreddedVariantFieldArray
doesn't have metadata)
- let dummy_metadata =
arrow::array::BinaryViewArray::new_null(inner_struct.len());
-
// Note this clone is cheap, it just bumps the ref count
let inner = inner_struct.clone();
Ok(Self {
inner: inner.clone(),
- shredding_state: ShreddingState::try_new(dummy_metadata, value,
typed_value)?,
+ shredding_state: ShreddingState::try_new(value, typed_value)?,
})
}
@@ -432,16 +428,10 @@ impl Array for ShreddedVariantFieldArray {
#[derive(Debug)]
pub enum ShreddingState {
/// This variant has no typed_value field
- Unshredded {
- metadata: BinaryViewArray,
- value: BinaryViewArray,
- },
+ Unshredded { value: BinaryViewArray },
/// This variant has a typed_value field and no value field
/// meaning it is the shredded type
- Typed {
- metadata: BinaryViewArray,
- typed_value: ArrayRef,
- },
+ Typed { typed_value: ArrayRef },
/// Imperfectly shredded: Shredded values reside in `typed_value` while
those that failed to
/// shred reside in `value`. Missing field values are NULL in both
columns, while NULL primitive
/// values have NULL `typed_value` and `Variant::Null` in `value`.
@@ -453,7 +443,6 @@ pub enum ShreddingState {
/// the `value` is a variant object containing the subset of fields for
which shredding was
/// not even attempted.
PartiallyShredded {
- metadata: BinaryViewArray,
value: BinaryViewArray,
typed_value: ArrayRef,
},
@@ -463,38 +452,20 @@ pub enum ShreddingState {
/// Note: By strict spec interpretation, this should only be valid for
shredded object fields,
/// not top-level variants. However, we allow it and treat as
Variant::Null for pragmatic
/// handling of missing data.
- AllNull { metadata: BinaryViewArray },
+ AllNull,
}
impl ShreddingState {
/// try to create a new `ShreddingState` from the given fields
pub fn try_new(
- metadata: BinaryViewArray,
value: Option<BinaryViewArray>,
typed_value: Option<ArrayRef>,
) -> Result<Self, ArrowError> {
- match (metadata, value, typed_value) {
- (metadata, Some(value), Some(typed_value)) =>
Ok(Self::PartiallyShredded {
- metadata,
- value,
- typed_value,
- }),
- (metadata, Some(value), None) => Ok(Self::Unshredded { metadata,
value }),
- (metadata, None, Some(typed_value)) => Ok(Self::Typed {
- metadata,
- typed_value,
- }),
- (metadata, None, None) => Ok(Self::AllNull { metadata }),
- }
- }
-
- /// Return a reference to the metadata field
- pub fn metadata_field(&self) -> &BinaryViewArray {
- match self {
- ShreddingState::Unshredded { metadata, .. } => metadata,
- ShreddingState::Typed { metadata, .. } => metadata,
- ShreddingState::PartiallyShredded { metadata, .. } => metadata,
- ShreddingState::AllNull { metadata } => metadata,
+ match (value, typed_value) {
+ (Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded {
value, typed_value }),
+ (Some(value), None) => Ok(Self::Unshredded { value }),
+ (None, Some(typed_value)) => Ok(Self::Typed { typed_value }),
+ (None, None) => Ok(Self::AllNull),
}
}
@@ -504,7 +475,7 @@ impl ShreddingState {
ShreddingState::Unshredded { value, .. } => Some(value),
ShreddingState::Typed { .. } => None,
ShreddingState::PartiallyShredded { value, .. } => Some(value),
- ShreddingState::AllNull { .. } => None,
+ ShreddingState::AllNull => None,
}
}
@@ -514,36 +485,26 @@ impl ShreddingState {
ShreddingState::Unshredded { .. } => None,
ShreddingState::Typed { typed_value, .. } => Some(typed_value),
ShreddingState::PartiallyShredded { typed_value, .. } =>
Some(typed_value),
- ShreddingState::AllNull { .. } => None,
+ ShreddingState::AllNull => None,
}
}
/// Slice all the underlying arrays
pub fn slice(&self, offset: usize, length: usize) -> Self {
match self {
- ShreddingState::Unshredded { metadata, value } =>
ShreddingState::Unshredded {
- metadata: metadata.slice(offset, length),
+ ShreddingState::Unshredded { value } => ShreddingState::Unshredded
{
value: value.slice(offset, length),
},
- ShreddingState::Typed {
- metadata,
- typed_value,
- } => ShreddingState::Typed {
- metadata: metadata.slice(offset, length),
- typed_value: typed_value.slice(offset, length),
- },
- ShreddingState::PartiallyShredded {
- metadata,
- value,
- typed_value,
- } => ShreddingState::PartiallyShredded {
- metadata: metadata.slice(offset, length),
- value: value.slice(offset, length),
+ ShreddingState::Typed { typed_value } => ShreddingState::Typed {
typed_value: typed_value.slice(offset, length),
},
- ShreddingState::AllNull { metadata } => ShreddingState::AllNull {
- metadata: metadata.slice(offset, length),
- },
+ ShreddingState::PartiallyShredded { value, typed_value } => {
+ ShreddingState::PartiallyShredded {
+ value: value.slice(offset, length),
+ typed_value: typed_value.slice(offset, length),
+ }
+ }
+ ShreddingState::AllNull => ShreddingState::AllNull,
}
}
}
@@ -744,7 +705,7 @@ mod test {
// Verify the shredding state is AllNull
assert!(matches!(
variant_array.shredding_state(),
- ShreddingState::AllNull { .. }
+ ShreddingState::AllNull
));
// Verify that value() returns Variant::Null (compensating for spec
violation)
@@ -801,17 +762,10 @@ mod test {
#[test]
fn all_null_shredding_state() {
- let metadata = BinaryViewArray::from(vec![b"test" as &[u8]]);
- let shredding_state = ShreddingState::try_new(metadata.clone(), None,
None).unwrap();
+ let shredding_state = ShreddingState::try_new(None, None).unwrap();
// Verify the shredding state is AllNull
- assert!(matches!(shredding_state, ShreddingState::AllNull { .. }));
-
- // Verify metadata is preserved correctly
- if let ShreddingState::AllNull { metadata: m } = shredding_state {
- assert_eq!(m.len(), metadata.len());
- assert_eq!(m.value(0), metadata.value(0));
- }
+ assert!(matches!(shredding_state, ShreddingState::AllNull));
}
#[test]
@@ -827,7 +781,7 @@ mod test {
// Verify the shredding state is AllNull
assert!(matches!(
variant_array.shredding_state(),
- ShreddingState::AllNull { .. }
+ ShreddingState::AllNull
));
// Verify all values are null