This is an automated email from the ASF dual-hosted git repository.
mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 686d0851a4 [Variant] ReadOnlyMetadataBuilder borrows its underlying
VariantMetadata (#8502)
686d0851a4 is described below
commit 686d0851a4aaeb556b7a1b342d82e9e8ec678cc4
Author: Ryan Johnson <[email protected]>
AuthorDate: Tue Sep 30 02:08:04 2025 -0600
[Variant] ReadOnlyMetadataBuilder borrows its underlying VariantMetadata
(#8502)
# Which issue does this PR close?
- Relates to https://github.com/apache/arrow-rs/issues/8336
# Rationale for this change
While pathfinding variant unshredding code, I noticed that there's a lot
of variant metadata cloning going on. It's a 32-byte object, so cloning
is not free. And it turns out that ~all read-only metadata builder usage
in the code base can work just as easily with a borrowed reference
because ultimately we're working with bytes borrowed from a binary view
that holds the metadata column.
# What changes are included in this PR?
Update the `ReadOnlyMetadataBuilder` to borrow the `VariantMetadata` it
uses, and update call sites accordingly.
# Are these changes tested?
Yes, the change affects several unit tests.
# Are there any user-facing changes?
Signature of `ReadOnlyMetadataBuilder::new` has changed.
---
parquet-variant-compute/src/shred_variant.rs | 4 ++--
parquet-variant-compute/src/variant_array_builder.rs | 11 ++++++-----
parquet-variant/src/builder.rs | 16 ++++++++--------
3 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/parquet-variant-compute/src/shred_variant.rs
b/parquet-variant-compute/src/shred_variant.rs
index 568eb1b6af..4c5a3c3f8a 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -267,7 +267,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
};
// Route the object's fields by name as either shredded or unshredded
- let mut builder =
self.value_builder.builder_ext(value.metadata().clone());
+ let mut builder = self.value_builder.builder_ext(value.metadata());
let mut object_builder = builder.try_new_object()?;
let mut seen = std::collections::HashSet::new();
let mut partially_shredded = false;
@@ -794,7 +794,7 @@ mod tests {
let object_with_foo_field = |i| {
use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
let metadata = VariantMetadata::new(metadata.value(i));
- let mut metadata_builder =
ReadOnlyMetadataBuilder::new(metadata.clone());
+ let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
let mut value_builder = ValueBuilder::new();
let state = ParentState::variant(&mut value_builder, &mut
metadata_builder);
ObjectBuilder::new(state, false)
diff --git a/parquet-variant-compute/src/variant_array_builder.rs
b/parquet-variant-compute/src/variant_array_builder.rs
index 80e6c0a2a0..29e135a8ff 100644
--- a/parquet-variant-compute/src/variant_array_builder.rs
+++ b/parquet-variant-compute/src/variant_array_builder.rs
@@ -295,7 +295,8 @@ impl VariantValueArrayBuilder {
/// builder.append_value(Variant::from(42));
/// ```
pub fn append_value(&mut self, value: Variant<'_, '_>) {
- self.builder_ext(value.metadata().clone())
+ // NOTE: Have to clone because the builder consumes `value`
+ self.builder_ext(&value.metadata().clone())
.append_value(value);
}
@@ -313,7 +314,7 @@ impl VariantValueArrayBuilder {
/// let Variant::Object(obj) = value else {
/// panic!("Not a variant object");
/// };
- /// let mut metadata_builder =
ReadOnlyMetadataBuilder::new(obj.metadata.clone());
+ /// let mut metadata_builder = ReadOnlyMetadataBuilder::new(&obj.metadata);
/// let state = value_array_builder.parent_state(&mut metadata_builder);
/// let mut object_builder = ObjectBuilder::new(state, false);
/// for (field_name, field_value) in obj.iter() {
@@ -339,7 +340,7 @@ impl VariantValueArrayBuilder {
/// parameter (similar to the way [`parquet_variant::ObjectFieldBuilder`]
hides field names).
pub fn builder_ext<'a>(
&'a mut self,
- metadata: VariantMetadata<'a>,
+ metadata: &'a VariantMetadata<'a>,
) -> VariantValueArrayBuilderExt<'a> {
VariantValueArrayBuilderExt {
metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
@@ -548,7 +549,7 @@ mod test {
// filtering fields takes more work because we need to manually create
an object builder
let value = array.value(1);
- let mut builder = value_builder.builder_ext(value.metadata().clone());
+ let mut builder = value_builder.builder_ext(value.metadata());
builder
.new_object()
.with_field("name", value.get_object_field("name").unwrap())
@@ -557,7 +558,7 @@ mod test {
// same bytes, but now nested and duplicated inside a list
let value = array.value(2);
- let mut builder = value_builder.builder_ext(value.metadata().clone());
+ let mut builder = value_builder.builder_ext(value.metadata());
builder
.new_list()
.with_value(value.clone())
diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index 95a30c206d..17455fc4bf 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -498,7 +498,7 @@ impl MetadataBuilder for WritableMetadataBuilder {
/// time `finish` is called, a different trait impl will be needed.
#[derive(Debug)]
pub struct ReadOnlyMetadataBuilder<'m> {
- metadata: VariantMetadata<'m>,
+ metadata: &'m VariantMetadata<'m>,
// A cache that tracks field names this builder has already seen, because
finding the field id
// for a given field name is expensive -- O(n) for a large and unsorted
metadata dictionary.
known_field_names: HashMap<&'m str, u32>,
@@ -506,7 +506,7 @@ pub struct ReadOnlyMetadataBuilder<'m> {
impl<'m> ReadOnlyMetadataBuilder<'m> {
/// Creates a new read-only metadata builder from the given metadata
dictionary.
- pub fn new(metadata: VariantMetadata<'m>) -> Self {
+ pub fn new(metadata: &'m VariantMetadata<'m>) -> Self {
Self {
metadata,
known_field_names: HashMap::new(),
@@ -3357,7 +3357,7 @@ mod tests {
// Use the metadata to build new variant values
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
- let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
+ let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
let mut value_builder = ValueBuilder::new();
{
@@ -3390,7 +3390,7 @@ mod tests {
// Use the metadata to build new variant values
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
- let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
+ let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
let mut value_builder = ValueBuilder::new();
{
@@ -3438,7 +3438,7 @@ mod tests {
// Copy using the new bytes API
let metadata = VariantMetadata::new(&metadata);
- let mut metadata = ReadOnlyMetadataBuilder::new(metadata);
+ let mut metadata = ReadOnlyMetadataBuilder::new(&metadata);
let mut builder2 = ValueBuilder::new();
let state = ParentState::variant(&mut builder2, &mut metadata);
ValueBuilder::append_variant_bytes(state, variant1);
@@ -3466,7 +3466,7 @@ mod tests {
// Create a new object copying subset of fields interleaved with new
ones
let metadata2 = VariantMetadata::new(&metadata1);
- let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
+ let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
let mut builder2 = ValueBuilder::new();
let state = ParentState::variant(&mut builder2, &mut metadata2);
{
@@ -3530,7 +3530,7 @@ mod tests {
// Create a new list copying subset of elements interleaved with new
ones
let metadata2 = VariantMetadata::new(&metadata1);
- let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
+ let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
let mut builder2 = ValueBuilder::new();
let state = ParentState::variant(&mut builder2, &mut metadata2);
{
@@ -3626,7 +3626,7 @@ mod tests {
// Create filtered/modified version: only copy active users and inject
new data
let metadata2 = VariantMetadata::new(&metadata1);
- let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
+ let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
let mut builder2 = ValueBuilder::new();
let state = ParentState::variant(&mut builder2, &mut metadata2);
{