This is an automated email from the ASF dual-hosted git repository.

mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 686d0851a4 [Variant] ReadOnlyMetadataBuilder borrows its underlying 
VariantMetadata (#8502)
686d0851a4 is described below

commit 686d0851a4aaeb556b7a1b342d82e9e8ec678cc4
Author: Ryan Johnson <[email protected]>
AuthorDate: Tue Sep 30 02:08:04 2025 -0600

    [Variant] ReadOnlyMetadataBuilder borrows its underlying VariantMetadata 
(#8502)
    
    # Which issue does this PR close?
    
    - Relates to https://github.com/apache/arrow-rs/issues/8336
    
    # Rationale for this change
    
    While pathfinding variant unshredding code, I noticed that there's a lot
    of variant metadata cloning going on. It's a 32-byte object, so cloning
    is not free. And it turns out that ~all read-only metadata builder usage
    in the code base can work just as easily with a borrowed reference
    because ultimately we're working with bytes borrowed from a binary view
    that holds the metadata column.
    
    # What changes are included in this PR?
    
    Update the `ReadOnlyMetadataBuilder` to borrow the `VariantMetadata` it
    uses, and update call sites accordingly.
    
    # Are these changes tested?
    
    Yes, the change affects several unit tests.
    
    # Are there any user-facing changes?
    
    Signature of `ReadOnlyMetadataBuilder::new` has changed.
---
 parquet-variant-compute/src/shred_variant.rs         |  4 ++--
 parquet-variant-compute/src/variant_array_builder.rs | 11 ++++++-----
 parquet-variant/src/builder.rs                       | 16 ++++++++--------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/parquet-variant-compute/src/shred_variant.rs 
b/parquet-variant-compute/src/shred_variant.rs
index 568eb1b6af..4c5a3c3f8a 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -267,7 +267,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
         };
 
         // Route the object's fields by name as either shredded or unshredded
-        let mut builder = 
self.value_builder.builder_ext(value.metadata().clone());
+        let mut builder = self.value_builder.builder_ext(value.metadata());
         let mut object_builder = builder.try_new_object()?;
         let mut seen = std::collections::HashSet::new();
         let mut partially_shredded = false;
@@ -794,7 +794,7 @@ mod tests {
         let object_with_foo_field = |i| {
             use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
             let metadata = VariantMetadata::new(metadata.value(i));
-            let mut metadata_builder = 
ReadOnlyMetadataBuilder::new(metadata.clone());
+            let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
             let mut value_builder = ValueBuilder::new();
             let state = ParentState::variant(&mut value_builder, &mut 
metadata_builder);
             ObjectBuilder::new(state, false)
diff --git a/parquet-variant-compute/src/variant_array_builder.rs 
b/parquet-variant-compute/src/variant_array_builder.rs
index 80e6c0a2a0..29e135a8ff 100644
--- a/parquet-variant-compute/src/variant_array_builder.rs
+++ b/parquet-variant-compute/src/variant_array_builder.rs
@@ -295,7 +295,8 @@ impl VariantValueArrayBuilder {
     /// builder.append_value(Variant::from(42));
     /// ```
     pub fn append_value(&mut self, value: Variant<'_, '_>) {
-        self.builder_ext(value.metadata().clone())
+        // NOTE: Have to clone because the builder consumes `value`
+        self.builder_ext(&value.metadata().clone())
             .append_value(value);
     }
 
@@ -313,7 +314,7 @@ impl VariantValueArrayBuilder {
     /// let Variant::Object(obj) = value else {
     ///     panic!("Not a variant object");
     /// };
-    /// let mut metadata_builder = 
ReadOnlyMetadataBuilder::new(obj.metadata.clone());
+    /// let mut metadata_builder = ReadOnlyMetadataBuilder::new(&obj.metadata);
     /// let state = value_array_builder.parent_state(&mut metadata_builder);
     /// let mut object_builder = ObjectBuilder::new(state, false);
     /// for (field_name, field_value) in obj.iter() {
@@ -339,7 +340,7 @@ impl VariantValueArrayBuilder {
     /// parameter (similar to the way [`parquet_variant::ObjectFieldBuilder`] 
hides field names).
     pub fn builder_ext<'a>(
         &'a mut self,
-        metadata: VariantMetadata<'a>,
+        metadata: &'a VariantMetadata<'a>,
     ) -> VariantValueArrayBuilderExt<'a> {
         VariantValueArrayBuilderExt {
             metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
@@ -548,7 +549,7 @@ mod test {
 
         // filtering fields takes more work because we need to manually create 
an object builder
         let value = array.value(1);
-        let mut builder = value_builder.builder_ext(value.metadata().clone());
+        let mut builder = value_builder.builder_ext(value.metadata());
         builder
             .new_object()
             .with_field("name", value.get_object_field("name").unwrap())
@@ -557,7 +558,7 @@ mod test {
 
         // same bytes, but now nested and duplicated inside a list
         let value = array.value(2);
-        let mut builder = value_builder.builder_ext(value.metadata().clone());
+        let mut builder = value_builder.builder_ext(value.metadata());
         builder
             .new_list()
             .with_value(value.clone())
diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index 95a30c206d..17455fc4bf 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -498,7 +498,7 @@ impl MetadataBuilder for WritableMetadataBuilder {
 /// time `finish` is called, a different trait impl will be needed.
 #[derive(Debug)]
 pub struct ReadOnlyMetadataBuilder<'m> {
-    metadata: VariantMetadata<'m>,
+    metadata: &'m VariantMetadata<'m>,
     // A cache that tracks field names this builder has already seen, because 
finding the field id
     // for a given field name is expensive -- O(n) for a large and unsorted 
metadata dictionary.
     known_field_names: HashMap<&'m str, u32>,
@@ -506,7 +506,7 @@ pub struct ReadOnlyMetadataBuilder<'m> {
 
 impl<'m> ReadOnlyMetadataBuilder<'m> {
     /// Creates a new read-only metadata builder from the given metadata 
dictionary.
-    pub fn new(metadata: VariantMetadata<'m>) -> Self {
+    pub fn new(metadata: &'m VariantMetadata<'m>) -> Self {
         Self {
             metadata,
             known_field_names: HashMap::new(),
@@ -3357,7 +3357,7 @@ mod tests {
 
         // Use the metadata to build new variant values
         let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
-        let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
+        let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
         let mut value_builder = ValueBuilder::new();
 
         {
@@ -3390,7 +3390,7 @@ mod tests {
 
         // Use the metadata to build new variant values
         let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
-        let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
+        let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
         let mut value_builder = ValueBuilder::new();
 
         {
@@ -3438,7 +3438,7 @@ mod tests {
 
         // Copy using the new bytes API
         let metadata = VariantMetadata::new(&metadata);
-        let mut metadata = ReadOnlyMetadataBuilder::new(metadata);
+        let mut metadata = ReadOnlyMetadataBuilder::new(&metadata);
         let mut builder2 = ValueBuilder::new();
         let state = ParentState::variant(&mut builder2, &mut metadata);
         ValueBuilder::append_variant_bytes(state, variant1);
@@ -3466,7 +3466,7 @@ mod tests {
 
         // Create a new object copying subset of fields interleaved with new 
ones
         let metadata2 = VariantMetadata::new(&metadata1);
-        let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
+        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
         let mut builder2 = ValueBuilder::new();
         let state = ParentState::variant(&mut builder2, &mut metadata2);
         {
@@ -3530,7 +3530,7 @@ mod tests {
 
         // Create a new list copying subset of elements interleaved with new 
ones
         let metadata2 = VariantMetadata::new(&metadata1);
-        let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
+        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
         let mut builder2 = ValueBuilder::new();
         let state = ParentState::variant(&mut builder2, &mut metadata2);
         {
@@ -3626,7 +3626,7 @@ mod tests {
 
         // Create filtered/modified version: only copy active users and inject 
new data
         let metadata2 = VariantMetadata::new(&metadata1);
-        let mut metadata2 = ReadOnlyMetadataBuilder::new(metadata2);
+        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
         let mut builder2 = ValueBuilder::new();
         let state = ParentState::variant(&mut builder2, &mut metadata2);
         {

Reply via email to