alamb commented on code in PR #7911:
URL: https://github.com/apache/arrow-rs/pull/7911#discussion_r2223966304


##########
parquet-variant-compute/src/variant_array_builder.rs:
##########
@@ -147,28 +147,195 @@ impl VariantArrayBuilder {
 
     /// Append the [`Variant`] to the builder as the next row
     pub fn append_variant(&mut self, variant: Variant) {
-        // TODO make this more efficient by avoiding the intermediate buffers
-        let mut variant_builder = VariantBuilder::new();
-        variant_builder.append_value(variant);
-        let (metadata, value) = variant_builder.finish();
-        self.append_variant_buffers(&metadata, &value);
+        let mut direct_builder = self.variant_builder();
+        direct_builder.variant_builder.append_value(variant);
+        direct_builder.finish()
     }
 
-    /// Append a metadata and values buffer to the builder
-    pub fn append_variant_buffers(&mut self, metadata: &[u8], value: &[u8]) {
-        self.nulls.append_non_null();
-        let metadata_length = metadata.len();
-        let metadata_offset = self.metadata_buffer.len();
-        self.metadata_locations
-            .push((metadata_offset, metadata_length));
-        self.metadata_buffer.extend_from_slice(metadata);
-        let value_length = value.len();
-        let value_offset = self.value_buffer.len();
-        self.value_locations.push((value_offset, value_length));
-        self.value_buffer.extend_from_slice(value);
+    /// Return a `VariantArrayVariantBuilder` that writes directly to the
+    /// buffers of this builder.
+    ///
+    /// You must call [`VariantArrayVariantBuilder::finish`] to complete the 
builder
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
+    /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder};
+    /// let mut array_builder = VariantArrayBuilder::new(10);
+    ///
+    /// // First row has a string
+    /// let mut variant_builder = array_builder.variant_builder();
+    /// variant_builder.append_value("Hello, World!");
+    /// // must call finish to write the variant to the buffers
+    /// variant_builder.finish();
+    ///
+    /// // Second row is an object
+    /// let mut variant_builder = array_builder.variant_builder();
+    /// variant_builder
+    ///     .new_object()
+    ///     .with_field("my_field", 42i64)
+    ///     .finish()
+    ///     .unwrap();
+    /// variant_builder.finish();
+    ///
+    /// // finalize the array
+    /// let variant_array: VariantArray = array_builder.build();
+    ///
+    /// // verify what we wrote is still there
+    /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!"));
+    /// assert!(variant_array.value(1).as_object().is_some());
+    ///  ```
+    pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder {
+        // append directly into the metadata and value buffers
+        let metadata_buffer = std::mem::take(&mut self.metadata_buffer);
+        let value_buffer = std::mem::take(&mut self.value_buffer);
+        VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer)
+    }
+}
+
+/// A `VariantBuilderExt` that writes directly to the buffers of a 
`VariantArrayBuilder`.
+///
+// This struct implements [`VariantBuilderExt`], so in most cases it can be 
used as a
+// [`VariantBuilder`] to perform variant-related operations for 
[`VariantArrayBuilder`].
+///
+/// If [`Self::finish`] is not called, any changes will be rolled back
+///
+/// See [`VariantArrayBuilder::variant_builder`] for an example
+pub struct VariantArrayVariantBuilder<'a> {
+    /// was finish called?
+    finished: bool,
+    /// starting offset in the variant_builder's `metadata` buffer
+    metadata_offset: usize,
+    /// starting offset in the variant_builder's `value` buffer
+    value_offset: usize,
+    /// Parent array builder that this variant builder writes to. Buffers
+    /// have been moved into the variant builder, and must be returned on
+    /// drop
+    array_builder: &'a mut VariantArrayBuilder,
+    /// Builder for the in progress variant value, temporarily owns the buffers
+    /// from `array_builder`
+    variant_builder: VariantBuilder,
+}
+
+impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> {
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        self.variant_builder.append_value(value);
+    }
+
+    fn new_list(&mut self) -> ListBuilder {
+        self.variant_builder.new_list()
+    }
+
+    fn new_object(&mut self) -> ObjectBuilder {
+        self.variant_builder.new_object()
+    }
+}
+
+impl<'a> VariantArrayVariantBuilder<'a> {
+    /// Constructs a new VariantArrayVariantBuilder
+    ///
+    /// Note this is not public as this is a structure that is logically
+    /// part of the [`VariantArrayBuilder`] and relies on its internal 
structure
+    fn new(
+        array_builder: &'a mut VariantArrayBuilder,
+        metadata_buffer: Vec<u8>,
+        value_buffer: Vec<u8>,
+    ) -> Self {
+        let metadata_offset = metadata_buffer.len();
+        let value_offset = value_buffer.len();
+        VariantArrayVariantBuilder {
+            finished: false,
+            metadata_offset,
+            value_offset,
+            variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, 
value_buffer),
+            array_builder,
+        }
+    }
+
+    /// Return a reference to the underlying `VariantBuilder`
+    pub fn inner(&self) -> &VariantBuilder {
+        &self.variant_builder
+    }
+
+    /// Return a mutable reference to the underlying `VariantBuilder`
+    pub fn inner_mut(&mut self) -> &mut VariantBuilder {
+        &mut self.variant_builder
+    }
+
+    /// Called to finish the in progress variant and write it to the underlying
+    /// buffers
+    ///
+    /// Note if you do not call finish, on drop any changes made to the
+    /// underlying buffers will be rolled back.
+    pub fn finish(mut self) {
+        self.finished = true;
+
+        let metadata_offset = self.metadata_offset;

Review Comment:
   The code for finishing / finalizing is moved to `finish()`



##########
parquet-variant-compute/src/variant_array_builder.rs:
##########
@@ -147,28 +147,195 @@ impl VariantArrayBuilder {
 
     /// Append the [`Variant`] to the builder as the next row
     pub fn append_variant(&mut self, variant: Variant) {
-        // TODO make this more efficient by avoiding the intermediate buffers
-        let mut variant_builder = VariantBuilder::new();
-        variant_builder.append_value(variant);
-        let (metadata, value) = variant_builder.finish();
-        self.append_variant_buffers(&metadata, &value);
+        let mut direct_builder = self.variant_builder();
+        direct_builder.variant_builder.append_value(variant);
+        direct_builder.finish()
     }
 
-    /// Append a metadata and values buffer to the builder
-    pub fn append_variant_buffers(&mut self, metadata: &[u8], value: &[u8]) {
-        self.nulls.append_non_null();
-        let metadata_length = metadata.len();
-        let metadata_offset = self.metadata_buffer.len();
-        self.metadata_locations
-            .push((metadata_offset, metadata_length));
-        self.metadata_buffer.extend_from_slice(metadata);
-        let value_length = value.len();
-        let value_offset = self.value_buffer.len();
-        self.value_locations.push((value_offset, value_length));
-        self.value_buffer.extend_from_slice(value);
+    /// Return a `VariantArrayVariantBuilder` that writes directly to the
+    /// buffers of this builder.
+    ///
+    /// You must call [`VariantArrayVariantBuilder::finish`] to complete the 
builder
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
+    /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder};
+    /// let mut array_builder = VariantArrayBuilder::new(10);
+    ///
+    /// // First row has a string
+    /// let mut variant_builder = array_builder.variant_builder();
+    /// variant_builder.append_value("Hello, World!");
+    /// // must call finish to write the variant to the buffers
+    /// variant_builder.finish();
+    ///
+    /// // Second row is an object
+    /// let mut variant_builder = array_builder.variant_builder();
+    /// variant_builder
+    ///     .new_object()
+    ///     .with_field("my_field", 42i64)
+    ///     .finish()
+    ///     .unwrap();
+    /// variant_builder.finish();
+    ///
+    /// // finalize the array
+    /// let variant_array: VariantArray = array_builder.build();
+    ///
+    /// // verify what we wrote is still there
+    /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!"));
+    /// assert!(variant_array.value(1).as_object().is_some());
+    ///  ```
+    pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder {
+        // append directly into the metadata and value buffers
+        let metadata_buffer = std::mem::take(&mut self.metadata_buffer);
+        let value_buffer = std::mem::take(&mut self.value_buffer);
+        VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer)
+    }
+}
+
+/// A `VariantBuilderExt` that writes directly to the buffers of a 
`VariantArrayBuilder`.
+///
+// This struct implements [`VariantBuilderExt`], so in most cases it can be 
used as a
+// [`VariantBuilder`] to perform variant-related operations for 
[`VariantArrayBuilder`].
+///
+/// If [`Self::finish`] is not called, any changes will be rolled back
+///
+/// See [`VariantArrayBuilder::variant_builder`] for an example
+pub struct VariantArrayVariantBuilder<'a> {
+    /// was finish called?
+    finished: bool,
+    /// starting offset in the variant_builder's `metadata` buffer
+    metadata_offset: usize,
+    /// starting offset in the variant_builder's `value` buffer
+    value_offset: usize,
+    /// Parent array builder that this variant builder writes to. Buffers
+    /// have been moved into the variant builder, and must be returned on
+    /// drop
+    array_builder: &'a mut VariantArrayBuilder,
+    /// Builder for the in progress variant value, temporarily owns the buffers
+    /// from `array_builder`
+    variant_builder: VariantBuilder,
+}
+
+impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> {
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        self.variant_builder.append_value(value);
+    }
+
+    fn new_list(&mut self) -> ListBuilder {
+        self.variant_builder.new_list()
+    }
+
+    fn new_object(&mut self) -> ObjectBuilder {
+        self.variant_builder.new_object()
+    }
+}
+
+impl<'a> VariantArrayVariantBuilder<'a> {
+    /// Constructs a new VariantArrayVariantBuilder
+    ///
+    /// Note this is not public as this is a structure that is logically
+    /// part of the [`VariantArrayBuilder`] and relies on its internal 
structure
+    fn new(
+        array_builder: &'a mut VariantArrayBuilder,
+        metadata_buffer: Vec<u8>,
+        value_buffer: Vec<u8>,
+    ) -> Self {
+        let metadata_offset = metadata_buffer.len();
+        let value_offset = value_buffer.len();
+        VariantArrayVariantBuilder {
+            finished: false,
+            metadata_offset,
+            value_offset,
+            variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, 
value_buffer),
+            array_builder,
+        }
+    }
+
+    /// Return a reference to the underlying `VariantBuilder`
+    pub fn inner(&self) -> &VariantBuilder {
+        &self.variant_builder
+    }
+
+    /// Return a mutable reference to the underlying `VariantBuilder`
+    pub fn inner_mut(&mut self) -> &mut VariantBuilder {
+        &mut self.variant_builder
+    }
+
+    /// Called to finish the in progress variant and write it to the underlying
+    /// buffers
+    ///
+    /// Note if you do not call finish, on drop any changes made to the
+    /// underlying buffers will be rolled back.
+    pub fn finish(mut self) {
+        self.finished = true;
+
+        let metadata_offset = self.metadata_offset;
+        let value_offset = self.value_offset;
+        // get the buffers back from the variant builder
+        let (metadata_buffer, value_buffer) = std::mem::take(&mut 
self.variant_builder).finish();
+
+        // Sanity Check: if the buffers got smaller, something went wrong 
(previous data was lost)
+        let metadata_len = metadata_buffer
+            .len()
+            .checked_sub(metadata_offset)
+            .expect("metadata length decreased unexpectedly");
+        let value_len = value_buffer
+            .len()
+            .checked_sub(value_offset)
+            .expect("value length decreased unexpectedly");
+
+        // commit the changes by putting the
+        // offsets and lengths into the parent array builder.
+        self.array_builder
+            .metadata_locations
+            .push((metadata_offset, metadata_len));
+        self.array_builder
+            .value_locations
+            .push((value_offset, value_len));
+        self.array_builder.nulls.append_non_null();
+        // put the buffers back into the array builder
+        self.array_builder.metadata_buffer = metadata_buffer;
+        self.array_builder.value_buffer = value_buffer;
     }
+}
+
+impl<'a> Drop for VariantArrayVariantBuilder<'a> {
+    /// If the builder was not finished, roll back any changes made to the
+    /// underlying buffers (by truncating them)
+    fn drop(&mut self) {
+        if self.finished {
+            return;
+        }
+
+        // if the object was not finished, need to rollback any changes by
+        // truncating the buffers to the original offsets
+        let metadata_offset = self.metadata_offset;
+        let value_offset = self.value_offset;
+
+        // get the buffers back from the variant builder
+        let (mut metadata_buffer, mut value_buffer) =
+            std::mem::take(&mut self.variant_builder).into_buffers();

Review Comment:
   Note this now calls `into_buffers` to get ownership of the buffers back but 
doesn't call `finish`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to