friendlymatthew commented on code in PR #7740:
URL: https://github.com/apache/arrow-rs/pull/7740#discussion_r2164709801


##########
parquet-variant/src/builder.rs:
##########
@@ -420,100 +472,152 @@ impl Default for VariantBuilder {
 ///
 /// See the examples on [`VariantBuilder`] for usage.
 pub struct ListBuilder<'a> {
-    parent: &'a mut VariantBuilder,
-    start_pos: usize,
+    parent_buffer: &'a mut VariantBuffer,
+    field_metadata_dictionary: &'a mut FieldMetadataDictionary,
     offsets: Vec<usize>,
+
+    buffer: VariantBuffer,
+    pending: bool,
 }
 
 impl<'a> ListBuilder<'a> {
-    fn new(parent: &'a mut VariantBuilder) -> Self {
-        let start_pos = parent.offset();
+    fn new(
+        parent_buffer: &'a mut VariantBuffer,
+        field_metadata_dictionary: &'a mut FieldMetadataDictionary,
+    ) -> Self {
         Self {
-            parent,
-            start_pos,
+            parent_buffer,
+            field_metadata_dictionary,
             offsets: vec![0],
+            buffer: VariantBuffer::default(),
+            pending: false,
+        }
+    }
+
+    fn check_new_offset(&mut self) {
+        if !self.pending {
+            return;
         }
+
+        let element_end = self.buffer.offset();
+        self.offsets.push(element_end);
+
+        self.pending = false;
+    }
+
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        self.check_new_offset();
+
+        let obj_builder = ObjectBuilder::new(&mut self.buffer, 
self.field_metadata_dictionary);
+        self.pending = true;
+
+        obj_builder
+    }
+
+    pub fn new_list(&mut self) -> ListBuilder {
+        self.check_new_offset();
+
+        let list_builder = ListBuilder::new(&mut self.buffer, 
self.field_metadata_dictionary);
+        self.pending = true;
+
+        list_builder
     }
 
     pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) 
{
-        self.parent.append_value(value);
-        let element_end = self.parent.offset() - self.start_pos;
+        self.check_new_offset();
+
+        self.buffer.append_value(value);
+        let element_end = self.buffer.offset();
         self.offsets.push(element_end);
     }
 
-    pub fn finish(self) {
-        let data_size = self.parent.offset() - self.start_pos;
+    pub fn finish(mut self) {
+        self.check_new_offset();
+
+        let data_size = self.buffer.offset();
         let num_elements = self.offsets.len() - 1;
         let is_large = num_elements > u8::MAX as usize;
         let size_bytes = if is_large { 4 } else { 1 };
         let offset_size = int_size(data_size);
         let header_size = 1 + size_bytes + (num_elements + 1) * offset_size as 
usize;
 
-        make_room_for_header(&mut self.parent.buffer, self.start_pos, 
header_size);
+        let parent_start_pos = self.parent_buffer.offset();

Review Comment:
   We can't replace `make_room_for_header` with `Vec::reserve` here, since 
`make_room_for_header` actually allocates `header_size` bytes worth of space, 
whereas `reserve` merely ensures capacity without changing the buffer's length.
   
   We could do this for now, 
   ```rs
   self.parent_buffer
       .0
       .resize(self.parent_buffer.0.len() + header_size, 0);
   ```
   
   but I think the actual issue is there's room to simplify the whole 
buffer-copy slicing ceremony going on here. I have some thoughts that I'd like 
to push up in a separate PR



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to