alamb commented on code in PR #7935:
URL: https://github.com/apache/arrow-rs/pull/7935#discussion_r2216382026


##########
parquet-variant/src/builder.rs:
##########
@@ -1317,7 +1414,15 @@ impl<'a> ObjectBuilder<'a> {
 /// This is to ensure that the object is always finalized before its parent 
builder
 /// is finalized.
 impl Drop for ObjectBuilder<'_> {
-    fn drop(&mut self) {}
+    fn drop(&mut self) {

Review Comment:
   ❤️ 



##########
parquet-variant/src/builder.rs:
##########
@@ -1860,10 +2046,22 @@ mod tests {
         let inner_object_variant = outer_object.field(2).unwrap();
         let inner_object = inner_object_variant.as_object().unwrap();
 
-        assert_eq!(inner_object.len(), 1);
+        assert_eq!(inner_object.len(), 3);
         assert_eq!(inner_object.field_name(0).unwrap(), "b");
         assert_eq!(inner_object.field(0).unwrap(), Variant::from("a"));
 
+        let inner_iner_object_variant_c = inner_object.field(1).unwrap();
+        let inner_inner_object_c = 
inner_iner_object_variant_c.as_object().unwrap();
+        assert_eq!(inner_inner_object_c.len(), 1);
+        assert_eq!(inner_inner_object_c.field_name(0).unwrap(), "aa");
+        assert_eq!(inner_inner_object_c.field(0).unwrap(), 
Variant::from("bb"));
+
+        let inner_iner_object_variant_d = inner_object.field(2).unwrap();
+        let inner_inner_object_d = 
inner_iner_object_variant_d.as_object().unwrap();
+        assert_eq!(inner_inner_object_d.len(), 1);
+        assert_eq!(inner_inner_object_d.field_name(0).unwrap(), "cc");
+        assert_eq!(inner_inner_object_d.field(0).unwrap(), 
Variant::from("dd"));
+
         assert_eq!(outer_object.field_name(1).unwrap(), "b");
         assert_eq!(outer_object.field(1).unwrap(), Variant::from(true));
     }

Review Comment:
   After reviewing the tests, I agree that the existing tests are sufficient
   
   
https://github.com/apache/arrow-rs/blob/0fecaa0869260b014235ef527462b07a45ffa0d2/parquet-variant/src/builder.rs#L2837-L2918
   
   I also verified test coverage with
   ```shell
   cargo llvm-cov --html test -p parquet-variant
   ```
   
   And it is indeed covered:
   <img width="950" height="372" alt="Screenshot 2025-07-18 at 11 58 35 AM" 
src="https://github.com/user-attachments/assets/900d99d9-fe47-40bc-b14e-c19eefc9e0ee";
 />
   
   
   
   



##########
parquet-variant/src/builder.rs:
##########
@@ -1275,38 +1330,80 @@ impl<'a> ObjectBuilder<'a> {
             )));
         }
 
-        let data_size = self.buffer.offset();
-        let num_fields = self.fields.len();
-        let is_large = num_fields > u8::MAX as usize;
+        let metadata_builder = self.parent_state.metadata_builder();
 
         self.fields.sort_by(|&field_a_id, _, &field_b_id, _| {
-            let key_a = &metadata_builder.field_name(field_a_id as usize);
-            let key_b = &metadata_builder.field_name(field_b_id as usize);
-            key_a.cmp(key_b)
+            let field_a_name = metadata_builder.field_name(field_a_id as 
usize);
+            let field_b_name = metadata_builder.field_name(field_b_id as 
usize);
+            field_a_name.cmp(field_b_name)
         });
-
         let max_id = self.fields.iter().map(|(i, _)| *i).max().unwrap_or(0);
-
         let id_size = int_size(max_id as usize);
-        let offset_size = int_size(data_size);
 
-        // Get parent's buffer
         let parent_buffer = self.parent_state.buffer();
-        let starting_offset = parent_buffer.offset();
+        let current_offset = parent_buffer.offset();
+        // current object starts from `object_start_offset`
+        let data_size = current_offset - self.object_start_offset;
+        let offset_size = int_size(data_size);
 
-        // Write header
+        let num_fields = self.fields.len();
+        let is_large = num_fields > u8::MAX as usize;
+
+        let header_size = 1 + // header byte
+            (if is_large { 4 } else { 1 }) + // num_fields
+            (num_fields * id_size as usize) + // field IDs
+            ((num_fields + 1) * offset_size as usize); // field offsets + 
data_size
+
+        let starting_offset = self.object_start_offset;
+
+        // Shift existing data to make room for the header
+        let buffer = parent_buffer.inner_mut();
+        buffer.splice(starting_offset..starting_offset, vec![0u8; 
header_size]);

Review Comment:
   TIL: `VEC::splice`
   
   https://doc.rust-lang.org/std/vec/struct.Vec.html#method.splice
   
   Nice



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to