alamb commented on code in PR #7740:
URL: https://github.com/apache/arrow-rs/pull/7740#discussion_r2164497957


##########
parquet-variant/src/builder.rs:
##########
@@ -420,100 +472,152 @@ impl Default for VariantBuilder {
 ///
 /// See the examples on [`VariantBuilder`] for usage.
 pub struct ListBuilder<'a> {
-    parent: &'a mut VariantBuilder,
-    start_pos: usize,
+    parent_buffer: &'a mut VariantBuffer,
+    field_metadata_dictionary: &'a mut FieldMetadataDictionary,
     offsets: Vec<usize>,
+
+    buffer: VariantBuffer,
+    pending: bool,
 }
 
 impl<'a> ListBuilder<'a> {
-    fn new(parent: &'a mut VariantBuilder) -> Self {
-        let start_pos = parent.offset();
+    fn new(
+        parent_buffer: &'a mut VariantBuffer,
+        field_metadata_dictionary: &'a mut FieldMetadataDictionary,
+    ) -> Self {
         Self {
-            parent,
-            start_pos,
+            parent_buffer,
+            field_metadata_dictionary,
             offsets: vec![0],
+            buffer: VariantBuffer::default(),
+            pending: false,
+        }
+    }
+
+    fn check_new_offset(&mut self) {
+        if !self.pending {
+            return;
         }
+
+        let element_end = self.buffer.offset();
+        self.offsets.push(element_end);
+
+        self.pending = false;
+    }
+
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        self.check_new_offset();
+
+        let obj_builder = ObjectBuilder::new(&mut self.buffer, 
self.field_metadata_dictionary);
+        self.pending = true;
+
+        obj_builder
+    }
+
+    pub fn new_list(&mut self) -> ListBuilder {
+        self.check_new_offset();
+
+        let list_builder = ListBuilder::new(&mut self.buffer, 
self.field_metadata_dictionary);
+        self.pending = true;
+
+        list_builder
     }
 
     pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) 
{
-        self.parent.append_value(value);
-        let element_end = self.parent.offset() - self.start_pos;
+        self.check_new_offset();
+
+        self.buffer.append_value(value);
+        let element_end = self.buffer.offset();
         self.offsets.push(element_end);
     }
 
-    pub fn finish(self) {
-        let data_size = self.parent.offset() - self.start_pos;
+    pub fn finish(mut self) {
+        self.check_new_offset();
+
+        let data_size = self.buffer.offset();
         let num_elements = self.offsets.len() - 1;
         let is_large = num_elements > u8::MAX as usize;
         let size_bytes = if is_large { 4 } else { 1 };
         let offset_size = int_size(data_size);
         let header_size = 1 + size_bytes + (num_elements + 1) * offset_size as 
usize;
 
-        make_room_for_header(&mut self.parent.buffer, self.start_pos, 
header_size);
+        let parent_start_pos = self.parent_buffer.offset();
+
+        make_room_for_header(&mut self.parent_buffer.0, parent_start_pos, 
header_size);
 
         // Write header
-        let mut pos = self.start_pos;
-        self.parent.buffer[pos] = array_header(is_large, offset_size);
+        let mut pos = parent_start_pos;
+        self.parent_buffer.0[pos] = array_header(is_large, offset_size);

Review Comment:
   I don't understand how this can be writing to anything other than the end of 
parent_buffer and thus it seems like the code to handle other positions is 
unecesssary
   
   I also see you just refactored this here rather than introducing it, so no 
need to change it in this PR



##########
parquet-variant/src/builder.rs:
##########
@@ -72,7 +72,177 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: 
usize, header_size: usi
     buffer.copy_within(src_start..src_end, dst_start);
 }
 
-/// Builder for [`Variant`] values
+#[derive(Default)]
+struct VariantBuffer(Vec<u8>);
+
+impl VariantBuffer {
+    fn append_null(&mut self) {
+        self.0.push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.0.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.0.push(primitive_header(VariantPrimitiveType::Int8));
+        self.0.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.0.push(primitive_header(VariantPrimitiveType::Int16));
+        self.0.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.0.push(primitive_header(VariantPrimitiveType::Int32));
+        self.0.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.0.push(primitive_header(VariantPrimitiveType::Int64));
+        self.0.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.0.push(primitive_header(VariantPrimitiveType::Float));
+        self.0.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.0.push(primitive_header(VariantPrimitiveType::Double));
+        self.0.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.0.push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = 
value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32;
+        self.0.extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.0
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.0.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.0
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.0.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.0
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.0.push(scale);
+        self.0.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.0
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.0.push(scale);
+        self.0.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.0
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.0.push(scale);
+        self.0.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.0.push(primitive_header(VariantPrimitiveType::Binary));
+        self.0
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.0.extend_from_slice(value);
+    }
+
+    fn append_short_string(&mut self, value: ShortString) {
+        let inner = value.0;
+        self.0.push(short_string_header(inner.len()));
+        self.0.extend_from_slice(inner.as_bytes());
+    }
+
+    fn append_string(&mut self, value: &str) {
+        self.0.push(primitive_header(VariantPrimitiveType::String));
+        self.0
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.0.extend_from_slice(value.as_bytes());
+    }
+
+    fn offset(&self) -> usize {
+        self.0.len()
+    }
+
+    fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
+        let variant = value.into();
+        match variant {
+            Variant::Null => self.append_null(),
+            Variant::BooleanTrue => self.append_bool(true),
+            Variant::BooleanFalse => self.append_bool(false),
+            Variant::Int8(v) => self.append_int8(v),
+            Variant::Int16(v) => self.append_int16(v),
+            Variant::Int32(v) => self.append_int32(v),
+            Variant::Int64(v) => self.append_int64(v),
+            Variant::Date(v) => self.append_date(v),
+            Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
+            Variant::TimestampNtzMicros(v) => 
self.append_timestamp_ntz_micros(v),
+            Variant::Decimal4 { integer, scale } => 
self.append_decimal4(integer, scale),
+            Variant::Decimal8 { integer, scale } => 
self.append_decimal8(integer, scale),
+            Variant::Decimal16 { integer, scale } => 
self.append_decimal16(integer, scale),
+            Variant::Float(v) => self.append_float(v),
+            Variant::Double(v) => self.append_double(v),
+            Variant::Binary(v) => self.append_binary(v),
+            Variant::String(s) => self.append_string(s),
+            Variant::ShortString(s) => self.append_short_string(s),
+            Variant::Object(_) | Variant::List(_) => {
+                todo!("How does this work with the redesign?");
+            }
+        }
+    }
+}
+
+#[derive(Default)]
+struct FieldMetadataDictionary {

Review Comment:
   I suggest renaming this struct  `MetadataBuilder` to align with the fact it 
is used to create the Metadata of the variant



##########
parquet-variant/src/builder.rs:
##########
@@ -72,7 +72,177 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: 
usize, header_size: usi
     buffer.copy_within(src_start..src_end, dst_start);
 }
 
-/// Builder for [`Variant`] values
+#[derive(Default)]
+struct VariantBuffer(Vec<u8>);

Review Comment:
   I Perhaps this could be called `ValueBuilder` as it is largely responsible 
for building the value portion of the variant



##########
parquet-variant/src/builder.rs:
##########
@@ -420,100 +472,152 @@ impl Default for VariantBuilder {
 ///
 /// See the examples on [`VariantBuilder`] for usage.
 pub struct ListBuilder<'a> {
-    parent: &'a mut VariantBuilder,
-    start_pos: usize,
+    parent_buffer: &'a mut VariantBuffer,
+    field_metadata_dictionary: &'a mut FieldMetadataDictionary,
     offsets: Vec<usize>,
+
+    buffer: VariantBuffer,
+    pending: bool,
 }
 
 impl<'a> ListBuilder<'a> {
-    fn new(parent: &'a mut VariantBuilder) -> Self {
-        let start_pos = parent.offset();
+    fn new(
+        parent_buffer: &'a mut VariantBuffer,
+        field_metadata_dictionary: &'a mut FieldMetadataDictionary,
+    ) -> Self {
         Self {
-            parent,
-            start_pos,
+            parent_buffer,
+            field_metadata_dictionary,
             offsets: vec![0],
+            buffer: VariantBuffer::default(),
+            pending: false,
+        }
+    }
+
+    fn check_new_offset(&mut self) {
+        if !self.pending {
+            return;
         }
+
+        let element_end = self.buffer.offset();
+        self.offsets.push(element_end);
+
+        self.pending = false;
+    }
+
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        self.check_new_offset();
+
+        let obj_builder = ObjectBuilder::new(&mut self.buffer, 
self.field_metadata_dictionary);
+        self.pending = true;
+
+        obj_builder
+    }
+
+    pub fn new_list(&mut self) -> ListBuilder {
+        self.check_new_offset();
+
+        let list_builder = ListBuilder::new(&mut self.buffer, 
self.field_metadata_dictionary);
+        self.pending = true;
+
+        list_builder
     }
 
     pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) 
{
-        self.parent.append_value(value);
-        let element_end = self.parent.offset() - self.start_pos;
+        self.check_new_offset();
+
+        self.buffer.append_value(value);
+        let element_end = self.buffer.offset();
         self.offsets.push(element_end);
     }
 
-    pub fn finish(self) {
-        let data_size = self.parent.offset() - self.start_pos;
+    pub fn finish(mut self) {
+        self.check_new_offset();
+
+        let data_size = self.buffer.offset();
         let num_elements = self.offsets.len() - 1;
         let is_large = num_elements > u8::MAX as usize;
         let size_bytes = if is_large { 4 } else { 1 };
         let offset_size = int_size(data_size);
         let header_size = 1 + size_bytes + (num_elements + 1) * offset_size as 
usize;
 
-        make_room_for_header(&mut self.parent.buffer, self.start_pos, 
header_size);
+        let parent_start_pos = self.parent_buffer.offset();

Review Comment:
   parent_buffet.offset() seems to just return the length.
   
   I think this code is doing the equivalent of 
`self.parent_buffer.reserve(header_size)` to reserver enough space (I am not 
sure what  `make_room_for_header` is doing)



##########
parquet-variant/src/builder.rs:
##########
@@ -420,100 +472,152 @@ impl Default for VariantBuilder {
 ///
 /// See the examples on [`VariantBuilder`] for usage.
 pub struct ListBuilder<'a> {
-    parent: &'a mut VariantBuilder,
-    start_pos: usize,
+    parent_buffer: &'a mut VariantBuffer,
+    field_metadata_dictionary: &'a mut FieldMetadataDictionary,
     offsets: Vec<usize>,
+
+    buffer: VariantBuffer,

Review Comment:
   As a follow on it would be really nice to avoid having to copy all the 
values back to the parent buffer.
   
   One problem is that you don't know beforehand how many elements are in the 
list (so we don't know the offset size and therefore we don't know how many 
bytes to leave empty for the list length). However, I think we could do 
something fancy like assume it will be a short list (and leave 1 byte for the 
length) and then if we end up with more we could go back and copy over the 
values
   
   We could also add a nice api like `make_large()` or something on the 
ListBuilder
   
   I am just speculating, there is no need to do this here in this PR or 
without benchmarks



##########
parquet-variant/src/builder.rs:
##########
@@ -824,39 +932,250 @@ mod tests {
     }
 
     #[test]
-    fn test_append_object() {
-        let (object_metadata, object_value) = {
-            let mut builder = VariantBuilder::new();
-            let mut obj = builder.new_object();
-            obj.append_value("name", "John");
-            obj.finish();
-            builder.finish()
-        };
-        let object_variant = Variant::try_new(&object_metadata, 
&object_value).unwrap();
+    fn test_nested_list() {
+        let mut builder = VariantBuilder::new();
+
+        let mut outer_list_builder = builder.new_list();
+
+        {
+            let mut inner_list_builder = outer_list_builder.new_list();
+
+            inner_list_builder.append_value("a");
+            inner_list_builder.append_value("b");
+            inner_list_builder.append_value("c");
+            inner_list_builder.append_value("d");
+
+            inner_list_builder.finish();
+        }
+
+        outer_list_builder.finish();
+
+        let (metadata, value) = builder.finish();
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_list = variant.as_list().unwrap();
+
+        assert_eq!(outer_list.len(), 1);
+
+        let inner_variant = outer_list.get(0).unwrap();
+        let inner_list = inner_variant.as_list().unwrap();
+
+        assert_eq!(
+            vec![
+                Variant::from("a"),
+                Variant::from("b"),
+                Variant::from("c"),
+                Variant::from("d"),
+            ],
+            inner_list.iter().collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_super_nested_list() {
+        /*
+        [[[[[1]]]]]
+        */
 
         let mut builder = VariantBuilder::new();
-        builder.append_value(object_variant.clone());
+        {
+            let mut list_builder1 = builder.new_list();
+            {
+                let mut list_builder2 = list_builder1.new_list();
+                {
+                    let mut list_builder3 = list_builder2.new_list();
+                    {
+                        let mut list_builder4 = list_builder3.new_list();
+                        {
+                            let mut list_builder5 = list_builder4.new_list();
+                            list_builder5.append_value(1);
+                            list_builder5.finish();
+                        }
+                        list_builder4.finish();
+                    }
+                    list_builder3.finish();
+                }
+                list_builder2.finish();
+            }
+            list_builder1.finish();
+        }
+
         let (metadata, value) = builder.finish();
+
         let variant = Variant::try_new(&metadata, &value).unwrap();
-        assert_eq!(variant, object_variant);
+        let list1 = variant.as_list().unwrap();
+        assert_eq!(list1.len(), 1);
+
+        let list2_variant = list1.get(0).unwrap();
+        let list2 = list2_variant.as_list().unwrap();
+        assert_eq!(list2.len(), 1);
+
+        let list3_variant = list2.get(0).unwrap();
+        let list3 = list3_variant.as_list().unwrap();
+        assert_eq!(list3.len(), 1);
+
+        let list4_variant = list3.get(0).unwrap();
+        let list4 = list4_variant.as_list().unwrap();
+        assert_eq!(list4.len(), 1);
+
+        let list5_variant = list4.get(0).unwrap();
+        let list5 = list5_variant.as_list().unwrap();
+        assert_eq!(list5.len(), 1);
+
+        assert_eq!(list5.len(), 1);
+
+        assert_eq!(list5.get(0).unwrap(), Variant::from(1));
     }
 
     #[test]
-    fn test_append_list() {
-        let (list_metadata, list_value) = {
-            let mut builder = VariantBuilder::new();
-            let mut list = builder.new_list();
-            list.append_value(1i8);
-            list.append_value(2i8);
-            list.finish();
-            builder.finish()
-        };
-        let list_variant = Variant::try_new(&list_metadata, 
&list_value).unwrap();
+    fn test_object_list() {
+        let mut builder = VariantBuilder::new();
+
+        let mut list_builder = builder.new_list();
+
+        {
+            let mut object_builder = list_builder.new_object();
+            object_builder.append_value("id", 1);
+            object_builder.append_value("type", "Cauliflower");
+            object_builder.finish();
+        }
+
+        {
+            let mut object_builder = list_builder.new_object();
+            object_builder.append_value("id", 2);
+            object_builder.append_value("type", "Beets");
+            object_builder.finish();
+        }
+
+        list_builder.finish();
+
+        let (metadata, value) = builder.finish();
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let list = variant.as_list().unwrap();
 
+        assert_eq!(list.len(), 2);
+
+        let obj1_variant = list.get(0).unwrap();
+        let obj1 = obj1_variant.as_object().unwrap();
+
+        assert_eq!(
+            vec![
+                ("id", Variant::from(1)),
+                ("type", Variant::from("Cauliflower")),
+            ],
+            obj1.iter().collect::<Vec<_>>()
+        );
+
+        let obj2_variant = list.get(1).unwrap();
+        let obj2 = obj2_variant.as_object().unwrap();
+
+        assert_eq!(
+            vec![("id", Variant::from(2)), ("type", Variant::from("Beets")),],
+            obj2.iter().collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_object_list2() {
         let mut builder = VariantBuilder::new();
-        builder.append_value(list_variant.clone());
+
+        let mut list_builder = builder.new_list();
+
+        {
+            let mut object_builder = list_builder.new_object();
+            object_builder.append_value("a", 1);
+            object_builder.finish();
+        }
+
+        {
+            let mut object_builder = list_builder.new_object();
+            object_builder.append_value("b", 2);
+            object_builder.finish();
+        }
+
+        list_builder.finish();
+
         let (metadata, value) = builder.finish();
+
         let variant = Variant::try_new(&metadata, &value).unwrap();
-        assert_eq!(variant, list_variant);
+        let list = variant.as_list().unwrap();
+        assert_eq!(list.len(), 2);
+
+        let obj1_variant = list.get(0).unwrap();
+        let obj1 = obj1_variant.as_object().unwrap();
+        assert_eq!(
+            vec![("a", Variant::from(1)),],
+            obj1.iter().collect::<Vec<_>>()
+        );
+
+        let obj2_variant = list.get(1).unwrap();
+        let obj2 = obj2_variant.as_object().unwrap();
+        assert_eq!(
+            vec![("b", Variant::from(2)),],
+            obj2.iter().collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_hetergenous_list() {

Review Comment:
   💯 



##########
parquet-variant/src/builder.rs:
##########
@@ -769,8 +877,8 @@ mod tests {
             // dict is ordered by field names
             // NOTE: when we support nested objects, we'll want to perform a 
filter by fields_map field ids
             let dict_metadata = obj
-                .parent
-                .dict
+                .field_metadata_dictionary

Review Comment:
   do we need to update the comment above about nested objects?



##########
parquet-variant/src/builder.rs:
##########
@@ -148,189 +314,108 @@ fn make_room_for_header(buffer: &mut Vec<u8>, 
start_pos: usize, header_size: usi
 ///
 /// # Example: [`Variant::List`] of  [`Variant::Object`]s
 ///
-/// THis example shows how to create an list  of objects:
+/// This example shows how to create an list of objects:
 /// ```json
 /// [
-///  {
-///   "first_name": "Jiaying",
-///  "last_name": "Li"
-/// },
 ///   {
-///    "first_name": "Malthe",
-///    "last_name": "Karbo"
-/// }
+///      "id": 1,
+///      "type": "Cauliflower"
+///   },
+///   {
+///      "id": 2,
+///      "type": "Beets"
+///   }
 /// ]
 /// ```
+/// ```
+/// use parquet_variant::{Variant, VariantBuilder};
+/// let mut builder = VariantBuilder::new();
+///
+/// // Create a builder that will write elements to the list
+/// let mut list_builder = builder.new_list();
+///
+/// {
+///     let mut object_builder = list_builder.new_object();
+///     object_builder.append_value("id", 1);
+///     object_builder.append_value("type", "Cauliflower");
+///     object_builder.finish();
+/// }
+///
+/// {
+///     let mut object_builder = list_builder.new_object();
+///     object_builder.append_value("id", 2);
+///     object_builder.append_value("type", "Beets");
+///     object_builder.finish();
+/// }
+///
+/// list_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let variant_list = variant.as_list().unwrap();
+///
+///
+/// let obj1_variant = variant_list.get(0).unwrap();
+/// let obj1 = obj1_variant.as_object().unwrap();
+/// assert_eq!(
+///     obj1.field_by_name("id").unwrap(),
+///     Some(Variant::from(1))
+/// );
+/// assert_eq!(
+///     obj1.field_by_name("type").unwrap(),
+///     Some(Variant::from("Cauliflower"))
+/// );
 ///
-/// TODO
+/// let obj2_variant = variant_list.get(1).unwrap();
+/// let obj2 = obj2_variant.as_object().unwrap();
+///
+/// assert_eq!(
+///     obj2.field_by_name("id").unwrap(),
+///     Some(Variant::from(2))
+/// );
+/// assert_eq!(
+///     obj2.field_by_name("type").unwrap(),
+///     Some(Variant::from("Beets"))
+/// );
 ///
+/// ```
 pub struct VariantBuilder {
-    buffer: Vec<u8>,
-    dict: BTreeMap<String, u32>,
-    dict_keys: Vec<String>,
+    buffer: VariantBuffer,
+    field_metadata_dictionary: FieldMetadataDictionary,
 }
 
 impl VariantBuilder {
     pub fn new() -> Self {
         Self {
-            buffer: Vec::new(),
-            dict: BTreeMap::new(),
-            dict_keys: Vec::new(),
-        }
-    }
-
-    fn append_null(&mut self) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Null));
-    }
-
-    fn append_bool(&mut self, value: bool) {
-        let primitive_type = if value {
-            VariantPrimitiveType::BooleanTrue
-        } else {
-            VariantPrimitiveType::BooleanFalse
-        };
-        self.buffer.push(primitive_header(primitive_type));
-    }
-
-    fn append_int8(&mut self, value: i8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int8));
-        self.buffer.push(value as u8);
-    }
-
-    fn append_int16(&mut self, value: i16) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int16));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_int32(&mut self, value: i32) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int32));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_int64(&mut self, value: i64) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int64));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_float(&mut self, value: f32) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Float));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_double(&mut self, value: f64) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Double));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_date(&mut self, value: chrono::NaiveDate) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Date));
-        let days_since_epoch = 
value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32;
-        self.buffer
-            .extend_from_slice(&days_since_epoch.to_le_bytes());
-    }
-
-    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
-        let micros = value.timestamp_micros();
-        self.buffer.extend_from_slice(&micros.to_le_bytes());
-    }
-
-    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
-        let micros = value.and_utc().timestamp_micros();
-        self.buffer.extend_from_slice(&micros.to_le_bytes());
-    }
-
-    fn append_decimal4(&mut self, integer: i32, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal4));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
-    }
-
-    fn append_decimal8(&mut self, integer: i64, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal8));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
-    }
-
-    fn append_decimal16(&mut self, integer: i128, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal16));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
-    }
-
-    fn append_binary(&mut self, value: &[u8]) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Binary));
-        self.buffer
-            .extend_from_slice(&(value.len() as u32).to_le_bytes());
-        self.buffer.extend_from_slice(value);
-    }
-
-    fn append_short_string(&mut self, value: ShortString) {
-        let inner = value.0;
-        self.buffer.push(short_string_header(inner.len()));
-        self.buffer.extend_from_slice(inner.as_bytes());
-    }
-
-    fn append_string(&mut self, value: &str) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::String));
-        self.buffer
-            .extend_from_slice(&(value.len() as u32).to_le_bytes());
-        self.buffer.extend_from_slice(value.as_bytes());
-    }
-
-    /// Add key to dictionary, return its ID
-    fn add_key(&mut self, key: &str) -> u32 {
-        use std::collections::btree_map::Entry;
-        match self.dict.entry(key.to_string()) {
-            Entry::Occupied(entry) => *entry.get(),
-            Entry::Vacant(entry) => {
-                let id = self.dict_keys.len() as u32;
-                entry.insert(id);
-                self.dict_keys.push(key.to_string());
-                id
-            }
+            buffer: VariantBuffer::default(),
+            field_metadata_dictionary: FieldMetadataDictionary::default(),
         }
     }
 
-    fn offset(&self) -> usize {
-        self.buffer.len()
-    }
-
     /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
     ///
     /// See the examples on [`VariantBuilder`] for usage.
     pub fn new_list(&mut self) -> ListBuilder {
-        ListBuilder::new(self)
+        ListBuilder::new(&mut self.buffer, &mut self.field_metadata_dictionary)
     }
 
     /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
     ///
     /// See the examples on [`VariantBuilder`] for usage.
     pub fn new_object(&mut self) -> ObjectBuilder {
-        ObjectBuilder::new(self)
+        ObjectBuilder::new(&mut self.buffer, &mut 
self.field_metadata_dictionary)
+    }
+
+    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) 
{
+        self.buffer.append_value(value);
     }
 
     pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
-        let nkeys = self.dict_keys.len();
+        let nkeys = self.field_metadata_dictionary.num_field_names();

Review Comment:
   A minor thought would be to move the construction of the metadata / 
dictionary into the `field_metadata_dictionary` itself 
   
   Something like
   ```rust
   let metadata = self.field_metadata_dictionary.build()
   ```
   
   I am thinking the more encapsulated building the metadata dictionary is, the 
easier it will be to optimize it later (for example to avoid allocating and 
copying so many strings)
   
   Doesn't have to be for this PR, I just noticed it during review



##########
parquet-variant/src/builder.rs:
##########
@@ -148,189 +314,108 @@ fn make_room_for_header(buffer: &mut Vec<u8>, 
start_pos: usize, header_size: usi
 ///
 /// # Example: [`Variant::List`] of  [`Variant::Object`]s
 ///
-/// THis example shows how to create an list  of objects:
+/// This example shows how to create an list of objects:
 /// ```json
 /// [
-///  {
-///   "first_name": "Jiaying",
-///  "last_name": "Li"
-/// },
 ///   {
-///    "first_name": "Malthe",
-///    "last_name": "Karbo"
-/// }
+///      "id": 1,
+///      "type": "Cauliflower"
+///   },
+///   {
+///      "id": 2,
+///      "type": "Beets"
+///   }
 /// ]
 /// ```
+/// ```
+/// use parquet_variant::{Variant, VariantBuilder};
+/// let mut builder = VariantBuilder::new();
+///
+/// // Create a builder that will write elements to the list
+/// let mut list_builder = builder.new_list();
+///
+/// {
+///     let mut object_builder = list_builder.new_object();
+///     object_builder.append_value("id", 1);
+///     object_builder.append_value("type", "Cauliflower");
+///     object_builder.finish();
+/// }
+///
+/// {
+///     let mut object_builder = list_builder.new_object();
+///     object_builder.append_value("id", 2);
+///     object_builder.append_value("type", "Beets");
+///     object_builder.finish();
+/// }
+///
+/// list_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let variant_list = variant.as_list().unwrap();
+///
+///
+/// let obj1_variant = variant_list.get(0).unwrap();
+/// let obj1 = obj1_variant.as_object().unwrap();
+/// assert_eq!(
+///     obj1.field_by_name("id").unwrap(),
+///     Some(Variant::from(1))
+/// );
+/// assert_eq!(
+///     obj1.field_by_name("type").unwrap(),
+///     Some(Variant::from("Cauliflower"))
+/// );
 ///
-/// TODO
+/// let obj2_variant = variant_list.get(1).unwrap();
+/// let obj2 = obj2_variant.as_object().unwrap();
+///
+/// assert_eq!(
+///     obj2.field_by_name("id").unwrap(),
+///     Some(Variant::from(2))
+/// );
+/// assert_eq!(
+///     obj2.field_by_name("type").unwrap(),
+///     Some(Variant::from("Beets"))
+/// );
 ///
+/// ```
 pub struct VariantBuilder {
-    buffer: Vec<u8>,
-    dict: BTreeMap<String, u32>,
-    dict_keys: Vec<String>,
+    buffer: VariantBuffer,
+    field_metadata_dictionary: FieldMetadataDictionary,
 }
 
 impl VariantBuilder {
     pub fn new() -> Self {
         Self {
-            buffer: Vec::new(),
-            dict: BTreeMap::new(),
-            dict_keys: Vec::new(),
-        }
-    }
-
-    fn append_null(&mut self) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Null));
-    }
-
-    fn append_bool(&mut self, value: bool) {
-        let primitive_type = if value {
-            VariantPrimitiveType::BooleanTrue
-        } else {
-            VariantPrimitiveType::BooleanFalse
-        };
-        self.buffer.push(primitive_header(primitive_type));
-    }
-
-    fn append_int8(&mut self, value: i8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int8));
-        self.buffer.push(value as u8);
-    }
-
-    fn append_int16(&mut self, value: i16) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int16));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_int32(&mut self, value: i32) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int32));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_int64(&mut self, value: i64) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int64));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_float(&mut self, value: f32) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Float));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_double(&mut self, value: f64) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Double));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
-    }
-
-    fn append_date(&mut self, value: chrono::NaiveDate) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Date));
-        let days_since_epoch = 
value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32;
-        self.buffer
-            .extend_from_slice(&days_since_epoch.to_le_bytes());
-    }
-
-    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
-        let micros = value.timestamp_micros();
-        self.buffer.extend_from_slice(&micros.to_le_bytes());
-    }
-
-    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
-        let micros = value.and_utc().timestamp_micros();
-        self.buffer.extend_from_slice(&micros.to_le_bytes());
-    }
-
-    fn append_decimal4(&mut self, integer: i32, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal4));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
-    }
-
-    fn append_decimal8(&mut self, integer: i64, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal8));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
-    }
-
-    fn append_decimal16(&mut self, integer: i128, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal16));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
-    }
-
-    fn append_binary(&mut self, value: &[u8]) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Binary));
-        self.buffer
-            .extend_from_slice(&(value.len() as u32).to_le_bytes());
-        self.buffer.extend_from_slice(value);
-    }
-
-    fn append_short_string(&mut self, value: ShortString) {
-        let inner = value.0;
-        self.buffer.push(short_string_header(inner.len()));
-        self.buffer.extend_from_slice(inner.as_bytes());
-    }
-
-    fn append_string(&mut self, value: &str) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::String));
-        self.buffer
-            .extend_from_slice(&(value.len() as u32).to_le_bytes());
-        self.buffer.extend_from_slice(value.as_bytes());
-    }
-
-    /// Add key to dictionary, return its ID
-    fn add_key(&mut self, key: &str) -> u32 {
-        use std::collections::btree_map::Entry;
-        match self.dict.entry(key.to_string()) {
-            Entry::Occupied(entry) => *entry.get(),
-            Entry::Vacant(entry) => {
-                let id = self.dict_keys.len() as u32;
-                entry.insert(id);
-                self.dict_keys.push(key.to_string());
-                id
-            }
+            buffer: VariantBuffer::default(),
+            field_metadata_dictionary: FieldMetadataDictionary::default(),
         }
     }
 
-    fn offset(&self) -> usize {
-        self.buffer.len()
-    }
-
     /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
     ///
     /// See the examples on [`VariantBuilder`] for usage.
     pub fn new_list(&mut self) -> ListBuilder {
-        ListBuilder::new(self)
+        ListBuilder::new(&mut self.buffer, &mut self.field_metadata_dictionary)

Review Comment:
   This is a very nice pattern



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to