alamb commented on code in PR #7452:
URL: https://github.com/apache/arrow-rs/pull/7452#discussion_r2071846604


##########
arrow-schema/src/extension/canonical/mod.rs:
##########
@@ -37,6 +37,8 @@ mod uuid;
 pub use uuid::Uuid;
 mod variable_shape_tensor;
 pub use variable_shape_tensor::{VariableShapeTensor, 
VariableShapeTensorMetadata};
+mod variant;

Review Comment:
   I recommend we postpone adding the canonical extension type classes until we 
get farther along in the process and are in a better position to write tests.
   
   In other words, I recommend removing the changes in 
arrow-schema/src/extension/ from this PR as well.



##########
arrow-variant/src/builder/mod.rs:
##########
@@ -0,0 +1,1458 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Builder API for creating Variant binary values.
+//!
+//! This module provides a builder-style API for creating Variant values in the
+//! Arrow binary format. The API is modeled after the Arrow array builder APIs.
+//!
+//! # Example
+//!
+//! ```
+//! use std::io::Cursor;
+//! use arrow_variant::builder::{VariantBuilder, PrimitiveValue};
+//!
+//! // Create a builder for variant values
+//! let mut metadata_buffer = vec![];
+//! let mut builder = VariantBuilder::new(&mut metadata_buffer);
+//!
+//! // Create an object
+//! let mut value_buffer = vec![];
+//! let mut object_builder = builder.new_object(&mut value_buffer);
+//! object_builder.append_value("foo", 1);
+//! object_builder.append_value("bar", 100);
+//! object_builder.finish();
+//!
+//! // value_buffer now contains a valid variant value
+//! // builder contains metadata with fields "foo" and "bar"
+//!
+//! // Create another object reusing the same metadata
+//! let mut value_buffer2 = vec![];
+//! let mut object_builder2 = builder.new_object(&mut value_buffer2);
+//! object_builder2.append_value("foo", 2);
+//! object_builder2.append_value("bar", 200);
+//! object_builder2.finish();
+//!
+//! // Finalize the metadata
+//! builder.finish();
+//! // metadata_buffer now contains valid variant metadata bytes
+//! ```
+
+use indexmap::IndexMap;
+use std::collections::HashMap;
+use std::io::Write;
+
+use crate::encoder::{
+    encode_array_from_pre_encoded, encode_binary, encode_boolean, encode_date, 
encode_decimal16,
+    encode_decimal4, encode_decimal8, encode_float, encode_integer, 
encode_null,
+    encode_object_from_pre_encoded, encode_string, encode_time_ntz, 
encode_timestamp,
+    encode_timestamp_nanos, encode_timestamp_ntz, encode_timestamp_ntz_nanos, 
encode_uuid,
+    min_bytes_needed, write_int_with_size,
+};
+use arrow_schema::ArrowError;
+
+/// Values that can be stored in a Variant.
+#[derive(Debug, Clone)]
+pub enum PrimitiveValue {
+    /// Null value
+    Null,
+    /// Boolean value
+    Boolean(bool),
+    /// 8-bit integer
+    Int8(i8),
+    /// 16-bit integer
+    Int16(i16),
+    /// 32-bit integer
+    Int32(i32),
+    /// 64-bit integer
+    Int64(i64),
+    /// Single-precision floating point
+    Float(f32),
+    /// Double-precision floating point
+    Double(f64),
+    /// UTF-8 string
+    String(String),
+    /// Binary data
+    Binary(Vec<u8>),
+    /// Date value (days since epoch)
+    Date(i32),
+    /// Timestamp (milliseconds since epoch)
+    Timestamp(i64),
+    /// Timestamp without timezone (milliseconds since epoch)
+    TimestampNTZ(i64),
+    /// Time without timezone (milliseconds)
+    TimeNTZ(i64),
+    /// Timestamp with nanosecond precision
+    TimestampNanos(i64),
+    /// Timestamp without timezone with nanosecond precision
+    TimestampNTZNanos(i64),
+    /// UUID as 16 bytes
+    Uuid([u8; 16]),
+    /// Decimal with scale and 32-bit unscaled value (precision 1-9)
+    Decimal4(u8, i32),
+    /// Decimal with scale and 64-bit unscaled value (precision 10-18)
+    Decimal8(u8, i64),
+    /// Decimal with scale and 128-bit unscaled value (precision 19-38)
+    Decimal16(u8, i128),
+}
+
+impl From<i32> for PrimitiveValue {
+    fn from(value: i32) -> Self {
+        PrimitiveValue::Int32(value)
+    }
+}
+
+impl From<i64> for PrimitiveValue {
+    fn from(value: i64) -> Self {
+        PrimitiveValue::Int64(value)
+    }
+}
+
+impl From<i16> for PrimitiveValue {
+    fn from(value: i16) -> Self {
+        PrimitiveValue::Int16(value)
+    }
+}
+
+impl From<i8> for PrimitiveValue {
+    fn from(value: i8) -> Self {
+        PrimitiveValue::Int8(value)
+    }
+}
+
+impl From<f32> for PrimitiveValue {
+    fn from(value: f32) -> Self {
+        PrimitiveValue::Float(value)
+    }
+}
+
+impl From<f64> for PrimitiveValue {
+    fn from(value: f64) -> Self {
+        PrimitiveValue::Double(value)
+    }
+}
+
+impl From<bool> for PrimitiveValue {
+    fn from(value: bool) -> Self {
+        PrimitiveValue::Boolean(value)
+    }
+}
+
+impl From<String> for PrimitiveValue {
+    fn from(value: String) -> Self {
+        PrimitiveValue::String(value)
+    }
+}
+
+impl From<&str> for PrimitiveValue {
+    fn from(value: &str) -> Self {
+        PrimitiveValue::String(value.to_string())
+    }
+}
+
+impl From<Vec<u8>> for PrimitiveValue {
+    fn from(value: Vec<u8>) -> Self {
+        PrimitiveValue::Binary(value)
+    }
+}
+
+impl From<&[u8]> for PrimitiveValue {
+    fn from(value: &[u8]) -> Self {
+        PrimitiveValue::Binary(value.to_vec())
+    }
+}
+
+impl<T: Into<PrimitiveValue>> From<Option<T>> for PrimitiveValue {
+    fn from(value: Option<T>) -> Self {
+        match value {
+            Some(v) => v.into(),
+            None => PrimitiveValue::Null,
+        }
+    }
+}
+
+/// Builder for Variant values.
+///
+/// This builder creates Variant values in the Arrow binary format.
+/// It manages metadata and helps create nested objects and arrays.
+///
+/// The builder follows a pattern similar to other Arrow array builders,
+/// but is specialized for creating Variant binary values.
+pub struct VariantBuilder<'a> {
+    /// Dictionary mapping field names to indexes
+    dictionary: HashMap<String, usize>,
+    /// Whether keys should be sorted in metadata
+    sort_keys: bool,
+    /// Whether the metadata is finalized
+    is_finalized: bool,
+    /// The output destination for metadata
+    metadata_output: Box<dyn Write + 'a>,
+}
+
+impl<'a> std::fmt::Debug for VariantBuilder<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("VariantBuilder")
+            .field("dictionary", &self.dictionary)
+            .field("sort_keys", &self.sort_keys)
+            .field("is_finalized", &self.is_finalized)
+            .field("metadata_output", &"<dyn Write>")
+            .finish()
+    }
+}
+
+impl<'a> VariantBuilder<'a> {
+    /// Creates a new VariantBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    pub fn new(metadata_output: impl Write + 'a) -> Self {
+        Self::new_with_sort(metadata_output, false)
+    }
+
+    /// Creates a new VariantBuilder with optional key sorting.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    /// * `sort_keys` - Whether keys should be sorted in metadata
+    pub fn new_with_sort(metadata_output: impl Write + 'a, sort_keys: bool) -> 
Self {
+        Self {
+            dictionary: HashMap::new(),
+            sort_keys,
+            is_finalized: false,
+            metadata_output: Box::new(metadata_output),
+        }
+    }
+
+    /// Creates a new ObjectBuilder for building an object variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    pub fn new_object<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ObjectBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new object after the builder has been 
finalized");
+        }
+
+        ObjectBuilder::new(output, self)
+    }
+
+    /// Creates a new ArrayBuilder for building an array variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the array value
+    pub fn new_array<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ArrayBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new array after the builder has been 
finalized");
+        }
+
+        ArrayBuilder::new(output, self)
+    }
+
+    /// Adds a key to the dictionary if it doesn't already exist.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key to add
+    ///
+    /// # Returns
+    ///
+    /// The index of the key in the dictionary
+    pub(crate) fn add_key(&mut self, key: &str) -> Result<usize, ArrowError> {
+        if self.is_finalized {
+            return Err(ArrowError::VariantError(
+                "Cannot add keys after metadata has been 
finalized".to_string(),
+            ));
+        }
+
+        if let Some(idx) = self.dictionary.get(key) {
+            return Ok(*idx);
+        }
+
+        let idx = self.dictionary.len();
+        self.dictionary.insert(key.to_string(), idx);
+        Ok(idx)
+    }
+
+    /// Finalizes the metadata and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Get keys in sorted or insertion order
+        let mut keys: Vec<_> = self.dictionary.keys().cloned().collect();
+        if self.sort_keys {
+            keys.sort();
+
+            // Re-index keys based on sorted order
+            for (i, key) in keys.iter().enumerate() {
+                self.dictionary.insert(key.clone(), i);
+            }
+        }
+
+        // Calculate total size of dictionary strings
+        let total_string_size: usize = keys.iter().map(|k| k.len()).sum();
+
+        // Determine offset size based on max possible offset value
+        let max_offset = std::cmp::max(total_string_size, keys.len() + 1);
+        let offset_size = min_bytes_needed(max_offset);
+        let offset_size_minus_one = offset_size - 1;
+
+        // Construct header byte
+        let sorted_bit = if self.sort_keys { 1 } else { 0 };
+        let header = 0x01 | (sorted_bit << 4) | ((offset_size_minus_one as u8) 
<< 6);
+
+        // Write header byte
+        if let Err(e) = self.metadata_output.write_all(&[header]) {
+            panic!("Failed to write metadata header: {}", e);
+        }
+
+        // Write dictionary size (number of keys)
+        let dict_size = keys.len() as u32;
+        if let Err(e) = write_int_with_size(dict_size, offset_size, &mut 
self.metadata_output) {
+            panic!("Failed to write dictionary size: {}", e);
+        }
+
+        // Calculate and write offsets
+        let mut current_offset = 0u32;
+        let mut offsets = Vec::with_capacity(keys.len() + 1);
+
+        offsets.push(current_offset);
+        for key in &keys {
+            current_offset += key.len() as u32;
+            offsets.push(current_offset);
+        }
+
+        // Write offsets using the helper function
+        for offset in offsets {
+            if let Err(e) = write_int_with_size(offset, offset_size, &mut 
self.metadata_output) {
+                panic!("Failed to write offset: {}", e);
+            }
+        }
+
+        // Write dictionary strings
+        for key in keys {
+            if let Err(e) = self.metadata_output.write_all(key.as_bytes()) {
+                panic!("Failed to write dictionary string: {}", e);
+            }
+        }
+
+        self.is_finalized = true;
+    }
+
+    /// Returns whether the builder has been finalized.
+    pub fn is_finalized(&self) -> bool {
+        self.is_finalized
+    }
+}
+
+/// Builder for Variant object values.
+pub struct ObjectBuilder<'a, 'b> {
+    /// Destination for the object value
+    output: &'a mut Vec<u8>,
+    /// Reference to the variant builder
+    variant_builder: &'a mut VariantBuilder<'b>,
+    /// Temporary buffer for field values - stored as key_index -> value_buffer
+    /// Using IndexMap for O(1) access with ability to sort by key
+    value_buffers: IndexMap<usize, Vec<u8>>,
+    /// Whether the object has been finalized
+    is_finalized: bool,
+}
+
+impl<'a, 'b> std::fmt::Debug for ObjectBuilder<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ObjectBuilder")
+            .field("variant_builder", &self.variant_builder)
+            .field("value_buffers", &self.value_buffers)
+            .field("is_finalized", &self.is_finalized)
+            .finish()
+    }
+}
+
+impl<'a, 'b> ObjectBuilder<'a, 'b> {
+    /// Creates a new ObjectBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    /// * `variant_builder` - The parent variant builder
+    fn new(output: &'a mut Vec<u8>, variant_builder: &'a mut 
VariantBuilder<'b>) -> Self {
+        Self {
+            output,
+            variant_builder,
+            value_buffers: IndexMap::new(),
+            is_finalized: false,
+        }
+    }
+
+    /// Adds a primitive value to the object.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the value
+    /// * `value` - The primitive value to add
+    pub fn append_value<T: Into<PrimitiveValue>>(&mut self, key: &str, value: 
T) {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a buffer for this value
+        let mut buffer = Vec::new();
+
+        // Convert the value to PrimitiveValue and write it
+        let primitive_value = value.into();
+        if let Err(e) = write_value(&mut buffer, &primitive_value) {
+            panic!("Failed to write value: {}", e);
+        }
+
+        // Store the buffer for this field - will overwrite if key already 
exists
+        self.value_buffers.insert(key_index, buffer);
+    }
+
+    /// Creates a nested object builder.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the nested object
+    pub fn append_object<'c>(&'c mut self, key: &str) -> ObjectBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a temporary buffer for the nested object and store it
+        let nested_buffer = Vec::new();
+        self.value_buffers.insert(key_index, nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.get_mut(&key_index).unwrap();
+
+        // Create a new object builder for this nested buffer
+        ObjectBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Creates a nested array builder.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the nested array
+    pub fn append_array<'c>(&'c mut self, key: &str) -> ArrayBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a temporary buffer for the nested array and store it
+        let nested_buffer = Vec::new();
+        self.value_buffers.insert(key_index, nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.get_mut(&key_index).unwrap();
+
+        // Create a new array builder for this nested buffer
+        ArrayBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Finalizes the object and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Sort the entries by key index
+        self.value_buffers.sort_keys();
+
+        // Prepare field IDs and values for encoding
+        let field_ids: Vec<usize> = 
self.value_buffers.keys().copied().collect();
+        let field_values: Vec<&[u8]> = self.value_buffers.values().map(|v| 
v.as_slice()).collect();
+
+        // Encode the object directly to output
+        if let Err(e) = encode_object_from_pre_encoded(&field_ids, 
&field_values, self.output) {
+            panic!("Failed to encode object: {}", e);
+        }
+
+        self.is_finalized = true;
+    }
+}
+
+/// Builder for Variant array values.
+pub struct ArrayBuilder<'a, 'b> {
+    /// Destination for the array value
+    output: &'a mut Vec<u8>,
+    /// Reference to the variant builder
+    variant_builder: &'a mut VariantBuilder<'b>,
+    /// Temporary buffers for array elements
+    value_buffers: Vec<Vec<u8>>,
+    /// Whether the array has been finalized
+    is_finalized: bool,
+}
+
+impl<'a, 'b> std::fmt::Debug for ArrayBuilder<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ArrayBuilder")
+            .field("variant_builder", &self.variant_builder)
+            .field("value_buffers", &self.value_buffers)
+            .field("is_finalized", &self.is_finalized)
+            .finish()
+    }
+}
+
+impl<'a, 'b> ArrayBuilder<'a, 'b> {
+    /// Creates a new ArrayBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the array value
+    /// * `variant_builder` - The parent variant builder
+    fn new(output: &'a mut Vec<u8>, variant_builder: &'a mut 
VariantBuilder<'b>) -> Self {
+        Self {
+            output,
+            variant_builder,
+            value_buffers: Vec::new(),
+            is_finalized: false,
+        }
+    }
+
+    /// Adds a primitive value to the array.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The primitive value to add
+    pub fn append_value<T: Into<PrimitiveValue>>(&mut self, value: T) {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a buffer for this value
+        let mut buffer = Vec::new();
+
+        // Convert the value to PrimitiveValue and write it
+        let primitive_value = value.into();
+        if let Err(e) = write_value(&mut buffer, &primitive_value) {
+            panic!("Failed to write value: {}", e);
+        }
+
+        // Store the buffer for this element
+        self.value_buffers.push(buffer);
+    }
+
+    /// Creates a nested object builder.
+    ///
+    /// Returns an `ObjectBuilder` for a new element appended to this array.
+    pub fn append_object<'c>(&'c mut self) -> ObjectBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a temporary buffer for the nested object
+        let nested_buffer = Vec::new();
+        self.value_buffers.push(nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.last_mut().unwrap();
+
+        // Create a new object builder for this nested buffer
+        ObjectBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Creates a nested array builder.
+    ///
+    /// Returns an `ArrayBuilder` for a new element appended to this array.
+    pub fn append_array<'c>(&'c mut self) -> ArrayBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a temporary buffer for the nested array
+        let nested_buffer = Vec::new();
+        self.value_buffers.push(nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.last_mut().unwrap();
+
+        // Create a new array builder for this nested buffer
+        ArrayBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Finalizes the array and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Prepare slices for values
+        let values: Vec<&[u8]> = self.value_buffers.iter().map(|v| 
v.as_slice()).collect();
+
+        // Encode the array directly to output
+        if let Err(e) = encode_array_from_pre_encoded(&values, self.output) {
+            panic!("Failed to encode array: {}", e);
+        }
+
+        self.is_finalized = true;
+    }
+}
+
+/// Writes a primitive value to a buffer using the Variant format.
+///
+/// This function handles the correct encoding of primitive values by utilizing
+/// the encoder module functionality.
+fn write_value(buffer: &mut Vec<u8>, value: &PrimitiveValue) -> Result<(), 
ArrowError> {
+    match value {
+        PrimitiveValue::Null => {
+            encode_null(buffer);
+        }
+        PrimitiveValue::Boolean(val) => {
+            encode_boolean(*val, buffer);
+        }
+        PrimitiveValue::Int8(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int16(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int32(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int64(val) => {
+            encode_integer(*val, buffer);
+        }
+        PrimitiveValue::Float(val) => {
+            encode_float(*val as f64, buffer);
+        }
+        PrimitiveValue::Double(val) => {
+            encode_float(*val, buffer);
+        }
+        PrimitiveValue::String(val) => {
+            encode_string(val, buffer);
+        }
+        PrimitiveValue::Binary(val) => {
+            encode_binary(val, buffer);
+        }
+        PrimitiveValue::Date(val) => {
+            encode_date(*val, buffer);
+        }
+        PrimitiveValue::Timestamp(val) => {
+            encode_timestamp(*val, buffer);
+        }
+        PrimitiveValue::TimestampNTZ(val) => {
+            encode_timestamp_ntz(*val, buffer);
+        }
+        PrimitiveValue::TimeNTZ(val) => {
+            encode_time_ntz(*val, buffer);
+        }
+        PrimitiveValue::TimestampNanos(val) => {
+            encode_timestamp_nanos(*val, buffer);
+        }
+        PrimitiveValue::TimestampNTZNanos(val) => {
+            encode_timestamp_ntz_nanos(*val, buffer);
+        }
+        PrimitiveValue::Uuid(val) => {
+            encode_uuid(val, buffer);
+        }
+        PrimitiveValue::Decimal4(scale, unscaled_value) => {
+            encode_decimal4(*scale, *unscaled_value, buffer);
+        }
+        PrimitiveValue::Decimal8(scale, unscaled_value) => {
+            encode_decimal8(*scale, *unscaled_value, buffer);
+        }
+        PrimitiveValue::Decimal16(scale, unscaled_value) => {
+            encode_decimal16(*scale, *unscaled_value, buffer);
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::encoder::VariantBasicType;
+    use arrow_schema::extension::Variant;
+
+    // Helper function to extract keys from metadata for testing
+    fn get_metadata_keys(metadata: &[u8]) -> Vec<String> {
+        // Simple implementation to extract keys from metadata buffer
+        // This avoids dependency on VariantReader which might not be 
accessible
+
+        // Skip the header byte
+        let mut pos = 1;
+
+        // Get offset size from header byte
+        let offset_size = ((metadata[0] >> 6) & 0x03) + 1;
+
+        // Read dictionary size
+        let mut dict_size = 0usize;
+        for i in 0..offset_size {
+            dict_size |= (metadata[pos + i as usize] as usize) << (i * 8);
+        }
+        pos += offset_size as usize;
+
+        if dict_size == 0 {
+            return vec![];
+        }
+
+        // Read offsets
+        let mut offsets = Vec::with_capacity(dict_size + 1);
+        for _ in 0..=dict_size {
+            let mut offset = 0usize;
+            for i in 0..offset_size {
+                offset |= (metadata[pos + i as usize] as usize) << (i * 8);
+            }
+            offsets.push(offset);
+            pos += offset_size as usize;
+        }
+
+        // Extract keys using offsets
+        let mut keys = Vec::with_capacity(dict_size);
+        for i in 0..dict_size {
+            let start = offsets[i];
+            let end = offsets[i + 1];
+            let key_bytes = &metadata[pos + start..pos + end];
+            keys.push(String::from_utf8_lossy(key_bytes).to_string());
+        }
+
+        keys
+    }
+
+    // 
=========================================================================
+    // Basic builder functionality tests
+    // 
=========================================================================
+
+    #[test]
+    fn test_basic_object_builder() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut object_builder = builder.new_object(&mut value_buffer);
+
+            // Test various primitive types
+            object_builder.append_value("null", Option::<i32>::None);
+            object_builder.append_value("bool_true", true);
+            object_builder.append_value("bool_false", false);
+            object_builder.append_value("int8", 42i8);
+            object_builder.append_value("int16", 1000i16);
+            object_builder.append_value("int32", 100000i32);
+            object_builder.append_value("int64", 1000000000i64);
+            object_builder.append_value("float", 3.14f32);
+            object_builder.append_value("double", 2.71828f64);
+            object_builder.append_value("string", "hello world");
+            object_builder.append_value("binary", vec![1u8, 2u8, 3u8]);
+
+            object_builder.finish();
+            builder.finish();
+        }
+
+        // Verify object encoding
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Object as u8);
+
+        // Verify metadata contains all keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 11, "Should have 11 keys in metadata");
+        assert!(keys.contains(&"null".to_string()), "Missing 'null' key");
+        assert!(
+            keys.contains(&"bool_true".to_string()),
+            "Missing 'bool_true' key"
+        );
+        assert!(keys.contains(&"string".to_string()), "Missing 'string' key");
+
+        // Verify object has the correct number of entries
+        // First byte after header is the number of fields (if small object)
+        assert!(value_buffer.len() > 1, "Value buffer too small");
+        let num_fields = value_buffer[1];
+        assert_eq!(num_fields as usize, 11, "Object should have 11 fields");
+
+        let _variant = Variant::new(metadata_buffer, value_buffer);
+    }
+
+    #[test]
+    fn test_basic_array_builder() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+        let num_elements = 11; // Number of elements we'll add
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut array_builder = builder.new_array(&mut value_buffer);
+
+            // Test various primitive types
+            array_builder.append_value(Option::<i32>::None);
+            array_builder.append_value(true);
+            array_builder.append_value(false);
+            array_builder.append_value(42i8);
+            array_builder.append_value(1000i16);
+            array_builder.append_value(100000i32);
+            array_builder.append_value(1000000000i64);
+            array_builder.append_value(3.14f32);
+            array_builder.append_value(2.71828f64);
+            array_builder.append_value("hello world");
+            array_builder.append_value(vec![1u8, 2u8, 3u8]);
+
+            array_builder.finish();
+            builder.finish();
+        }
+
+        // Verify array encoding
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Array as u8);
+
+        // Verify array length
+        // First byte after header is the array length (if small array)
+        assert!(value_buffer.len() > 1, "Value buffer too small");
+        let array_length = value_buffer[1];
+        assert_eq!(
+            array_length as usize, num_elements,
+            "Array should have exactly {num_elements} elements"
+        );
+
+        // Verify metadata format is valid (version 1)
+        assert_eq!(
+            metadata_buffer[0] & 0x0F,
+            0x01,
+            "Metadata should be version 1"
+        );
+
+        // Metadata should have dictionary size of 0 (no keys in a plain array)
+        // Second and potentially following bytes are dictionary size 
depending on offset size
+        let offset_size = ((metadata_buffer[0] >> 6) & 0x03) + 1;
+        let dict_size_bytes = &metadata_buffer[1..1 + offset_size as usize];
+        if offset_size == 1 {
+            assert_eq!(
+                dict_size_bytes[0], 0,
+                "Dictionary should be empty for array"
+            );
+        }
+
+        // Create variant and verify it's structurally valid
+        let variant = Variant::new(metadata_buffer, value_buffer);
+        assert!(!variant.metadata().is_empty());
+        assert!(!variant.value().is_empty());
+    }
+
+    // 
=========================================================================
+    // Nested structure tests
+    // 
=========================================================================
+
+    #[test]
+    fn test_nested_objects() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut root = builder.new_object(&mut value_buffer);
+
+            // Add primitive values
+            root.append_value("name", "Test User");
+            root.append_value("age", 30);
+
+            // Add nested object
+            {
+                let mut address = root.append_object("address");
+                address.append_value("street", "123 Main St");
+                address.append_value("city", "Anytown");
+                address.append_value("zip", 12345);
+
+                // Add deeply nested object
+                {
+                    let mut geo = address.append_object("geo");
+                    geo.append_value("lat", 40.7128);
+                    geo.append_value("lng", -74.0060);
+                    geo.finish();
+                }
+
+                address.finish();
+            }
+
+            root.finish();
+            builder.finish();
+        }
+
+        // Verify metadata contains the correct keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 9, "Should have 9 keys in metadata");
+
+        // Check all required keys exist
+        let required_keys = [
+            "name", "age", "address", "street", "city", "zip", "geo", "lat", 
"lng",
+        ];
+        for key in required_keys.iter() {
+            assert!(keys.contains(&key.to_string()), "Missing '{key}' key");
+        }
+
+        // Verify object structure - first byte should be object type
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Object as u8);
+
+        // Create variant and verify it's valid
+        let variant = Variant::new(metadata_buffer, value_buffer);
+        assert!(!variant.metadata().is_empty());
+        assert!(!variant.value().is_empty());
+    }
+
+    #[test]
+    fn test_nested_arrays() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut root = builder.new_object(&mut value_buffer);
+
+            // Add array of primitives with expected length 3
+            {
+                let mut scores = root.append_array("scores");
+                scores.append_value(95);
+                scores.append_value(87);
+                scores.append_value(91);
+                scores.finish();
+            }
+
+            // Add array of objects with expected length 2
+            {
+                let mut contacts = root.append_array("contacts");
+
+                // First contact
+                {
+                    let mut contact = contacts.append_object();
+                    contact.append_value("name", "Alice");
+                    contact.append_value("phone", "555-1234");
+                    contact.finish();
+                }
+
+                // Second contact
+                {
+                    let mut contact = contacts.append_object();
+                    contact.append_value("name", "Bob");
+                    contact.append_value("phone", "555-5678");
+                    contact.finish();
+                }
+
+                contacts.finish();
+            }
+
+            root.finish();
+            builder.finish();
+        }
+
+        // Verify metadata contains the expected keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 4, "Should have 4 keys in metadata");
+
+        // Check required keys
+        let required_keys = ["scores", "contacts", "name", "phone"];
+        for key in required_keys.iter() {
+            assert!(keys.contains(&key.to_string()), "Missing '{key}' key");
+        }
+
+        // Create variant
+        let variant = Variant::new(metadata_buffer, value_buffer);
+        assert!(!variant.metadata().is_empty());
+        assert!(!variant.value().is_empty());
+    }
+
+    // 
=========================================================================
+    // Advanced feature tests
+    // 
=========================================================================
+
+    #[test]
+    fn test_metadata_reuse() {
+        let mut metadata_buffer = vec![];
+
+        // Create multiple value buffers
+        let mut value_buffer1 = vec![];
+        let mut value_buffer2 = vec![];
+        let mut value_buffer3 = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+
+            // First object with all keys
+            {
+                let mut object = builder.new_object(&mut value_buffer1);
+                object.append_value("foo", 1);
+                object.append_value("bar", 100);
+                object.append_value("baz", "hello");
+                object.finish();
+            }
+
+            // Second object with subset of keys
+            {
+                let mut object = builder.new_object(&mut value_buffer2);
+                object.append_value("foo", 2);
+                object.append_value("bar", 200);
+                // No "baz" key
+                object.finish();
+            }
+
+            // Third object with different subset and order
+            {
+                let mut object = builder.new_object(&mut value_buffer3);
+                // Different order
+                object.append_value("baz", "world");
+                object.append_value("foo", 3);
+                // No "bar" key
+                object.finish();
+            }
+
+            builder.finish();
+        }
+
+        // Verify metadata has expected number of keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 3, "Should have 3 keys in metadata");
+
+        // Create variants with same metadata
+        let variant1 = Variant::new(metadata_buffer.clone(), value_buffer1);
+        let variant2 = Variant::new(metadata_buffer.clone(), value_buffer2);
+        let variant3 = Variant::new(metadata_buffer, value_buffer3);
+
+        // Verify shared metadata has identical bytes
+        assert_eq!(
+            variant1.metadata(),
+            variant2.metadata(),
+            "Metadata should be exactly the same"
+        );
+        assert_eq!(
+            variant2.metadata(),
+            variant3.metadata(),
+            "Metadata should be exactly the same"
+        );
+
+        // Verify different values
+        assert_ne!(
+            variant1.value(),
+            variant2.value(),
+            "Values should be different"
+        );
+        assert_ne!(
+            variant2.value(),
+            variant3.value(),
+            "Values should be different"
+        );
+        assert_ne!(
+            variant1.value(),
+            variant3.value(),
+            "Values should be different"
+        );
+    }
+
+    #[test]
+    fn test_sorted_keys() {
+        // Test sorted keys vs unsorted
+        let mut sorted_metadata = vec![];
+        let mut unsorted_metadata = vec![];
+        let mut value_buffer1 = vec![];
+        let mut value_buffer2 = vec![];
+
+        // Define keys in a non-alphabetical order
+        let keys = ["zoo", "apple", "banana"];
+
+        // Build with sorted keys
+        {
+            let mut builder = VariantBuilder::new_with_sort(&mut 
sorted_metadata, true);
+            let mut object = builder.new_object(&mut value_buffer1);
+
+            // Add keys in random order
+            for (i, key) in keys.iter().enumerate() {
+                object.append_value(key, (i + 1) as i32);
+            }
+
+            object.finish();
+            builder.finish();
+        }
+
+        // Build with unsorted keys
+        {
+            let mut builder = VariantBuilder::new_with_sort(&mut 
unsorted_metadata, false);
+            let mut object = builder.new_object(&mut value_buffer2);
+
+            // Add keys in same order
+            for (i, key) in keys.iter().enumerate() {
+                object.append_value(key, (i + 1) as i32);
+            }
+
+            object.finish();
+            builder.finish();
+        }
+
+        // Verify sort flag in metadata header (bit 4)
+        assert_eq!(sorted_metadata[0] & 0x10, 0x10, "Sorted flag should be 
set");
+        assert_eq!(
+            unsorted_metadata[0] & 0x10,
+            0,
+            "Sorted flag should not be set"
+        );
+
+        // Verify actual sorting of keys
+        let sorted_keys = get_metadata_keys(&sorted_metadata);
+        let unsorted_keys = get_metadata_keys(&unsorted_metadata);
+
+        // Verify number of keys
+        assert_eq!(sorted_keys.len(), 3, "Should have 3 keys");
+        assert_eq!(unsorted_keys.len(), 3, "Should have 3 keys");
+
+        // Verify sorted keys are in alphabetical order
+        let mut expected_sorted = keys.to_vec();
+        expected_sorted.sort();
+
+        // Convert to Vec to make comparison easier
+        let sorted_keys_vec: Vec<_> = sorted_keys.iter().collect();
+
+        // Verify first key is alphabetically first
+        assert_eq!(
+            sorted_keys_vec[0], "apple",
+            "First key should be 'apple' in sorted metadata"
+        );
+    }
+
+    // 
=========================================================================
+    // Encoding validation tests
+    // 
=========================================================================
+
+    #[test]
+    fn test_object_encoding() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut object = builder.new_object(&mut value_buffer);
+
+            // Add a few values
+            object.append_value("name", "Test User");
+            object.append_value("age", 30);
+            object.append_value("active", true);
+
+            object.finish();
+            builder.finish();
+        }
+
+        // Validate object encoding format
+        // First byte should have Object type in lower 2 bits
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Object as u8);
+
+        // Check field ID and offset sizes from header
+        let is_large = (value_buffer[0] & 0x40) != 0;
+        // Verify correct sizes based on our data
+        assert!(!is_large, "Should not need large format for 3 fields");
+        // Validate number of fields
+        let num_fields = value_buffer[1];
+        assert_eq!(num_fields, 3, "Should have 3 fields");
+
+        // Verify metadata contains the correct keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 3, "Should have 3 keys in metadata");
+
+        // Check all keys exist
+        assert!(keys.contains(&"name".to_string()));
+        assert!(keys.contains(&"age".to_string()));
+        assert!(keys.contains(&"active".to_string()));
+    }
+
+    #[test]
+    fn test_array_encoding() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+        let expected_len = 4; // We'll add 4 elements
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut array = builder.new_array(&mut value_buffer);
+
+            // Add a few values
+            array.append_value(1);
+            array.append_value(2);
+            array.append_value("hello");
+            array.append_value(true);
+
+            array.finish();
+            builder.finish();
+        }
+
+        // Validate array encoding format
+        // First byte should have Array type in lower 2 bits
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Array as u8);
+
+        // Check if large format and offset size from header
+        let is_large = (value_buffer[0] & 0x10) != 0;
+        let offset_size = ((value_buffer[0] >> 2) & 0x03) + 1;
+
+        // Verify correct sizes based on our data
+        assert!(!is_large, "Should not need large format for 4 elements");
+
+        // Validate array length
+        let array_length = value_buffer[1];
+        assert_eq!(
+            array_length, expected_len,
+            "Array should have {expected_len} elements"
+        );
+
+        // Verify offsets section exists
+        // The offsets start after the header (1 byte) and length (1 byte if 
small)
+        // and there should be n+1 offsets where n is the array length
+        let offsets_section_size = (expected_len as usize + 1) * (offset_size 
as usize);
+        assert!(
+            value_buffer.len() > 2 + offsets_section_size,
+            "Value buffer should contain offsets section of size 
{offsets_section_size}"
+        );
+    }
+
+    #[test]
+    fn test_metadata_encoding() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new_with_sort(&mut 
metadata_buffer, true);
+            let mut object = builder.new_object(&mut value_buffer);
+
+            // Add keys in non-alphabetical order
+            object.append_value("zzz", 3);
+            object.append_value("aaa", 1);
+            object.append_value("mmm", 2);
+
+            object.finish();
+            builder.finish();
+        }
+
+        // Validate metadata encoding
+        // First byte should have metadata version and sorted flag
+        assert_eq!(
+            metadata_buffer[0] & 0x0F,
+            0x01,
+            "Metadata should be version 1"
+        );
+        assert_eq!(metadata_buffer[0] & 0x10, 0x10, "Sorted flag should be 
set");
+
+        // Get offset size from header
+        let offset_size = ((metadata_buffer[0] >> 6) & 0x03) + 1;
+
+        // Read dictionary size based on offset size
+        let mut dict_size = 0usize;
+        for i in 0..offset_size {
+            dict_size |= (metadata_buffer[1 + i as usize] as usize) << (i * 8);
+        }
+
+        assert_eq!(dict_size, 3, "Dictionary should have 3 entries");
+
+        // Verify key ordering by reading keys
+        let keys = get_metadata_keys(&metadata_buffer);
+
+        // Convert to Vec to make validation easier
+        let keys_vec: Vec<_> = keys.iter().collect();
+
+        // Verify keys are in alphabetical order
+        assert_eq!(keys_vec[0], "aaa", "First key should be 'aaa'");
+        assert_eq!(keys_vec[1], "mmm", "Second key should be 'mmm'");
+        assert_eq!(keys_vec[2], "zzz", "Third key should be 'zzz'");
+    }
+
+    #[test]
+    fn test_primitive_type_encoding() {
+        // Test encoding of each primitive type
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut object = builder.new_object(&mut value_buffer);
+
+            // Add one of each primitive type
+            object.append_value("null", Option::<i32>::None);
+            object.append_value("bool_true", true);
+            object.append_value("bool_false", false);
+            object.append_value("int8", 42i8);
+            object.append_value("int16", 1000i16);
+            object.append_value("int32", 100000i32);
+            object.append_value("int64", 1000000000i64);
+            object.append_value("float", 3.14f32);
+            object.append_value("double", 2.71828f64);
+            object.append_value("string_short", "abc"); // Short string
+            object.append_value("string_long", "a".repeat(64)); // Long string
+            object.append_value("binary", vec![1u8, 2u8, 3u8]);
+
+            object.finish();
+            builder.finish();
+        }
+
+        // Verify object encoding
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Object as u8);
+
+        // Verify number of fields
+        let num_fields = value_buffer[1];
+        assert_eq!(num_fields, 12, "Object should have 12 fields");
+
+        // Create variant
+        let variant = Variant::new(metadata_buffer, value_buffer);
+        assert!(!variant.metadata().is_empty());
+        assert!(!variant.value().is_empty());
+    }
+
+    // 
=========================================================================

Review Comment:
   this is a very impressive set of test cases 👌 



##########
arrow-variant/Cargo.toml:
##########
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   After reviewing this PR, I think it might make sense to put the variant 
builder/reader into a different crate -- `parquet-variant` -- since, technically 
speaking, Variant is defined in the Parquet spec, not the Arrow spec.



##########
arrow-variant/src/builder/mod.rs:
##########
@@ -0,0 +1,1458 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Builder API for creating Variant binary values.
+//!
+//! This module provides a builder-style API for creating Variant values in the
+//! Arrow binary format. The API is modeled after the Arrow array builder APIs.
+//!
+//! # Example
+//!
+//! ```
+//! use std::io::Cursor;
+//! use arrow_variant::builder::{VariantBuilder, PrimitiveValue};
+//!
+//! // Create a builder for variant values
+//! let mut metadata_buffer = vec![];
+//! let mut builder = VariantBuilder::new(&mut metadata_buffer);
+//!
+//! // Create an object
+//! let mut value_buffer = vec![];
+//! let mut object_builder = builder.new_object(&mut value_buffer);
+//! object_builder.append_value("foo", 1);
+//! object_builder.append_value("bar", 100);
+//! object_builder.finish();
+//!
+//! // value_buffer now contains a valid variant value
+//! // builder contains metadata with fields "foo" and "bar"
+//!
+//! // Create another object reusing the same metadata
+//! let mut value_buffer2 = vec![];
+//! let mut object_builder2 = builder.new_object(&mut value_buffer2);
+//! object_builder2.append_value("foo", 2);
+//! object_builder2.append_value("bar", 200);
+//! object_builder2.finish();
+//!
+//! // Finalize the metadata
+//! builder.finish();
+//! // metadata_buffer now contains valid variant metadata bytes
+//! ```
+
+use indexmap::IndexMap;
+use std::collections::HashMap;
+use std::io::Write;
+
+use crate::encoder::{
+    encode_array_from_pre_encoded, encode_binary, encode_boolean, encode_date, 
encode_decimal16,
+    encode_decimal4, encode_decimal8, encode_float, encode_integer, 
encode_null,
+    encode_object_from_pre_encoded, encode_string, encode_time_ntz, 
encode_timestamp,
+    encode_timestamp_nanos, encode_timestamp_ntz, encode_timestamp_ntz_nanos, 
encode_uuid,
+    min_bytes_needed, write_int_with_size,
+};
+use arrow_schema::ArrowError;
+
+/// Values that can be stored in a Variant.
+#[derive(Debug, Clone)]
+pub enum PrimitiveValue {
+    /// Null value
+    Null,
+    /// Boolean value
+    Boolean(bool),
+    /// 8-bit integer
+    Int8(i8),
+    /// 16-bit integer
+    Int16(i16),
+    /// 32-bit integer
+    Int32(i32),
+    /// 64-bit integer
+    Int64(i64),
+    /// Single-precision floating point
+    Float(f32),
+    /// Double-precision floating point
+    Double(f64),
+    /// UTF-8 string
+    String(String),
+    /// Binary data
+    Binary(Vec<u8>),
+    /// Date value (days since epoch)
+    Date(i32),
+    /// Timestamp (milliseconds since epoch)
+    Timestamp(i64),
+    /// Timestamp without timezone (milliseconds since epoch)
+    TimestampNTZ(i64),
+    /// Time without timezone (milliseconds)
+    TimeNTZ(i64),
+    /// Timestamp with nanosecond precision
+    TimestampNanos(i64),
+    /// Timestamp without timezone with nanosecond precision
+    TimestampNTZNanos(i64),
+    /// UUID as 16 bytes
+    Uuid([u8; 16]),
+    /// Decimal with scale and 32-bit unscaled value (precision 1-9)
+    Decimal4(u8, i32),
+    /// Decimal with scale and 64-bit unscaled value (precision 10-18)
+    Decimal8(u8, i64),
+    /// Decimal with scale and 128-bit unscaled value (precision 19-38)
+    Decimal16(u8, i128),
+}
+
+impl From<i32> for PrimitiveValue {

Review Comment:
   👍 



##########
arrow-variant/src/lib.rs:
##########
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`arrow-variant`] contains utilities for working with the [Arrow 
Variant][format] binary format.

Review Comment:
   ❤️ 📖 



##########
arrow-variant/src/lib.rs:
##########
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`arrow-variant`] contains utilities for working with the [Arrow 
Variant][format] binary format.
+//!
+//! The Arrow Variant binary format is a serialization of a JSON-like value 
into a binary format
+//! optimized for columnar storage and processing in Apache Arrow. It supports 
storing primitive
+//! values, objects, and arrays with support for complex nested structures.
+//!
+//! # Creating Variant Values
+//!
+//! ```
+//! # use std::io::Cursor;
+//! # use arrow_variant::builder::VariantBuilder;
+//! # use arrow_schema::ArrowError;
+//! # fn main() -> Result<(), ArrowError> {
+//! // Create a builder for variant values
+//! let mut metadata_buffer = vec![];
+//! let mut builder = VariantBuilder::new(&mut metadata_buffer);
+//!
+//! // Create an object
+//! let mut value_buffer = vec![];
+//! let mut object_builder = builder.new_object(&mut value_buffer);
+//! object_builder.append_value("foo", 1);
+//! object_builder.append_value("bar", 100);
+//! object_builder.finish();
+//!
+//! // value_buffer now contains a valid variant value
+//! // builder contains metadata with fields "foo" and "bar"
+//!
+//! // Create another object reusing the same metadata
+//! let mut value_buffer2 = vec![];
+//! let mut object_builder2 = builder.new_object(&mut value_buffer2);
+//! object_builder2.append_value("foo", 2);
+//! object_builder2.append_value("bar", 200);
+//! object_builder2.finish();
+//!
+//! // Create a nested object: the equivalent of {"foo": {"bar": 100}}
+//! let mut value_buffer3 = vec![];
+//! let mut object_builder3 = builder.new_object(&mut value_buffer3);
+//!
+//! // Create a nested object under the "foo" field
+//! let mut foo_builder = object_builder3.append_object("foo");
+//! foo_builder.append_value("bar", 100);
+//! foo_builder.finish();
+//!
+//! // Finish the root object builder
+//! object_builder3.finish();
+//!
+//! // Finalize the metadata
+//! builder.finish();
+//! # Ok(())
+//! # }
+//! ```
+
+#![deny(rustdoc::broken_intra_doc_links)]
+#![warn(missing_docs)]
+
+/// Builder API for creating variant values
+pub mod builder;
+/// Encoder module for converting values to Variant binary format
+pub mod encoder;

Review Comment:
   I think we should start with a minimal API surface area (only expose the 
Builder and Variant types directly)
   ```suggestion
   /// Builder API for creating variant values 
   mod builder;
   /// Encoder module for converting values to Variant binary format
   mod encoder;
   ```



##########
arrow-variant/src/builder/mod.rs:
##########
@@ -0,0 +1,1458 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Builder API for creating Variant binary values.
+//!
+//! This module provides a builder-style API for creating Variant values in the
+//! Arrow binary format. The API is modeled after the Arrow array builder APIs.
+//!
+//! # Example
+//!
+//! ```
+//! use std::io::Cursor;
+//! use arrow_variant::builder::{VariantBuilder, PrimitiveValue};
+//!
+//! // Create a builder for variant values
+//! let mut metadata_buffer = vec![];
+//! let mut builder = VariantBuilder::new(&mut metadata_buffer);
+//!
+//! // Create an object
+//! let mut value_buffer = vec![];
+//! let mut object_builder = builder.new_object(&mut value_buffer);
+//! object_builder.append_value("foo", 1);
+//! object_builder.append_value("bar", 100);
+//! object_builder.finish();
+//!
+//! // value_buffer now contains a valid variant value
+//! // builder contains metadata with fields "foo" and "bar"
+//!
+//! // Create another object reusing the same metadata
+//! let mut value_buffer2 = vec![];
+//! let mut object_builder2 = builder.new_object(&mut value_buffer2);
+//! object_builder2.append_value("foo", 2);
+//! object_builder2.append_value("bar", 200);
+//! object_builder2.finish();
+//!
+//! // Finalize the metadata
+//! builder.finish();
+//! // metadata_buffer now contains valid variant metadata bytes
+//! ```
+
+use indexmap::IndexMap;
+use std::collections::HashMap;
+use std::io::Write;
+
+use crate::encoder::{
+    encode_array_from_pre_encoded, encode_binary, encode_boolean, encode_date, 
encode_decimal16,
+    encode_decimal4, encode_decimal8, encode_float, encode_integer, 
encode_null,
+    encode_object_from_pre_encoded, encode_string, encode_time_ntz, 
encode_timestamp,
+    encode_timestamp_nanos, encode_timestamp_ntz, encode_timestamp_ntz_nanos, 
encode_uuid,
+    min_bytes_needed, write_int_with_size,
+};
+use arrow_schema::ArrowError;
+
+/// Values that can be stored in a Variant.
+#[derive(Debug, Clone)]
+pub enum PrimitiveValue {
+    /// Null value
+    Null,
+    /// Boolean value
+    Boolean(bool),
+    /// 8-bit integer
+    Int8(i8),
+    /// 16-bit integer
+    Int16(i16),
+    /// 32-bit integer
+    Int32(i32),
+    /// 64-bit integer
+    Int64(i64),
+    /// Single-precision floating point
+    Float(f32),
+    /// Double-precision floating point
+    Double(f64),
+    /// UTF-8 string
+    String(String),
+    /// Binary data
+    Binary(Vec<u8>),
+    /// Date value (days since epoch)
+    Date(i32),
+    /// Timestamp (milliseconds since epoch)
+    Timestamp(i64),
+    /// Timestamp without timezone (milliseconds since epoch)
+    TimestampNTZ(i64),
+    /// Time without timezone (milliseconds)
+    TimeNTZ(i64),
+    /// Timestamp with nanosecond precision
+    TimestampNanos(i64),
+    /// Timestamp without timezone with nanosecond precision
+    TimestampNTZNanos(i64),
+    /// UUID as 16 bytes
+    Uuid([u8; 16]),
+    /// Decimal with scale and 32-bit unscaled value (precision 1-9)
+    Decimal4(u8, i32),
+    /// Decimal with scale and 64-bit unscaled value (precision 10-18)
+    Decimal8(u8, i64),
+    /// Decimal with scale and 128-bit unscaled value (precision 19-38)
+    Decimal16(u8, i128),
+}
+
+impl From<i32> for PrimitiveValue {
+    fn from(value: i32) -> Self {
+        PrimitiveValue::Int32(value)
+    }
+}
+
+impl From<i64> for PrimitiveValue {
+    fn from(value: i64) -> Self {
+        PrimitiveValue::Int64(value)
+    }
+}
+
+impl From<i16> for PrimitiveValue {
+    fn from(value: i16) -> Self {
+        PrimitiveValue::Int16(value)
+    }
+}
+
+impl From<i8> for PrimitiveValue {
+    fn from(value: i8) -> Self {
+        PrimitiveValue::Int8(value)
+    }
+}
+
+impl From<f32> for PrimitiveValue {
+    fn from(value: f32) -> Self {
+        PrimitiveValue::Float(value)
+    }
+}
+
+impl From<f64> for PrimitiveValue {
+    fn from(value: f64) -> Self {
+        PrimitiveValue::Double(value)
+    }
+}
+
+impl From<bool> for PrimitiveValue {
+    fn from(value: bool) -> Self {
+        PrimitiveValue::Boolean(value)
+    }
+}
+
+impl From<String> for PrimitiveValue {
+    fn from(value: String) -> Self {
+        PrimitiveValue::String(value)
+    }
+}
+
+impl From<&str> for PrimitiveValue {
+    fn from(value: &str) -> Self {
+        PrimitiveValue::String(value.to_string())
+    }
+}
+
+impl From<Vec<u8>> for PrimitiveValue {
+    fn from(value: Vec<u8>) -> Self {
+        PrimitiveValue::Binary(value)
+    }
+}
+
+impl From<&[u8]> for PrimitiveValue {
+    fn from(value: &[u8]) -> Self {
+        PrimitiveValue::Binary(value.to_vec())
+    }
+}
+
+impl<T: Into<PrimitiveValue>> From<Option<T>> for PrimitiveValue {
+    fn from(value: Option<T>) -> Self {
+        match value {
+            Some(v) => v.into(),
+            None => PrimitiveValue::Null,
+        }
+    }
+}
+
+/// Builder for Variant values.
+///
+/// This builder creates Variant values in the Arrow binary format.
+/// It manages metadata and helps create nested objects and arrays.
+///
+/// The builder follows a pattern similar to other Arrow array builders,
+/// but is specialized for creating Variant binary values.
+pub struct VariantBuilder<'a> {
+    /// Dictionary mapping field names to indexes
+    dictionary: HashMap<String, usize>,
+    /// Whether keys should be sorted in metadata
+    sort_keys: bool,
+    /// Whether the metadata is finalized
+    is_finalized: bool,
+    /// The output destination for metadata
+    metadata_output: Box<dyn Write + 'a>,
+}
+
+impl<'a> std::fmt::Debug for VariantBuilder<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("VariantBuilder")
+            .field("dictionary", &self.dictionary)
+            .field("sort_keys", &self.sort_keys)
+            .field("is_finalized", &self.is_finalized)
+            .field("metadata_output", &"<dyn Write>")
+            .finish()
+    }
+}
+
+impl<'a> VariantBuilder<'a> {
+    /// Creates a new VariantBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    pub fn new(metadata_output: impl Write + 'a) -> Self {
+        Self::new_with_sort(metadata_output, false)
+    }
+
+    /// Creates a new VariantBuilder with optional key sorting.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    /// * `sort_keys` - Whether keys should be sorted in metadata
+    pub fn new_with_sort(metadata_output: impl Write + 'a, sort_keys: bool) -> 
Self {
+        Self {
+            dictionary: HashMap::new(),
+            sort_keys,
+            is_finalized: false,
+            metadata_output: Box::new(metadata_output),
+        }
+    }
+
+    /// Creates a new ObjectBuilder for building an object variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    pub fn new_object<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ObjectBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new object after the builder has been 
finalized");

Review Comment:
   I think in general, the crates in parquet return errors rather than 
panicking. There might be a fancier way to use the Rust type system here to 
avoid having to panic at runtime. Something like:
   
   ```rust
   struct FinalizedVariantBuilder<'a> { 
     ...
   }
   ```
   
   That would have basically the same fields as `VariantBuilder`, but would not 
allow adding new fields, etc.
   
   (Let's not do this initially, I am just thinking out loud here)
   
   The important thing to sort out is whether `new_object` will return a `Result`.



##########
arrow-variant/src/builder/mod.rs:
##########
@@ -0,0 +1,1458 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Builder API for creating Variant binary values.
+//!
+//! This module provides a builder-style API for creating Variant values in the
+//! Arrow binary format. The API is modeled after the Arrow array builder APIs.
+//!
+//! # Example
+//!
+//! ```
+//! use std::io::Cursor;
+//! use arrow_variant::builder::{VariantBuilder, PrimitiveValue};
+//!
+//! // Create a builder for variant values
+//! let mut metadata_buffer = vec![];
+//! let mut builder = VariantBuilder::new(&mut metadata_buffer);
+//!
+//! // Create an object
+//! let mut value_buffer = vec![];
+//! let mut object_builder = builder.new_object(&mut value_buffer);
+//! object_builder.append_value("foo", 1);
+//! object_builder.append_value("bar", 100);
+//! object_builder.finish();
+//!
+//! // value_buffer now contains a valid variant value
+//! // builder contains metadata with fields "foo" and "bar"
+//!
+//! // Create another object reusing the same metadata
+//! let mut value_buffer2 = vec![];
+//! let mut object_builder2 = builder.new_object(&mut value_buffer2);
+//! object_builder2.append_value("foo", 2);
+//! object_builder2.append_value("bar", 200);
+//! object_builder2.finish();
+//!
+//! // Finalize the metadata
+//! builder.finish();
+//! // metadata_buffer now contains valid variant metadata bytes
+//! ```
+
+use indexmap::IndexMap;
+use std::collections::HashMap;
+use std::io::Write;
+
+use crate::encoder::{
+    encode_array_from_pre_encoded, encode_binary, encode_boolean, encode_date, 
encode_decimal16,
+    encode_decimal4, encode_decimal8, encode_float, encode_integer, 
encode_null,
+    encode_object_from_pre_encoded, encode_string, encode_time_ntz, 
encode_timestamp,
+    encode_timestamp_nanos, encode_timestamp_ntz, encode_timestamp_ntz_nanos, 
encode_uuid,
+    min_bytes_needed, write_int_with_size,
+};
+use arrow_schema::ArrowError;
+
+/// Values that can be stored in a Variant.
+#[derive(Debug, Clone)]
+pub enum PrimitiveValue {
+    /// Null value
+    Null,
+    /// Boolean value
+    Boolean(bool),
+    /// 8-bit integer
+    Int8(i8),
+    /// 16-bit integer
+    Int16(i16),
+    /// 32-bit integer
+    Int32(i32),
+    /// 64-bit integer
+    Int64(i64),
+    /// Single-precision floating point
+    Float(f32),
+    /// Double-precision floating point
+    Double(f64),
+    /// UTF-8 string
+    String(String),
+    /// Binary data
+    Binary(Vec<u8>),
+    /// Date value (days since epoch)
+    Date(i32),
+    /// Timestamp (milliseconds since epoch)
+    Timestamp(i64),
+    /// Timestamp without timezone (milliseconds since epoch)
+    TimestampNTZ(i64),
+    /// Time without timezone (milliseconds)
+    TimeNTZ(i64),
+    /// Timestamp with nanosecond precision
+    TimestampNanos(i64),
+    /// Timestamp without timezone with nanosecond precision
+    TimestampNTZNanos(i64),
+    /// UUID as 16 bytes
+    Uuid([u8; 16]),
+    /// Decimal with scale and 32-bit unscaled value (precision 1-9)
+    Decimal4(u8, i32),
+    /// Decimal with scale and 64-bit unscaled value (precision 10-18)
+    Decimal8(u8, i64),
+    /// Decimal with scale and 128-bit unscaled value (precision 19-38)
+    Decimal16(u8, i128),
+}
+
+impl From<i32> for PrimitiveValue {
+    fn from(value: i32) -> Self {
+        PrimitiveValue::Int32(value)
+    }
+}
+
+impl From<i64> for PrimitiveValue {
+    fn from(value: i64) -> Self {
+        PrimitiveValue::Int64(value)
+    }
+}
+
+impl From<i16> for PrimitiveValue {
+    fn from(value: i16) -> Self {
+        PrimitiveValue::Int16(value)
+    }
+}
+
+impl From<i8> for PrimitiveValue {
+    fn from(value: i8) -> Self {
+        PrimitiveValue::Int8(value)
+    }
+}
+
+impl From<f32> for PrimitiveValue {
+    fn from(value: f32) -> Self {
+        PrimitiveValue::Float(value)
+    }
+}
+
+impl From<f64> for PrimitiveValue {
+    fn from(value: f64) -> Self {
+        PrimitiveValue::Double(value)
+    }
+}
+
+impl From<bool> for PrimitiveValue {
+    fn from(value: bool) -> Self {
+        PrimitiveValue::Boolean(value)
+    }
+}
+
+impl From<String> for PrimitiveValue {
+    fn from(value: String) -> Self {
+        PrimitiveValue::String(value)
+    }
+}
+
+impl From<&str> for PrimitiveValue {
+    fn from(value: &str) -> Self {
+        PrimitiveValue::String(value.to_string())
+    }
+}
+
+impl From<Vec<u8>> for PrimitiveValue {
+    fn from(value: Vec<u8>) -> Self {
+        PrimitiveValue::Binary(value)
+    }
+}
+
+impl From<&[u8]> for PrimitiveValue {
+    fn from(value: &[u8]) -> Self {
+        PrimitiveValue::Binary(value.to_vec())
+    }
+}
+
+impl<T: Into<PrimitiveValue>> From<Option<T>> for PrimitiveValue {
+    fn from(value: Option<T>) -> Self {
+        match value {
+            Some(v) => v.into(),
+            None => PrimitiveValue::Null,
+        }
+    }
+}
+
+/// Builder for Variant values.
+///
+/// This builder creates Variant values in the Arrow binary format.
+/// It manages metadata and helps create nested objects and arrays.
+///
+/// The builder follows a pattern similar to other Arrow array builders,
+/// but is specialized for creating Variant binary values.
+pub struct VariantBuilder<'a> {
+    /// Dictionary mapping field names to indexes
+    dictionary: HashMap<String, usize>,
+    /// Whether keys should be sorted in metadata
+    sort_keys: bool,
+    /// Whether the metadata is finalized
+    is_finalized: bool,
+    /// The output destination for metadata
+    metadata_output: Box<dyn Write + 'a>,
+}
+
+impl<'a> std::fmt::Debug for VariantBuilder<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("VariantBuilder")
+            .field("dictionary", &self.dictionary)
+            .field("sort_keys", &self.sort_keys)
+            .field("is_finalized", &self.is_finalized)
+            .field("metadata_output", &"<dyn Write>")
+            .finish()
+    }
+}
+
+impl<'a> VariantBuilder<'a> {
+    /// Creates a new VariantBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    pub fn new(metadata_output: impl Write + 'a) -> Self {
+        Self::new_with_sort(metadata_output, false)
+    }
+
+    /// Creates a new VariantBuilder with optional key sorting.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    /// * `sort_keys` - Whether keys should be sorted in metadata
+    pub fn new_with_sort(metadata_output: impl Write + 'a, sort_keys: bool) -> 
Self {
+        Self {
+            dictionary: HashMap::new(),
+            sort_keys,
+            is_finalized: false,
+            metadata_output: Box::new(metadata_output),
+        }
+    }
+
+    /// Creates a new ObjectBuilder for building an object variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    pub fn new_object<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ObjectBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new object after the builder has been 
finalized");
+        }
+
+        ObjectBuilder::new(output, self)
+    }
+
+    /// Creates a new ArrayBuilder for building an array variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the array value
+    pub fn new_array<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ArrayBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new array after the builder has been 
finalized");
+        }
+
+        ArrayBuilder::new(output, self)
+    }
+
+    /// Adds a key to the dictionary if it doesn't already exist.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key to add
+    ///
+    /// # Returns
+    ///
+    /// The index of the key in the dictionary
+    pub(crate) fn add_key(&mut self, key: &str) -> Result<usize, ArrowError> {
+        if self.is_finalized {
+            return Err(ArrowError::VariantError(
+                "Cannot add keys after metadata has been 
finalized".to_string(),
+            ));
+        }
+
+        if let Some(idx) = self.dictionary.get(key) {
+            return Ok(*idx);
+        }
+
+        let idx = self.dictionary.len();
+        self.dictionary.insert(key.to_string(), idx);
+        Ok(idx)
+    }
+
+    /// Finalizes the metadata and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Get keys in sorted or insertion order
+        let mut keys: Vec<_> = self.dictionary.keys().cloned().collect();
+        if self.sort_keys {
+            keys.sort();
+
+            // Re-index keys based on sorted order
+            for (i, key) in keys.iter().enumerate() {
+                self.dictionary.insert(key.clone(), i);
+            }
+        }
+
+        // Calculate total size of dictionary strings
+        let total_string_size: usize = keys.iter().map(|k| k.len()).sum();
+
+        // Determine offset size based on max possible offset value
+        let max_offset = std::cmp::max(total_string_size, keys.len() + 1);
+        let offset_size = min_bytes_needed(max_offset);
+        let offset_size_minus_one = offset_size - 1;
+
+        // Construct header byte
+        let sorted_bit = if self.sort_keys { 1 } else { 0 };
+        let header = 0x01 | (sorted_bit << 4) | ((offset_size_minus_one as u8) 
<< 6);
+
+        // Write header byte
+        if let Err(e) = self.metadata_output.write_all(&[header]) {
+            panic!("Failed to write metadata header: {}", e);
+        }
+
+        // Write dictionary size (number of keys)
+        let dict_size = keys.len() as u32;
+        if let Err(e) = write_int_with_size(dict_size, offset_size, &mut 
self.metadata_output) {
+            panic!("Failed to write dictionary size: {}", e);
+        }
+
+        // Calculate and write offsets
+        let mut current_offset = 0u32;
+        let mut offsets = Vec::with_capacity(keys.len() + 1);
+
+        offsets.push(current_offset);
+        for key in &keys {
+            current_offset += key.len() as u32;
+            offsets.push(current_offset);
+        }
+
+        // Write offsets using the helper function
+        for offset in offsets {
+            if let Err(e) = write_int_with_size(offset, offset_size, &mut 
self.metadata_output) {
+                panic!("Failed to write offset: {}", e);
+            }
+        }
+
+        // Write dictionary strings
+        for key in keys {
+            if let Err(e) = self.metadata_output.write_all(key.as_bytes()) {
+                panic!("Failed to write dictionary string: {}", e);
+            }
+        }
+
+        self.is_finalized = true;
+    }
+
+    /// Returns whether the builder has been finalized.
+    pub fn is_finalized(&self) -> bool {
+        self.is_finalized
+    }
+}
+
+/// Builder for Variant object values.
+pub struct ObjectBuilder<'a, 'b> {
+    /// Destination for the object value
+    output: &'a mut Vec<u8>,
+    /// Reference to the variant builder
+    variant_builder: &'a mut VariantBuilder<'b>,
+    /// Temporary buffer for field values - stored as key_index -> value_buffer
+    /// Using IndexMap for O(1) access with ability to sort by key
+    value_buffers: IndexMap<usize, Vec<u8>>,
+    /// Whether the object has been finalized
+    is_finalized: bool,
+}
+
+impl<'a, 'b> std::fmt::Debug for ObjectBuilder<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ObjectBuilder")
+            .field("variant_builder", &self.variant_builder)
+            .field("value_buffers", &self.value_buffers)
+            .field("is_finalized", &self.is_finalized)
+            .finish()
+    }
+}
+
+impl<'a, 'b> ObjectBuilder<'a, 'b> {
+    /// Creates a new ObjectBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    /// * `variant_builder` - The parent variant builder
+    fn new(output: &'a mut Vec<u8>, variant_builder: &'a mut 
VariantBuilder<'b>) -> Self {
+        Self {
+            output,
+            variant_builder,
+            value_buffers: IndexMap::new(),
+            is_finalized: false,
+        }
+    }
+
+    /// Adds a primitive value to the object.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the value
+    /// * `value` - The primitive value to add
+    pub fn append_value<T: Into<PrimitiveValue>>(&mut self, key: &str, value: 
T) {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a buffer for this value
+        let mut buffer = Vec::new();
+
+        // Convert the value to PrimitiveValue and write it
+        let primitive_value = value.into();
+        if let Err(e) = write_value(&mut buffer, &primitive_value) {
+            panic!("Failed to write value: {}", e);
+        }
+
+        // Store the buffer for this field - will overwrite if key already 
exists
+        self.value_buffers.insert(key_index, buffer);
+    }
+
+    /// Creates a nested object builder.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the nested object
+    pub fn append_object<'c>(&'c mut self, key: &str) -> ObjectBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a temporary buffer for the nested object and store it
+        let nested_buffer = Vec::new();
+        self.value_buffers.insert(key_index, nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.get_mut(&key_index).unwrap();
+
+        // Create a new object builder for this nested buffer
+        ObjectBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Creates a nested array builder.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the nested array
+    pub fn append_array<'c>(&'c mut self, key: &str) -> ArrayBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a temporary buffer for the nested array and store it
+        let nested_buffer = Vec::new();
+        self.value_buffers.insert(key_index, nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.get_mut(&key_index).unwrap();
+
+        // Create a new array builder for this nested buffer
+        ArrayBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Finalizes the object and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Sort the entries by key index
+        self.value_buffers.sort_keys();
+
+        // Prepare field IDs and values for encoding
+        let field_ids: Vec<usize> = 
self.value_buffers.keys().copied().collect();
+        let field_values: Vec<&[u8]> = self.value_buffers.values().map(|v| 
v.as_slice()).collect();
+
+        // Encode the object directly to output
+        if let Err(e) = encode_object_from_pre_encoded(&field_ids, 
&field_values, self.output) {
+            panic!("Failed to encode object: {}", e);
+        }
+
+        self.is_finalized = true;
+    }
+}
+
+/// Builder for Variant array values.
+pub struct ArrayBuilder<'a, 'b> {
+    /// Destination for the array value
+    output: &'a mut Vec<u8>,
+    /// Reference to the variant builder
+    variant_builder: &'a mut VariantBuilder<'b>,
+    /// Temporary buffers for array elements
+    value_buffers: Vec<Vec<u8>>,
+    /// Whether the array has been finalized
+    is_finalized: bool,
+}
+
+impl<'a, 'b> std::fmt::Debug for ArrayBuilder<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ArrayBuilder")
+            .field("variant_builder", &self.variant_builder)
+            .field("value_buffers", &self.value_buffers)
+            .field("is_finalized", &self.is_finalized)
+            .finish()
+    }
+}
+
+impl<'a, 'b> ArrayBuilder<'a, 'b> {
+    /// Creates a new ArrayBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the array value
+    /// * `variant_builder` - The parent variant builder
+    fn new(output: &'a mut Vec<u8>, variant_builder: &'a mut 
VariantBuilder<'b>) -> Self {
+        Self {
+            output,
+            variant_builder,
+            value_buffers: Vec::new(),
+            is_finalized: false,
+        }
+    }
+
+    /// Adds a primitive value to the array.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The primitive value to add
+    pub fn append_value<T: Into<PrimitiveValue>>(&mut self, value: T) {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a buffer for this value
+        let mut buffer = Vec::new();
+
+        // Convert the value to PrimitiveValue and write it
+        let primitive_value = value.into();
+        if let Err(e) = write_value(&mut buffer, &primitive_value) {
+            panic!("Failed to write value: {}", e);
+        }
+
+        // Store the buffer for this element
+        self.value_buffers.push(buffer);
+    }
+
+    /// Creates a nested object builder.
+    ///
+    /// # Returns the index of the nested object in the array
+    pub fn append_object<'c>(&'c mut self) -> ObjectBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a temporary buffer for the nested object
+        let nested_buffer = Vec::new();
+        self.value_buffers.push(nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.last_mut().unwrap();
+
+        // Create a new object builder for this nested buffer
+        ObjectBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Creates a nested array builder.
+    ///
+    /// # Returns the index of the nested array in the array
+    pub fn append_array<'c>(&'c mut self) -> ArrayBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a temporary buffer for the nested array
+        let nested_buffer = Vec::new();
+        self.value_buffers.push(nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.last_mut().unwrap();
+
+        // Create a new array builder for this nested buffer
+        ArrayBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Finalizes the array and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Prepare slices for values
+        let values: Vec<&[u8]> = self.value_buffers.iter().map(|v| 
v.as_slice()).collect();
+
+        // Encode the array directly to output
+        if let Err(e) = encode_array_from_pre_encoded(&values, self.output) {
+            panic!("Failed to encode array: {}", e);
+        }
+
+        self.is_finalized = true;
+    }
+}
+
+/// Writes a primitive value to a buffer using the Variant format.
+///
+/// This function handles the correct encoding of primitive values by utilizing
+/// the encoder module functionality.
+fn write_value(buffer: &mut Vec<u8>, value: &PrimitiveValue) -> Result<(), 
ArrowError> {
+    match value {
+        PrimitiveValue::Null => {
+            encode_null(buffer);
+        }
+        PrimitiveValue::Boolean(val) => {
+            encode_boolean(*val, buffer);
+        }
+        PrimitiveValue::Int8(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int16(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int32(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int64(val) => {
+            encode_integer(*val, buffer);
+        }
+        PrimitiveValue::Float(val) => {
+            encode_float(*val as f64, buffer);
+        }
+        PrimitiveValue::Double(val) => {
+            encode_float(*val, buffer);
+        }
+        PrimitiveValue::String(val) => {
+            encode_string(val, buffer);
+        }
+        PrimitiveValue::Binary(val) => {
+            encode_binary(val, buffer);
+        }
+        PrimitiveValue::Date(val) => {
+            encode_date(*val, buffer);
+        }
+        PrimitiveValue::Timestamp(val) => {
+            encode_timestamp(*val, buffer);
+        }
+        PrimitiveValue::TimestampNTZ(val) => {
+            encode_timestamp_ntz(*val, buffer);
+        }
+        PrimitiveValue::TimeNTZ(val) => {
+            encode_time_ntz(*val, buffer);
+        }
+        PrimitiveValue::TimestampNanos(val) => {
+            encode_timestamp_nanos(*val, buffer);
+        }
+        PrimitiveValue::TimestampNTZNanos(val) => {
+            encode_timestamp_ntz_nanos(*val, buffer);
+        }
+        PrimitiveValue::Uuid(val) => {
+            encode_uuid(val, buffer);
+        }
+        PrimitiveValue::Decimal4(scale, unscaled_value) => {
+            encode_decimal4(*scale, *unscaled_value, buffer);
+        }
+        PrimitiveValue::Decimal8(scale, unscaled_value) => {
+            encode_decimal8(*scale, *unscaled_value, buffer);
+        }
+        PrimitiveValue::Decimal16(scale, unscaled_value) => {
+            encode_decimal16(*scale, *unscaled_value, buffer);
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::encoder::VariantBasicType;
+    use arrow_schema::extension::Variant;
+
+    // Helper function to extract keys from metadata for testing
+    fn get_metadata_keys(metadata: &[u8]) -> Vec<String> {
+        // Simple implementation to extract keys from metadata buffer
+        // This avoids dependency on VariantReader which might not be 
accessible
+
+        // Skip the header byte
+        let mut pos = 1;
+
+        // Get offset size from header byte
+        let offset_size = ((metadata[0] >> 6) & 0x03) + 1;
+
+        // Read dictionary size
+        let mut dict_size = 0usize;
+        for i in 0..offset_size {
+            dict_size |= (metadata[pos + i as usize] as usize) << (i * 8);
+        }
+        pos += offset_size as usize;
+
+        if dict_size == 0 {
+            return vec![];
+        }
+
+        // Read offsets
+        let mut offsets = Vec::with_capacity(dict_size + 1);
+        for _ in 0..=dict_size {
+            let mut offset = 0usize;
+            for i in 0..offset_size {
+                offset |= (metadata[pos + i as usize] as usize) << (i * 8);
+            }
+            offsets.push(offset);
+            pos += offset_size as usize;
+        }
+
+        // Extract keys using offsets
+        let mut keys = Vec::with_capacity(dict_size);
+        for i in 0..dict_size {
+            let start = offsets[i];
+            let end = offsets[i + 1];
+            let key_bytes = &metadata[pos + start..pos + end];
+            keys.push(String::from_utf8_lossy(key_bytes).to_string());
+        }
+
+        keys
+    }
+
+    // 
=========================================================================
+    // Basic builder functionality tests
+    // 
=========================================================================
+
+    #[test]
+    fn test_basic_object_builder() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut object_builder = builder.new_object(&mut value_buffer);
+
+            // Test various primitive types
+            object_builder.append_value("null", Option::<i32>::None);
+            object_builder.append_value("bool_true", true);
+            object_builder.append_value("bool_false", false);
+            object_builder.append_value("int8", 42i8);
+            object_builder.append_value("int16", 1000i16);
+            object_builder.append_value("int32", 100000i32);
+            object_builder.append_value("int64", 1000000000i64);
+            object_builder.append_value("float", 3.14f32);
+            object_builder.append_value("double", 2.71828f64);
+            object_builder.append_value("string", "hello world");
+            object_builder.append_value("binary", vec![1u8, 2u8, 3u8]);
+
+            object_builder.finish();
+            builder.finish();
+        }
+
+        // Verify object encoding
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Object as u8);
+
+        // Verify metadata contains all keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 11, "Should have 11 keys in metadata");
+        assert!(keys.contains(&"null".to_string()), "Missing 'null' key");
+        assert!(
+            keys.contains(&"bool_true".to_string()),
+            "Missing 'bool_true' key"
+        );
+        assert!(keys.contains(&"string".to_string()), "Missing 'string' key");
+
+        // Verify object has the correct number of entries
+        // First byte after header is the number of fields (if small object)
+        assert!(value_buffer.len() > 1, "Value buffer too small");
+        let num_fields = value_buffer[1];
+        assert_eq!(num_fields as usize, 11, "Object should have 11 fields");

Review Comment:
   Rather than testing these "internal" fields, I think the tests would be 
better if they tested that the resulting value is a readable Variant value. See 
my next comment below.



##########
arrow-variant/Cargo.toml:
##########
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "arrow-variant"
+version = { workspace = true }
+description = "JSON to Arrow Variant conversion utilities"
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+keywords = ["arrow"]
+include = [
+    "src/**/*.rs",
+    "Cargo.toml",
+]
+edition = { workspace = true }
+rust-version = { workspace = true }
+
+[lib]
+name = "arrow_variant"
+path = "src/lib.rs"
+
+[features]
+default = []
+
+[dependencies]
+arrow-array = { workspace = true }
+arrow-buffer = { workspace = true }
+arrow-cast = { workspace = true, optional = true }
+arrow-data = { workspace = true }
+arrow-schema = { workspace = true, features = ["canonical_extension_types"] }
+serde = { version = "1.0", default-features = false }
+serde_json = { version = "1.0", default-features = false, features = ["std"] }
+indexmap = "2.0.0"

Review Comment:
   I don't think any of these dependencies are used, so we can remove them:
   
   ```
   arrow-array = { workspace = true }
   arrow-buffer = { workspace = true }
   arrow-cast = { workspace = true, optional = true }
   arrow-data = { workspace = true }
   serde = { version = "1.0", default-features = false }
   serde_json = { version = "1.0", default-features = false, features = ["std"] 
}
   ```
   



##########
arrow-schema/src/error.rs:
##########
@@ -60,6 +60,8 @@ pub enum ArrowError {
     DictionaryKeyOverflowError,
     /// Error when the run end index in a REE array is bigger than the array 
length
     RunEndIndexOverflowError,
+    /// Error during Variant operations in `arrow-variant`.

Review Comment:
   If we add a new variant to this enum, it will be a "breaking API change", 
as downstream projects would then potentially have to update their code to 
handle the new variant.
   
   We make releases with API changes every three months:
   
https://github.com/apache/arrow-rs?tab=readme-ov-file#release-versioning-and-schedule
   
   So in other words, it would be great to remove this change from the PR so we 
can merge it faster.  



##########
arrow-variant/Cargo.toml:
##########
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "arrow-variant"
+version = { workspace = true }
+description = "JSON to Arrow Variant conversion utilities"

Review Comment:
   ```suggestion
   description = "Rust API for reading/writing Apache Parquet Variant values"
   ```



##########
arrow-variant/src/builder/mod.rs:
##########
@@ -0,0 +1,1458 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Builder API for creating Variant binary values.
+//!
+//! This module provides a builder-style API for creating Variant values in the
+//! Arrow binary format. The API is modeled after the Arrow array builder APIs.
+//!
+//! # Example
+//!
+//! ```
+//! use std::io::Cursor;
+//! use arrow_variant::builder::{VariantBuilder, PrimitiveValue};
+//!
+//! // Create a builder for variant values
+//! let mut metadata_buffer = vec![];
+//! let mut builder = VariantBuilder::new(&mut metadata_buffer);
+//!
+//! // Create an object
+//! let mut value_buffer = vec![];
+//! let mut object_builder = builder.new_object(&mut value_buffer);
+//! object_builder.append_value("foo", 1);
+//! object_builder.append_value("bar", 100);
+//! object_builder.finish();
+//!
+//! // value_buffer now contains a valid variant value
+//! // builder contains metadata with fields "foo" and "bar"
+//!
+//! // Create another object reusing the same metadata
+//! let mut value_buffer2 = vec![];
+//! let mut object_builder2 = builder.new_object(&mut value_buffer2);
+//! object_builder2.append_value("foo", 2);
+//! object_builder2.append_value("bar", 200);
+//! object_builder2.finish();
+//!
+//! // Finalize the metadata
+//! builder.finish();
+//! // metadata_buffer now contains valid variant metadata bytes
+//! ```
+
+use indexmap::IndexMap;
+use std::collections::HashMap;
+use std::io::Write;
+
+use crate::encoder::{
+    encode_array_from_pre_encoded, encode_binary, encode_boolean, encode_date, 
encode_decimal16,
+    encode_decimal4, encode_decimal8, encode_float, encode_integer, 
encode_null,
+    encode_object_from_pre_encoded, encode_string, encode_time_ntz, 
encode_timestamp,
+    encode_timestamp_nanos, encode_timestamp_ntz, encode_timestamp_ntz_nanos, 
encode_uuid,
+    min_bytes_needed, write_int_with_size,
+};
+use arrow_schema::ArrowError;
+
+/// Values that can be stored in a Variant.
+#[derive(Debug, Clone)]
+pub enum PrimitiveValue {
+    /// Null value
+    Null,
+    /// Boolean value
+    Boolean(bool),
+    /// 8-bit integer
+    Int8(i8),
+    /// 16-bit integer
+    Int16(i16),
+    /// 32-bit integer
+    Int32(i32),
+    /// 64-bit integer
+    Int64(i64),
+    /// Single-precision floating point
+    Float(f32),
+    /// Double-precision floating point
+    Double(f64),
+    /// UTF-8 string
+    String(String),
+    /// Binary data
+    Binary(Vec<u8>),
+    /// Date value (days since epoch)
+    Date(i32),
+    /// Timestamp (milliseconds since epoch)
+    Timestamp(i64),
+    /// Timestamp without timezone (milliseconds since epoch)
+    TimestampNTZ(i64),
+    /// Time without timezone (milliseconds)
+    TimeNTZ(i64),
+    /// Timestamp with nanosecond precision
+    TimestampNanos(i64),
+    /// Timestamp without timezone with nanosecond precision
+    TimestampNTZNanos(i64),
+    /// UUID as 16 bytes
+    Uuid([u8; 16]),
+    /// Decimal with scale and 32-bit unscaled value (precision 1-9)
+    Decimal4(u8, i32),
+    /// Decimal with scale and 64-bit unscaled value (precision 10-18)
+    Decimal8(u8, i64),
+    /// Decimal with scale and 128-bit unscaled value (precision 19-38)
+    Decimal16(u8, i128),
+}
+
+impl From<i32> for PrimitiveValue {
+    fn from(value: i32) -> Self {
+        PrimitiveValue::Int32(value)
+    }
+}
+
+impl From<i64> for PrimitiveValue {
+    fn from(value: i64) -> Self {
+        PrimitiveValue::Int64(value)
+    }
+}
+
+impl From<i16> for PrimitiveValue {
+    fn from(value: i16) -> Self {
+        PrimitiveValue::Int16(value)
+    }
+}
+
+impl From<i8> for PrimitiveValue {
+    fn from(value: i8) -> Self {
+        PrimitiveValue::Int8(value)
+    }
+}
+
+impl From<f32> for PrimitiveValue {
+    fn from(value: f32) -> Self {
+        PrimitiveValue::Float(value)
+    }
+}
+
+impl From<f64> for PrimitiveValue {
+    fn from(value: f64) -> Self {
+        PrimitiveValue::Double(value)
+    }
+}
+
+impl From<bool> for PrimitiveValue {
+    fn from(value: bool) -> Self {
+        PrimitiveValue::Boolean(value)
+    }
+}
+
+impl From<String> for PrimitiveValue {
+    fn from(value: String) -> Self {
+        PrimitiveValue::String(value)
+    }
+}
+
+impl From<&str> for PrimitiveValue {
+    fn from(value: &str) -> Self {
+        PrimitiveValue::String(value.to_string())
+    }
+}
+
+impl From<Vec<u8>> for PrimitiveValue {
+    fn from(value: Vec<u8>) -> Self {
+        PrimitiveValue::Binary(value)
+    }
+}
+
+impl From<&[u8]> for PrimitiveValue {
+    fn from(value: &[u8]) -> Self {
+        PrimitiveValue::Binary(value.to_vec())
+    }
+}
+
+impl<T: Into<PrimitiveValue>> From<Option<T>> for PrimitiveValue {
+    fn from(value: Option<T>) -> Self {
+        match value {
+            Some(v) => v.into(),
+            None => PrimitiveValue::Null,
+        }
+    }
+}
+
+/// Builder for Variant values.
+///
+/// This builder creates Variant values in the Arrow binary format.
+/// It manages metadata and helps create nested objects and arrays.
+///
+/// The builder follows a pattern similar to other Arrow array builders,
+/// but is specialized for creating Variant binary values.
+pub struct VariantBuilder<'a> {
+    /// Dictionary mapping field names to indexes
+    dictionary: HashMap<String, usize>,
+    /// Whether keys should be sorted in metadata
+    sort_keys: bool,
+    /// Whether the metadata is finalized
+    is_finalized: bool,
+    /// The output destination for metadata
+    metadata_output: Box<dyn Write + 'a>,
+}
+
+impl<'a> std::fmt::Debug for VariantBuilder<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("VariantBuilder")
+            .field("dictionary", &self.dictionary)
+            .field("sort_keys", &self.sort_keys)
+            .field("is_finalized", &self.is_finalized)
+            .field("metadata_output", &"<dyn Write>")
+            .finish()
+    }
+}
+
+impl<'a> VariantBuilder<'a> {
+    /// Creates a new VariantBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    pub fn new(metadata_output: impl Write + 'a) -> Self {
+        Self::new_with_sort(metadata_output, false)
+    }
+
+    /// Creates a new VariantBuilder with optional key sorting.
+    ///
+    /// # Arguments
+    ///
+    /// * `metadata_output` - The destination for metadata
+    /// * `sort_keys` - Whether keys should be sorted in metadata
+    pub fn new_with_sort(metadata_output: impl Write + 'a, sort_keys: bool) -> 
Self {
+        Self {
+            dictionary: HashMap::new(),
+            sort_keys,
+            is_finalized: false,
+            metadata_output: Box::new(metadata_output),
+        }
+    }
+
+    /// Creates a new ObjectBuilder for building an object variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    pub fn new_object<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ObjectBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new object after the builder has been 
finalized");
+        }
+
+        ObjectBuilder::new(output, self)
+    }
+
+    /// Creates a new ArrayBuilder for building an array variant.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the array value
+    pub fn new_array<'b>(&'b mut self, output: &'b mut Vec<u8>) -> 
ArrayBuilder<'b, 'a>
+    where
+        'a: 'b,
+    {
+        if self.is_finalized {
+            panic!("Cannot create a new array after the builder has been 
finalized");
+        }
+
+        ArrayBuilder::new(output, self)
+    }
+
+    /// Adds a key to the dictionary if it doesn't already exist.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key to add
+    ///
+    /// # Returns
+    ///
+    /// The index of the key in the dictionary
+    pub(crate) fn add_key(&mut self, key: &str) -> Result<usize, ArrowError> {
+        if self.is_finalized {
+            return Err(ArrowError::VariantError(
+                "Cannot add keys after metadata has been 
finalized".to_string(),
+            ));
+        }
+
+        if let Some(idx) = self.dictionary.get(key) {
+            return Ok(*idx);
+        }
+
+        let idx = self.dictionary.len();
+        self.dictionary.insert(key.to_string(), idx);
+        Ok(idx)
+    }
+
+    /// Finalizes the metadata and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Get keys in sorted or insertion order
+        let mut keys: Vec<_> = self.dictionary.keys().cloned().collect();
+        if self.sort_keys {
+            keys.sort();
+
+            // Re-index keys based on sorted order
+            for (i, key) in keys.iter().enumerate() {
+                self.dictionary.insert(key.clone(), i);
+            }
+        }
+
+        // Calculate total size of dictionary strings
+        let total_string_size: usize = keys.iter().map(|k| k.len()).sum();
+
+        // Determine offset size based on max possible offset value
+        let max_offset = std::cmp::max(total_string_size, keys.len() + 1);
+        let offset_size = min_bytes_needed(max_offset);
+        let offset_size_minus_one = offset_size - 1;
+
+        // Construct header byte
+        let sorted_bit = if self.sort_keys { 1 } else { 0 };
+        let header = 0x01 | (sorted_bit << 4) | ((offset_size_minus_one as u8) 
<< 6);
+
+        // Write header byte
+        if let Err(e) = self.metadata_output.write_all(&[header]) {
+            panic!("Failed to write metadata header: {}", e);
+        }
+
+        // Write dictionary size (number of keys)
+        let dict_size = keys.len() as u32;
+        if let Err(e) = write_int_with_size(dict_size, offset_size, &mut 
self.metadata_output) {
+            panic!("Failed to write dictionary size: {}", e);
+        }
+
+        // Calculate and write offsets
+        let mut current_offset = 0u32;
+        let mut offsets = Vec::with_capacity(keys.len() + 1);
+
+        offsets.push(current_offset);
+        for key in &keys {
+            current_offset += key.len() as u32;
+            offsets.push(current_offset);
+        }
+
+        // Write offsets using the helper function
+        for offset in offsets {
+            if let Err(e) = write_int_with_size(offset, offset_size, &mut 
self.metadata_output) {
+                panic!("Failed to write offset: {}", e);
+            }
+        }
+
+        // Write dictionary strings
+        for key in keys {
+            if let Err(e) = self.metadata_output.write_all(key.as_bytes()) {
+                panic!("Failed to write dictionary string: {}", e);
+            }
+        }
+
+        self.is_finalized = true;
+    }
+
+    /// Returns whether the builder has been finalized.
+    pub fn is_finalized(&self) -> bool {
+        self.is_finalized
+    }
+}
+
+/// Builder for Variant object values.
+pub struct ObjectBuilder<'a, 'b> {
+    /// Destination for the object value
+    output: &'a mut Vec<u8>,
+    /// Reference to the variant builder
+    variant_builder: &'a mut VariantBuilder<'b>,
+    /// Temporary buffer for field values - stored as key_index -> value_buffer
+    /// Using IndexMap for O(1) access with ability to sort by key
+    value_buffers: IndexMap<usize, Vec<u8>>,
+    /// Whether the object has been finalized
+    is_finalized: bool,
+}
+
+impl<'a, 'b> std::fmt::Debug for ObjectBuilder<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ObjectBuilder")
+            .field("variant_builder", &self.variant_builder)
+            .field("value_buffers", &self.value_buffers)
+            .field("is_finalized", &self.is_finalized)
+            .finish()
+    }
+}
+
+impl<'a, 'b> ObjectBuilder<'a, 'b> {
+    /// Creates a new ObjectBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the object value
+    /// * `variant_builder` - The parent variant builder
+    fn new(output: &'a mut Vec<u8>, variant_builder: &'a mut 
VariantBuilder<'b>) -> Self {
+        Self {
+            output,
+            variant_builder,
+            value_buffers: IndexMap::new(),
+            is_finalized: false,
+        }
+    }
+
+    /// Adds a primitive value to the object.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the value
+    /// * `value` - The primitive value to add
+    pub fn append_value<T: Into<PrimitiveValue>>(&mut self, key: &str, value: 
T) {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a buffer for this value
+        let mut buffer = Vec::new();
+
+        // Convert the value to PrimitiveValue and write it
+        let primitive_value = value.into();
+        if let Err(e) = write_value(&mut buffer, &primitive_value) {
+            panic!("Failed to write value: {}", e);
+        }
+
+        // Store the buffer for this field - will overwrite if key already 
exists
+        self.value_buffers.insert(key_index, buffer);
+    }
+
+    /// Creates a nested object builder.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the nested object
+    pub fn append_object<'c>(&'c mut self, key: &str) -> ObjectBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a temporary buffer for the nested object and store it
+        let nested_buffer = Vec::new();
+        self.value_buffers.insert(key_index, nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.get_mut(&key_index).unwrap();
+
+        // Create a new object builder for this nested buffer
+        ObjectBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Creates a nested array builder.
+    ///
+    /// # Arguments
+    ///
+    /// * `key` - The key for the nested array
+    pub fn append_array<'c>(&'c mut self, key: &str) -> ArrayBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized object");
+        }
+
+        // Add the key to metadata and get its index
+        let key_index = match self.variant_builder.add_key(key) {
+            Ok(idx) => idx,
+            Err(e) => panic!("Failed to add key: {}", e),
+        };
+
+        // Create a temporary buffer for the nested array and store it
+        let nested_buffer = Vec::new();
+        self.value_buffers.insert(key_index, nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.get_mut(&key_index).unwrap();
+
+        // Create a new array builder for this nested buffer
+        ArrayBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Finalizes the object and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Sort the entries by key index
+        self.value_buffers.sort_keys();
+
+        // Prepare field IDs and values for encoding
+        let field_ids: Vec<usize> = 
self.value_buffers.keys().copied().collect();
+        let field_values: Vec<&[u8]> = self.value_buffers.values().map(|v| 
v.as_slice()).collect();
+
+        // Encode the object directly to output
+        if let Err(e) = encode_object_from_pre_encoded(&field_ids, 
&field_values, self.output) {
+            panic!("Failed to encode object: {}", e);
+        }
+
+        self.is_finalized = true;
+    }
+}
+
+/// Builder for Variant array values.
+pub struct ArrayBuilder<'a, 'b> {
+    /// Destination for the array value
+    output: &'a mut Vec<u8>,
+    /// Reference to the variant builder
+    variant_builder: &'a mut VariantBuilder<'b>,
+    /// Temporary buffers for array elements
+    value_buffers: Vec<Vec<u8>>,
+    /// Whether the array has been finalized
+    is_finalized: bool,
+}
+
+impl<'a, 'b> std::fmt::Debug for ArrayBuilder<'a, 'b> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ArrayBuilder")
+            .field("variant_builder", &self.variant_builder)
+            .field("value_buffers", &self.value_buffers)
+            .field("is_finalized", &self.is_finalized)
+            .finish()
+    }
+}
+
+impl<'a, 'b> ArrayBuilder<'a, 'b> {
+    /// Creates a new ArrayBuilder.
+    ///
+    /// # Arguments
+    ///
+    /// * `output` - The destination for the array value
+    /// * `variant_builder` - The parent variant builder
+    fn new(output: &'a mut Vec<u8>, variant_builder: &'a mut 
VariantBuilder<'b>) -> Self {
+        Self {
+            output,
+            variant_builder,
+            value_buffers: Vec::new(),
+            is_finalized: false,
+        }
+    }
+
+    /// Adds a primitive value to the array.
+    ///
+    /// # Arguments
+    ///
+    /// * `value` - The primitive value to add
+    pub fn append_value<T: Into<PrimitiveValue>>(&mut self, value: T) {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a buffer for this value
+        let mut buffer = Vec::new();
+
+        // Convert the value to PrimitiveValue and write it
+        let primitive_value = value.into();
+        if let Err(e) = write_value(&mut buffer, &primitive_value) {
+            panic!("Failed to write value: {}", e);
+        }
+
+        // Store the buffer for this element
+        self.value_buffers.push(buffer);
+    }
+
+    /// Creates a nested object builder.
+    ///
+    /// # Returns the index of the nested object in the array
+    pub fn append_object<'c>(&'c mut self) -> ObjectBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a temporary buffer for the nested object
+        let nested_buffer = Vec::new();
+        self.value_buffers.push(nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.last_mut().unwrap();
+
+        // Create a new object builder for this nested buffer
+        ObjectBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Creates a nested array builder.
+    ///
+    /// # Returns the index of the nested array in the array
+    pub fn append_array<'c>(&'c mut self) -> ArrayBuilder<'c, 'b>
+    where
+        'a: 'c,
+    {
+        if self.is_finalized {
+            panic!("Cannot append to a finalized array");
+        }
+
+        // Create a temporary buffer for the nested array
+        let nested_buffer = Vec::new();
+        self.value_buffers.push(nested_buffer);
+
+        // Get a mutable reference to the value buffer we just inserted
+        let nested_buffer = self.value_buffers.last_mut().unwrap();
+
+        // Create a new array builder for this nested buffer
+        ArrayBuilder::new(nested_buffer, self.variant_builder)
+    }
+
+    /// Finalizes the array and writes it to the output.
+    pub fn finish(&mut self) {
+        if self.is_finalized {
+            return;
+        }
+
+        // Prepare slices for values
+        let values: Vec<&[u8]> = self.value_buffers.iter().map(|v| 
v.as_slice()).collect();
+
+        // Encode the array directly to output
+        if let Err(e) = encode_array_from_pre_encoded(&values, self.output) {
+            panic!("Failed to encode array: {}", e);
+        }
+
+        self.is_finalized = true;
+    }
+}
+
+/// Writes a primitive value to a buffer using the Variant format.
+///
+/// This function handles the correct encoding of primitive values by utilizing
+/// the encoder module functionality.
+fn write_value(buffer: &mut Vec<u8>, value: &PrimitiveValue) -> Result<(), 
ArrowError> {
+    match value {
+        PrimitiveValue::Null => {
+            encode_null(buffer);
+        }
+        PrimitiveValue::Boolean(val) => {
+            encode_boolean(*val, buffer);
+        }
+        PrimitiveValue::Int8(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int16(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int32(val) => {
+            encode_integer(*val as i64, buffer);
+        }
+        PrimitiveValue::Int64(val) => {
+            encode_integer(*val, buffer);
+        }
+        PrimitiveValue::Float(val) => {
+            encode_float(*val as f64, buffer);
+        }
+        PrimitiveValue::Double(val) => {
+            encode_float(*val, buffer);
+        }
+        PrimitiveValue::String(val) => {
+            encode_string(val, buffer);
+        }
+        PrimitiveValue::Binary(val) => {
+            encode_binary(val, buffer);
+        }
+        PrimitiveValue::Date(val) => {
+            encode_date(*val, buffer);
+        }
+        PrimitiveValue::Timestamp(val) => {
+            encode_timestamp(*val, buffer);
+        }
+        PrimitiveValue::TimestampNTZ(val) => {
+            encode_timestamp_ntz(*val, buffer);
+        }
+        PrimitiveValue::TimeNTZ(val) => {
+            encode_time_ntz(*val, buffer);
+        }
+        PrimitiveValue::TimestampNanos(val) => {
+            encode_timestamp_nanos(*val, buffer);
+        }
+        PrimitiveValue::TimestampNTZNanos(val) => {
+            encode_timestamp_ntz_nanos(*val, buffer);
+        }
+        PrimitiveValue::Uuid(val) => {
+            encode_uuid(val, buffer);
+        }
+        PrimitiveValue::Decimal4(scale, unscaled_value) => {
+            encode_decimal4(*scale, *unscaled_value, buffer);
+        }
+        PrimitiveValue::Decimal8(scale, unscaled_value) => {
+            encode_decimal8(*scale, *unscaled_value, buffer);
+        }
+        PrimitiveValue::Decimal16(scale, unscaled_value) => {
+            encode_decimal16(*scale, *unscaled_value, buffer);
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::encoder::VariantBasicType;
+    use arrow_schema::extension::Variant;
+
+    // Helper function to extract keys from metadata for testing
+    fn get_metadata_keys(metadata: &[u8]) -> Vec<String> {
+        // Simple implementation to extract keys from metadata buffer
+        // This avoids dependency on VariantReader which might not be 
accessible
+
+        // Skip the header byte
+        let mut pos = 1;
+
+        // Get offset size from header byte
+        let offset_size = ((metadata[0] >> 6) & 0x03) + 1;
+
+        // Read dictionary size
+        let mut dict_size = 0usize;
+        for i in 0..offset_size {
+            dict_size |= (metadata[pos + i as usize] as usize) << (i * 8);
+        }
+        pos += offset_size as usize;
+
+        if dict_size == 0 {
+            return vec![];
+        }
+
+        // Read offsets
+        let mut offsets = Vec::with_capacity(dict_size + 1);
+        for _ in 0..=dict_size {
+            let mut offset = 0usize;
+            for i in 0..offset_size {
+                offset |= (metadata[pos + i as usize] as usize) << (i * 8);
+            }
+            offsets.push(offset);
+            pos += offset_size as usize;
+        }
+
+        // Extract keys using offsets
+        let mut keys = Vec::with_capacity(dict_size);
+        for i in 0..dict_size {
+            let start = offsets[i];
+            let end = offsets[i + 1];
+            let key_bytes = &metadata[pos + start..pos + end];
+            keys.push(String::from_utf8_lossy(key_bytes).to_string());
+        }
+
+        keys
+    }
+
+    // 
=========================================================================
+    // Basic builder functionality tests
+    // 
=========================================================================
+
+    #[test]
+    fn test_basic_object_builder() {
+        let mut metadata_buffer = vec![];
+        let mut value_buffer = vec![];
+
+        {
+            let mut builder = VariantBuilder::new(&mut metadata_buffer);
+            let mut object_builder = builder.new_object(&mut value_buffer);
+
+            // Test various primitive types
+            object_builder.append_value("null", Option::<i32>::None);
+            object_builder.append_value("bool_true", true);
+            object_builder.append_value("bool_false", false);
+            object_builder.append_value("int8", 42i8);
+            object_builder.append_value("int16", 1000i16);
+            object_builder.append_value("int32", 100000i32);
+            object_builder.append_value("int64", 1000000000i64);
+            object_builder.append_value("float", 3.14f32);
+            object_builder.append_value("double", 2.71828f64);
+            object_builder.append_value("string", "hello world");
+            object_builder.append_value("binary", vec![1u8, 2u8, 3u8]);
+
+            object_builder.finish();
+            builder.finish();
+        }
+
+        // Verify object encoding
+        assert_eq!(value_buffer[0] & 0x03, VariantBasicType::Object as u8);
+
+        // Verify metadata contains all keys
+        let keys = get_metadata_keys(&metadata_buffer);
+        assert_eq!(keys.len(), 11, "Should have 11 keys in metadata");
+        assert!(keys.contains(&"null".to_string()), "Missing 'null' key");
+        assert!(
+            keys.contains(&"bool_true".to_string()),
+            "Missing 'bool_true' key"
+        );
+        assert!(keys.contains(&"string".to_string()), "Missing 'string' key");
+
+        // Verify object has the correct number of entries
+        // First byte after header is the number of fields (if small object)
+        assert!(value_buffer.len() > 1, "Value buffer too small");
+        let num_fields = value_buffer[1];
+        assert_eq!(num_fields as usize, 11, "Object should have 11 fields");
+
+        let _variant = Variant::new(metadata_buffer, value_buffer);

Review Comment:
   As written, these tests verify the raw bytes that come out, rather than 
whether those bytes form a readable Variant value. 
   
   It seems to me that this test would be better if it exercised the 
user-visible API, namely by checking that the fields can be read back out. 
   
   So that might look something like:
   1. `Variant::new` returns a `Result` (in case the Variant is malformed)
   2. The test accesses the fields of the Variant and verifies their values
   
   Something like
   ```rust
   let variant = Variant::try_new(&metadata_buffer, &value_buffer)?;
   assert_eq!(variant.get("int8"), Variant::from(42i8))
   ... same for the other fields ...
   ```



##########
arrow-variant/src/builder/mod.rs:
##########
@@ -0,0 +1,1458 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Builder API for creating Variant binary values.
+//!
+//! This module provides a builder-style API for creating Variant values in the
+//! Arrow binary format. The API is modeled after the Arrow array builder APIs.
+//!
+//! # Example
+//!
+//! ```
+//! use std::io::Cursor;

Review Comment:
   I don't think `Cursor` is used anywhere in this example, so this import can 
be removed.



##########
arrow-variant/src/encoder/mod.rs:
##########
@@ -0,0 +1,761 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Core encoding primitives for the Variant binary format
+
+use arrow_schema::ArrowError;
+use std::io::Write;
+
+/// Maximum value that can be stored in a single byte (2^8 - 1)
+pub const MAX_1BYTE_VALUE: usize = 255;
+
+/// Maximum value that can be stored in two bytes (2^16 - 1)
+pub const MAX_2BYTE_VALUE: usize = 65535;
+
+/// Maximum value that can be stored in three bytes (2^24 - 1)
+pub const MAX_3BYTE_VALUE: usize = 16777215;
+
+/// Calculate the minimum number of bytes required to represent a value.
+///
+/// Returns a value between 1 and 4, representing the minimum number of
+/// bytes needed to store the given value.
+///
+/// # Arguments
+///
+/// * `value` - The value to determine the size for
+///
+/// # Returns
+///
+/// The number of bytes (1, 2, 3, or 4) needed to represent the value
+pub fn min_bytes_needed(value: usize) -> usize {
+    if value <= MAX_1BYTE_VALUE {
+        1
+    } else if value <= MAX_2BYTE_VALUE {
+        2
+    } else if value <= MAX_3BYTE_VALUE {
+        3
+    } else {
+        4
+    }
+}
+
+/// Variant basic types as defined in the Arrow Variant specification
+///
+/// Basic Type ID      Description
+/// Primitive  0       One of the primitive types
+/// Short string       1       A string with a length less than 64 bytes
+/// Object     2       A collection of (string-key, variant-value) pairs
+/// Array      3       An ordered sequence of variant values
+pub enum VariantBasicType {
+    /// Primitive type (0)
+    Primitive = 0,
+    /// Short string (1)
+    ShortString = 1,
+    /// Object (2)
+    Object = 2,
+    /// Array (3)
+    Array = 3,
+}
+
+/// Variant primitive types as defined in the Arrow Variant specification
+///
+/// Equivalence Class  Variant Physical Type   Type ID Equivalent Parquet Type 
Binary format
+/// NullType   null    0       UNKNOWN none
+/// Boolean    boolean (True)  1       BOOLEAN none
+/// Boolean    boolean (False) 2       BOOLEAN none
+/// Exact Numeric      int8    3       INT(8, signed)  1 byte
+/// Exact Numeric      int16   4       INT(16, signed) 2 byte little-endian
+/// Exact Numeric      int32   5       INT(32, signed) 4 byte little-endian
+/// Exact Numeric      int64   6       INT(64, signed) 8 byte little-endian
+/// Double     double  7       DOUBLE  IEEE little-endian
+/// Exact Numeric      decimal4        8       DECIMAL(precision, scale)       
1 byte scale in range [0, 38], followed by little-endian unscaled value
+/// Exact Numeric      decimal8        9       DECIMAL(precision, scale)       
1 byte scale in range [0, 38], followed by little-endian unscaled value
+/// Exact Numeric      decimal16       10      DECIMAL(precision, scale)       
1 byte scale in range [0, 38], followed by little-endian unscaled value
+/// Date       date    11      DATE    4 byte little-endian
+/// Timestamp  timestamp       12      TIMESTAMP(isAdjustedToUTC=true, MICROS) 
8-byte little-endian
+/// TimestampNTZ       timestamp without time zone     13      
TIMESTAMP(isAdjustedToUTC=false, MICROS)        8-byte little-endian
+/// Float      float   14      FLOAT   IEEE little-endian
+/// Binary     binary  15      BINARY  4 byte little-endian size, followed by 
bytes
+/// String     string  16      STRING  4 byte little-endian size, followed by 
UTF-8 encoded bytes
+/// TimeNTZ    time without time zone  17      TIME(isAdjustedToUTC=false, 
MICROS)     8-byte little-endian
+/// Timestamp  timestamp with time zone        18      
TIMESTAMP(isAdjustedToUTC=true, NANOS)  8-byte little-endian
+/// TimestampNTZ       timestamp without time zone     19      
TIMESTAMP(isAdjustedToUTC=false, NANOS) 8-byte little-endian
+/// UUID       uuid    20      UUID    16-byte big-endian
+pub enum VariantPrimitiveType {
+    /// Null type (0)
+    Null = 0,
+    /// Boolean true (1)
+    BooleanTrue = 1,
+    /// Boolean false (2)
+    BooleanFalse = 2,
+    /// 8-bit signed integer (3)
+    Int8 = 3,
+    /// 16-bit signed integer (4)
+    Int16 = 4,
+    /// 32-bit signed integer (5)
+    Int32 = 5,
+    /// 64-bit signed integer (6)
+    Int64 = 6,
+    /// 64-bit floating point (7)
+    Double = 7,
+    /// 32-bit decimal (8)
+    Decimal4 = 8,
+    /// 64-bit decimal (9)
+    Decimal8 = 9,
+    /// 128-bit decimal (10)
+    Decimal16 = 10,
+    /// Date (11)
+    Date = 11,
+    /// Timestamp with timezone (12)
+    Timestamp = 12,
+    /// Timestamp without timezone (13)
+    TimestampNTZ = 13,
+    /// 32-bit floating point (14)
+    Float = 14,
+    /// Binary data (15)
+    Binary = 15,
+    /// UTF-8 string (16)
+    String = 16,
+    /// Time without timezone (17)
+    TimeNTZ = 17,
+    /// Timestamp with timezone (nanos) (18)
+    TimestampNanos = 18,
+    /// Timestamp without timezone (nanos) (19)
+    TimestampNTZNanos = 19,
+    /// UUID (20)
+    Uuid = 20,
+}
+
+/// Creates a header byte for a primitive type value
+///
+/// The header byte contains:
+/// - Basic type (2 bits) in the lower bits
+/// - Type ID (6 bits) in the upper bits
+fn primitive_header(type_id: u8) -> u8 {
+    (type_id << 2) | VariantBasicType::Primitive as u8
+}
+
+/// Creates a header byte for a short string value
+///
+/// The header byte contains:
+/// - Basic type (2 bits) in the lower bits
+/// - String length (6 bits) in the upper bits
+fn short_str_header(size: u8) -> u8 {
+    (size << 2) | VariantBasicType::ShortString as u8
+}
+
+/// Creates a header byte for an object value
+///
+/// The header byte contains:
+/// - Basic type (2 bits) in the lower bits
+/// - is_large (1 bit) at position 6
+/// - field_id_size_minus_one (2 bits) at positions 4-5
+/// - field_offset_size_minus_one (2 bits) at positions 2-3
+pub fn object_header(is_large: bool, id_size: u8, offset_size: u8) -> u8 {
+    ((is_large as u8) << 6)
+        | ((id_size - 1) << 4)
+        | ((offset_size - 1) << 2)
+        | VariantBasicType::Object as u8
+}
+
+/// Creates a header byte for an array value
+///
+/// The header byte contains:
+/// - Basic type (2 bits) in the lower bits
+/// - is_large (1 bit) at position 4
+/// - field_offset_size_minus_one (2 bits) at positions 2-3
+pub fn array_header(is_large: bool, offset_size: u8) -> u8 {
+    ((is_large as u8) << 4) | ((offset_size - 1) << 2) | 
VariantBasicType::Array as u8
+}
+
+/// Encodes a null value
+pub fn encode_null(output: &mut Vec<u8>) {
+    output.push(primitive_header(VariantPrimitiveType::Null as u8));
+}
+
+/// Encodes a boolean value
+pub fn encode_boolean(value: bool, output: &mut Vec<u8>) {
+    if value {
+        output.push(primitive_header(VariantPrimitiveType::BooleanTrue as u8));
+    } else {
+        output.push(primitive_header(VariantPrimitiveType::BooleanFalse as 
u8));
+    }
+}
+
+/// Encodes an integer value, choosing the smallest sufficient type
+pub fn encode_integer(value: i64, output: &mut Vec<u8>) {
+    if value >= -128 && value <= 127 {
+        // Int8
+        output.push(primitive_header(VariantPrimitiveType::Int8 as u8));
+        output.push(value as u8);
+    } else if value >= -32768 && value <= 32767 {
+        // Int16
+        output.push(primitive_header(VariantPrimitiveType::Int16 as u8));
+        output.extend_from_slice(&(value as i16).to_le_bytes());
+    } else if value >= -2147483648 && value <= 2147483647 {
+        // Int32
+        output.push(primitive_header(VariantPrimitiveType::Int32 as u8));
+        output.extend_from_slice(&(value as i32).to_le_bytes());
+    } else {
+        // Int64
+        output.push(primitive_header(VariantPrimitiveType::Int64 as u8));
+        output.extend_from_slice(&value.to_le_bytes());
+    }
+}
+
+/// Encodes a float value
+pub fn encode_float(value: f64, output: &mut Vec<u8>) {
+    output.push(primitive_header(VariantPrimitiveType::Double as u8));
+    output.extend_from_slice(&value.to_le_bytes());
+}
+
+/// Encodes a string value
+pub fn encode_string(value: &str, output: &mut Vec<u8>) {
+    let bytes = value.as_bytes();
+    let len = bytes.len();
+
+    if len < 64 {
+        // Short string format - encode length in header
+        let header = short_str_header(len as u8);
+        output.push(header);
+        output.extend_from_slice(bytes);
+    } else {
+        // Long string format (using primitive string type)
+        let header = primitive_header(VariantPrimitiveType::String as u8);
+        output.push(header);
+
+        // Write length as 4-byte little-endian
+        output.extend_from_slice(&(len as u32).to_le_bytes());
+
+        // Write string bytes
+        output.extend_from_slice(bytes);
+    }
+}
+
+/// Encodes a binary value
+pub fn encode_binary(value: &[u8], output: &mut Vec<u8>) {
+    // Use primitive + binary type
+    let header = primitive_header(VariantPrimitiveType::Binary as u8);
+    output.push(header);
+
+    // Write length followed by bytes
+    let len = value.len() as u32;
+    output.extend_from_slice(&len.to_le_bytes());
+    output.extend_from_slice(value);
+}
+
+/// Encodes a date value (days since epoch)
+pub fn encode_date(value: i32, output: &mut Vec<u8>) {
+    // Use primitive + date type
+    let header = primitive_header(VariantPrimitiveType::Date as u8);
+    output.push(header);
+    output.extend_from_slice(&value.to_le_bytes());
+}
+
+/// Encodes a timestamp value (milliseconds since epoch)
+pub fn encode_timestamp(value: i64, output: &mut Vec<u8>) {
+    // Use primitive + timestamp type
+    let header = primitive_header(VariantPrimitiveType::Timestamp as u8);
+    output.push(header);
+    output.extend_from_slice(&value.to_le_bytes());
+}
+
+/// Encodes a timestamp without timezone value (milliseconds since epoch)
+pub fn encode_timestamp_ntz(value: i64, output: &mut Vec<u8>) {
+    // Use primitive + timestamp_ntz type
+    let header = primitive_header(VariantPrimitiveType::TimestampNTZ as u8);
+    output.push(header);
+    output.extend_from_slice(&value.to_le_bytes());
+}
+
+/// Encodes a time without timezone value (milliseconds)
+pub fn encode_time_ntz(value: i64, output: &mut Vec<u8>) {
+    // Use primitive + time_ntz type
+    let header = primitive_header(VariantPrimitiveType::TimeNTZ as u8);
+    output.push(header);
+    output.extend_from_slice(&value.to_le_bytes());
+}

Review Comment:
   That is a good point
   
   I also think these functions don't need to be `pub` (maybe we could start
with `pub(crate)`), since I think the main API people would use is the builder



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to