scovich commented on code in PR #7670:
URL: https://github.com/apache/arrow-rs/pull/7670#discussion_r2150821734


##########
parquet-variant/src/encoder/variant_to_json.rs:
##########


Review Comment:
   nit: could we just call this file `to_json.rs`? The crate name already tells us it's about variants.
   
   Also, is there a reason this lives under the `encoder` module? It seems like its own thing rather than part of encoding.



##########
parquet-variant/src/encoder/variant_to_json.rs:
##########
@@ -0,0 +1,652 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantArray, VariantObject};
+
+/// Converts a Variant to JSON and writes it to the provided buffer
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let mut buffer = Vec::new();
+///     variant_to_json(&mut buffer, &variant)?;
+///     assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json<W: Write>(
+    json_buffer: &mut W,
+    variant: &Variant,
+) -> Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal8 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal16 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Date(date) => {
+            write!(json_buffer, "\"{}\"", date.format("%Y-%m-%d"))?;
+        }
+        Variant::TimestampMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.to_rfc3339())?;
+        }
+        Variant::TimestampNtzMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.format("%Y-%m-%dT%H:%M:%S%.6f"))?;
+        }
+        Variant::Binary(bytes) => {
+            // Encode binary as base64 string
+            let base64_str = general_purpose::STANDARD.encode(bytes);
+            let json_str = serde_json::to_string(&base64_str)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::String(s) | Variant::ShortString(s) => {
+            // Use serde_json to properly escape the string
+            let json_str = serde_json::to_string(s)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::Object(obj) => {
+            convert_object_to_json(json_buffer, obj)?;
+        }
+        Variant::Array(arr) => {
+            convert_array_to_json(json_buffer, arr)?;
+        }
+    }
+    Ok(())
+}
+
+/// Convert object fields to JSON
+fn convert_object_to_json<W: Write>(
+    buffer: &mut W,
+    obj: &VariantObject,
+) -> Result<(), ArrowError> {
+    write!(buffer, "{{")?;
+    
+    // Get all fields from the object
+    let fields = obj.fields()?;
+    let mut first = true;
+    
+    for (key, value) in fields {
+        if !first {
+            write!(buffer, ",")?;
+        }
+        first = false;
+        
+        // Write the key (properly escaped)
+        let json_key = serde_json::to_string(key)
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON key 
encoding error: {}", e)))?;
+        write!(buffer, "{}:", json_key)?;
+        
+        // Recursively convert the value
+        variant_to_json(buffer, &value)?;
+    }
+    
+    write!(buffer, "}}")?;
+    Ok(())
+}
+
+/// Convert array elements to JSON
+fn convert_array_to_json<W: Write>(
+    buffer: &mut W,

Review Comment:
   nit: Any particular reason not to use `impl` in these signatures?
   ```suggestion
   fn convert_array_to_json(
       buffer: &mut impl Write,
   ```



##########
parquet-variant/src/encoder/variant_to_json.rs:
##########
@@ -0,0 +1,652 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantArray, VariantObject};
+
+/// Converts a Variant to JSON and writes it to the provided buffer
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let mut buffer = Vec::new();
+///     variant_to_json(&mut buffer, &variant)?;
+///     assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json<W: Write>(
+    json_buffer: &mut W,
+    variant: &Variant,
+) -> Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal8 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal16 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Date(date) => {
+            write!(json_buffer, "\"{}\"", date.format("%Y-%m-%d"))?;
+        }
+        Variant::TimestampMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.to_rfc3339())?;
+        }
+        Variant::TimestampNtzMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.format("%Y-%m-%dT%H:%M:%S%.6f"))?;
+        }
+        Variant::Binary(bytes) => {
+            // Encode binary as base64 string
+            let base64_str = general_purpose::STANDARD.encode(bytes);
+            let json_str = serde_json::to_string(&base64_str)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::String(s) | Variant::ShortString(s) => {
+            // Use serde_json to properly escape the string
+            let json_str = serde_json::to_string(s)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::Object(obj) => {
+            convert_object_to_json(json_buffer, obj)?;
+        }
+        Variant::Array(arr) => {
+            convert_array_to_json(json_buffer, arr)?;
+        }
+    }
+    Ok(())
+}
+
+/// Convert object fields to JSON
+fn convert_object_to_json<W: Write>(
+    buffer: &mut W,
+    obj: &VariantObject,
+) -> Result<(), ArrowError> {
+    write!(buffer, "{{")?;
+    
+    // Get all fields from the object
+    let fields = obj.fields()?;
+    let mut first = true;
+    
+    for (key, value) in fields {
+        if !first {
+            write!(buffer, ",")?;
+        }
+        first = false;
+        
+        // Write the key (properly escaped)
+        let json_key = serde_json::to_string(key)
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON key 
encoding error: {}", e)))?;
+        write!(buffer, "{}:", json_key)?;
+        
+        // Recursively convert the value
+        variant_to_json(buffer, &value)?;
+    }
+    
+    write!(buffer, "}}")?;
+    Ok(())
+}
+
+/// Convert array elements to JSON
+fn convert_array_to_json<W: Write>(
+    buffer: &mut W,
+    arr: &VariantArray,
+) -> Result<(), ArrowError> {
+    write!(buffer, "[")?;
+    
+    let len = arr.len();
+    for i in 0..len {
+        if i > 0 {
+            write!(buffer, ",")?;
+        }
+        
+        let element = arr.get(i)?;
+        variant_to_json(buffer, &element)?;
+    }
+    
+    write!(buffer, "]")?;
+    Ok(())
+}
+
+/// Convert Variant to JSON string
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(String)` containing the JSON representation
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json_string};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::String("hello");
+///     let json = variant_to_json_string(&variant)?;
+///     assert_eq!(json, "\"hello\"");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> 
{
+    let mut buffer = Vec::new();
+    variant_to_json(&mut buffer, variant)?;
+    String::from_utf8(buffer)
+        .map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 
conversion error: {}", e)))
+}
+
+/// Convert Variant to serde_json::Value
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(Value)` containing the JSON value
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json_value};
+/// use serde_json::Value;
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let json_value = variant_to_json_value(&variant)?;
+///     assert_eq!(json_value, Value::Number(42.into()));
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
+    match variant {
+        Variant::Null => Ok(Value::Null),
+        Variant::BooleanTrue => Ok(Value::Bool(true)),
+        Variant::BooleanFalse => Ok(Value::Bool(false)),
+        Variant::Int8(i) => Ok(Value::Number((*i).into())),
+        Variant::Int16(i) => Ok(Value::Number((*i).into())),
+        Variant::Int32(i) => Ok(Value::Number((*i).into())),
+        Variant::Int64(i) => Ok(Value::Number((*i).into())),
+        Variant::Float(f) => {
+            serde_json::Number::from_f64(*f as f64)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid float 
value".to_string()))
+        }
+        Variant::Double(f) => {
+            serde_json::Number::from_f64(*f)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
double value".to_string()))
+        }
+        Variant::Decimal4 { integer, scale } => {
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Decimal8 { integer, scale } => {
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Decimal16 { integer, scale } => {
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Date(date) => 
Ok(Value::String(date.format("%Y-%m-%d").to_string())),
+        Variant::TimestampMicros(ts) => Ok(Value::String(ts.to_rfc3339())),
+        Variant::TimestampNtzMicros(ts) => 
Ok(Value::String(ts.format("%Y-%m-%dT%H:%M:%S%.6f").to_string())),
+        Variant::Binary(bytes) => 
Ok(Value::String(general_purpose::STANDARD.encode(bytes))),

Review Comment:
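   Duplicated logic: the date, timestamp, and binary formatting here repeats what `variant_to_json` already does. It would probably be better to define helper functions that completely isolate this logic from callers. At a minimum, we should hoist the format strings (`"%Y-%m-%d"` and `"%Y-%m-%dT%H:%M:%S%.6f"`) out as named `const`s that can be reused, since they're especially error-prone.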



##########
parquet-variant/src/encoder/variant_to_json.rs:
##########
@@ -0,0 +1,652 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantArray, VariantObject};
+
+/// Converts a Variant to JSON and writes it to the provided buffer
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let mut buffer = Vec::new();
+///     variant_to_json(&mut buffer, &variant)?;
+///     assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json<W: Write>(
+    json_buffer: &mut W,
+    variant: &Variant,
+) -> Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal8 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal16 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Date(date) => {
+            write!(json_buffer, "\"{}\"", date.format("%Y-%m-%d"))?;
+        }
+        Variant::TimestampMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.to_rfc3339())?;
+        }
+        Variant::TimestampNtzMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.format("%Y-%m-%dT%H:%M:%S%.6f"))?;
+        }
+        Variant::Binary(bytes) => {
+            // Encode binary as base64 string
+            let base64_str = general_purpose::STANDARD.encode(bytes);
+            let json_str = serde_json::to_string(&base64_str)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::String(s) | Variant::ShortString(s) => {
+            // Use serde_json to properly escape the string
+            let json_str = serde_json::to_string(s)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::Object(obj) => {
+            convert_object_to_json(json_buffer, obj)?;
+        }
+        Variant::Array(arr) => {
+            convert_array_to_json(json_buffer, arr)?;
+        }
+    }
+    Ok(())
+}
+
+/// Convert object fields to JSON
+fn convert_object_to_json<W: Write>(
+    buffer: &mut W,
+    obj: &VariantObject,
+) -> Result<(), ArrowError> {
+    write!(buffer, "{{")?;
+    
+    // Get all fields from the object
+    let fields = obj.fields()?;
+    let mut first = true;
+    
+    for (key, value) in fields {
+        if !first {
+            write!(buffer, ",")?;
+        }
+        first = false;
+        
+        // Write the key (properly escaped)
+        let json_key = serde_json::to_string(key)
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON key 
encoding error: {}", e)))?;
+        write!(buffer, "{}:", json_key)?;
+        
+        // Recursively convert the value
+        variant_to_json(buffer, &value)?;
+    }
+    
+    write!(buffer, "}}")?;
+    Ok(())
+}
+
+/// Convert array elements to JSON
+fn convert_array_to_json<W: Write>(
+    buffer: &mut W,
+    arr: &VariantArray,
+) -> Result<(), ArrowError> {
+    write!(buffer, "[")?;
+    
+    let len = arr.len();
+    for i in 0..len {
+        if i > 0 {
+            write!(buffer, ",")?;
+        }
+        
+        let element = arr.get(i)?;
+        variant_to_json(buffer, &element)?;
+    }
+    
+    write!(buffer, "]")?;
+    Ok(())
+}
+
+/// Convert Variant to JSON string
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(String)` containing the JSON representation
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json_string};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::String("hello");
+///     let json = variant_to_json_string(&variant)?;
+///     assert_eq!(json, "\"hello\"");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> 
{
+    let mut buffer = Vec::new();
+    variant_to_json(&mut buffer, variant)?;
+    String::from_utf8(buffer)
+        .map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 
conversion error: {}", e)))
+}
+
+/// Convert Variant to serde_json::Value
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(Value)` containing the JSON value
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json_value};
+/// use serde_json::Value;
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let json_value = variant_to_json_value(&variant)?;
+///     assert_eq!(json_value, Value::Number(42.into()));
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
+    match variant {
+        Variant::Null => Ok(Value::Null),
+        Variant::BooleanTrue => Ok(Value::Bool(true)),
+        Variant::BooleanFalse => Ok(Value::Bool(false)),
+        Variant::Int8(i) => Ok(Value::Number((*i).into())),
+        Variant::Int16(i) => Ok(Value::Number((*i).into())),
+        Variant::Int32(i) => Ok(Value::Number((*i).into())),
+        Variant::Int64(i) => Ok(Value::Number((*i).into())),
+        Variant::Float(f) => {
+            serde_json::Number::from_f64(*f as f64)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid float 
value".to_string()))
+        }
+        Variant::Double(f) => {
+            serde_json::Number::from_f64(*f)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
double value".to_string()))
+        }
+        Variant::Decimal4 { integer, scale } => {
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Decimal8 { integer, scale } => {
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Decimal16 { integer, scale } => {
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }

Review Comment:
   Same issue as in `variant_to_json`: f64 cannot accurately represent all decimal8/decimal16 values. However, f64 might be the best we can do here, because `serde_json::Number` is compatible with the [JSON spec](https://datatracker.ietf.org/doc/html/rfc7159.html#section-6), which states:
   > This specification allows implementations to set limits on the range and 
precision of numbers accepted.  Since software that implements IEEE 754-2008 
binary64 (double precision) numbers 
[[IEEE754](https://datatracker.ietf.org/doc/html/rfc7159.html#ref-IEEE754)] is 
generally available and widely used, good interoperability can be achieved by 
implementations that expect no more precision or range than these provide, in 
the sense that implementations will approximate JSON numbers within the 
expected precision.  A JSON number such as `1E400` or 
`3.141592653589793238462643383279` may indicate potential interoperability 
problems, since it suggests that the software that created it expects receiving 
software to have greater capabilities for numeric magnitude and precision than 
is widely available.
   >
   >   Note that when such software is used, numbers that are integers and are 
in the range `[-(2**53)+1, (2**53)-1]` are interoperable in the sense that 
implementations will agree exactly on their numeric values.
   
   (`serde_json::Number` actually goes above and beyond the spec by also being able to hold i64/u64 values, in addition to f64.)



##########
parquet-variant/src/encoder/variant_to_json.rs:
##########
@@ -0,0 +1,652 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantArray, VariantObject};
+
+/// Converts a Variant to JSON and writes it to the provided buffer
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let mut buffer = Vec::new();
+///     variant_to_json(&mut buffer, &variant)?;
+///     assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json<W: Write>(
+    json_buffer: &mut W,
+    variant: &Variant,
+) -> Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal8 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;

Review Comment:
   `f64` (53-bit precision) cannot accurately represent all decimal8 values (63-bit precision), let alone decimal16 (127-bit precision). For example, round-tripping a value like `0x4000_0000_0000_ffff` through f64 loses information about the least-significant bits, producing `0x4000_0000_0000_fe48`. See [playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=c2a1721e08d702fd2e394de4003befee).
   
   I have no idea why it clips to _that_ specific value, but you get the idea.
   
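   Instead, we will need to extract the quotient and remainder resulting from division by an integer power of ten, and then print the two parts back to back with e.g. `"{quotient}.{remainder}"`. The remainder must be zero-padded to `scale` digits (so that 105 at scale 2 prints as `1.05`, not `1.5`), and the sign must be emitted only once for negative values.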
   
   Once we have that capability, we should probably also use it for decimal4 
values, just for uniformity.



##########
parquet-variant/src/encoder/variant_to_json.rs:
##########
@@ -0,0 +1,652 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantArray, VariantObject};
+
+/// Converts a Variant to JSON and writes it to the provided buffer
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let mut buffer = Vec::new();
+///     variant_to_json(&mut buffer, &variant)?;
+///     assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json<W: Write>(
+    json_buffer: &mut W,
+    variant: &Variant,
+) -> Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal8 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;

Review Comment:
   Additionally -- floating point (base 2) can't accurately represent powers of 
ten (and both bases have different errors when representing e.g. powers of 3), 
so even a decimal4 could potentially lose information through a round trip. Though 
I think most floating point parsers work very hard to avoid exposing that kind 
of inaccuracy, by ensuring that every representable value round trips cleanly 
(I have vague memories of seeing a bug report about a case that slipped past 
the java floating point parser several years ago).



##########
parquet-variant/src/encoder/variant_to_json.rs:
##########
@@ -0,0 +1,652 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantArray, VariantObject};
+
+/// Converts a Variant to JSON and writes it to the provided buffer
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let mut buffer = Vec::new();
+///     variant_to_json(&mut buffer, &variant)?;
+///     assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json<W: Write>(
+    json_buffer: &mut W,
+    variant: &Variant,
+) -> Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal8 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Decimal16 { integer, scale } => {
+            // Convert decimal to string representation
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            write!(json_buffer, "{}", decimal_value)?;
+        }
+        Variant::Date(date) => {
+            write!(json_buffer, "\"{}\"", date.format("%Y-%m-%d"))?;
+        }
+        Variant::TimestampMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.to_rfc3339())?;
+        }
+        Variant::TimestampNtzMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.format("%Y-%m-%dT%H:%M:%S%.6f"))?;
+        }
+        Variant::Binary(bytes) => {
+            // Encode binary as base64 string
+            let base64_str = general_purpose::STANDARD.encode(bytes);
+            let json_str = serde_json::to_string(&base64_str)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::String(s) | Variant::ShortString(s) => {
+            // Use serde_json to properly escape the string
+            let json_str = serde_json::to_string(s)
+                .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON 
encoding error: {}", e)))?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::Object(obj) => {
+            convert_object_to_json(json_buffer, obj)?;
+        }
+        Variant::Array(arr) => {
+            convert_array_to_json(json_buffer, arr)?;
+        }
+    }
+    Ok(())
+}
+
+/// Convert object fields to JSON
+fn convert_object_to_json<W: Write>(
+    buffer: &mut W,
+    obj: &VariantObject,
+) -> Result<(), ArrowError> {
+    write!(buffer, "{{")?;
+    
+    // Get all fields from the object
+    let fields = obj.fields()?;
+    let mut first = true;
+    
+    for (key, value) in fields {
+        if !first {
+            write!(buffer, ",")?;
+        }
+        first = false;
+        
+        // Write the key (properly escaped)
+        let json_key = serde_json::to_string(key)
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON key 
encoding error: {}", e)))?;
+        write!(buffer, "{}:", json_key)?;
+        
+        // Recursively convert the value
+        variant_to_json(buffer, &value)?;
+    }
+    
+    write!(buffer, "}}")?;
+    Ok(())
+}
+
+/// Convert array elements to JSON
+fn convert_array_to_json<W: Write>(
+    buffer: &mut W,
+    arr: &VariantArray,
+) -> Result<(), ArrowError> {
+    write!(buffer, "[")?;
+    
+    let len = arr.len();
+    for i in 0..len {
+        if i > 0 {
+            write!(buffer, ",")?;
+        }
+        
+        let element = arr.get(i)?;
+        variant_to_json(buffer, &element)?;
+    }
+    
+    write!(buffer, "]")?;
+    Ok(())
+}
+
+/// Convert Variant to JSON string
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(String)` containing the JSON representation
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json_string};
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::String("hello");
+///     let json = variant_to_json_string(&variant)?;
+///     assert_eq!(json, "\"hello\"");
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> 
{
+    let mut buffer = Vec::new();
+    variant_to_json(&mut buffer, variant)?;
+    String::from_utf8(buffer)
+        .map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 
conversion error: {}", e)))
+}
+
+/// Convert Variant to serde_json::Value
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(Value)` containing the JSON value
+/// * `Err` with error details if conversion fails
+///
+/// # Example
+///
+/// ```rust
+/// use parquet_variant::{Variant, variant_to_json_value};
+/// use serde_json::Value;
+/// use arrow_schema::ArrowError;
+/// 
+/// fn example() -> Result<(), ArrowError> {
+///     let variant = Variant::Int8(42);
+///     let json_value = variant_to_json_value(&variant)?;
+///     assert_eq!(json_value, Value::Number(42.into()));
+///     Ok(())
+/// }
+/// example().unwrap();
+/// ```
+pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
+    match variant {
+        Variant::Null => Ok(Value::Null),
+        Variant::BooleanTrue => Ok(Value::Bool(true)),
+        Variant::BooleanFalse => Ok(Value::Bool(false)),
+        Variant::Int8(i) => Ok(Value::Number((*i).into())),
+        Variant::Int16(i) => Ok(Value::Number((*i).into())),
+        Variant::Int32(i) => Ok(Value::Number((*i).into())),
+        Variant::Int64(i) => Ok(Value::Number((*i).into())),
+        Variant::Float(f) => {
+            serde_json::Number::from_f64(*f as f64)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid float 
value".to_string()))
+        }
+        Variant::Double(f) => {
+            serde_json::Number::from_f64(*f)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
double value".to_string()))
+        }
+        Variant::Decimal4 { integer, scale } => {
+            let divisor = 10_i32.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Decimal8 { integer, scale } => {
+            let divisor = 10_i64.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Decimal16 { integer, scale } => {
+            let divisor = 10_i128.pow(*scale as u32);
+            let decimal_value = *integer as f64 / divisor as f64;
+            serde_json::Number::from_f64(decimal_value)
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid 
decimal value".to_string()))
+        }
+        Variant::Date(date) => 
Ok(Value::String(date.format("%Y-%m-%d").to_string())),
+        Variant::TimestampMicros(ts) => Ok(Value::String(ts.to_rfc3339())),
+        Variant::TimestampNtzMicros(ts) => 
Ok(Value::String(ts.format("%Y-%m-%dT%H:%M:%S%.6f").to_string())),
+        Variant::Binary(bytes) => 
Ok(Value::String(general_purpose::STANDARD.encode(bytes))),
+        Variant::String(s) | Variant::ShortString(s) => 
Ok(Value::String(s.to_string())),
+        Variant::Object(obj) => {
+            let mut map = serde_json::Map::new();
+            let fields = obj.fields()?;
+            
+            for (key, value) in fields {
+                let json_value = variant_to_json_value(&value)?;
+                map.insert(key.to_string(), json_value);
+            }
+            
+            Ok(Value::Object(map))

Review Comment:
   This might be a profitable place to use `Iterator::collect`?
   ```suggestion
               let fields = obj
                   .fields()?
                   .map(|(key, value)| {
                       Ok((key.to_string(), variant_to_json_value(&value)?))
                   })
                   .collect::<Result<_, _>>()?;
               Ok(Value::Object(fields))
   ```
   (again below for `Variant::Array`, using `values()` method)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to