scovich commented on code in PR #7670:
URL: https://github.com/apache/arrow-rs/pull/7670#discussion_r2165109613


##########
parquet-variant/src/to_json.rs:
##########
@@ -0,0 +1,1272 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+
+use arrow_schema::ArrowError;
+use base64::{engine::general_purpose, Engine as _};
+use serde_json::Value;
+use std::io::Write;
+
+use crate::variant::{Variant, VariantList, VariantObject};
+use crate::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
+
+// Format string constants to avoid duplication and reduce errors
+const DATE_FORMAT: &str = "%Y-%m-%d";
+const TIMESTAMP_NTZ_FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.6f";
+
+// Helper functions for consistent formatting
+fn format_date_string(date: &chrono::NaiveDate) -> String {
+    date.format(DATE_FORMAT).to_string()
+}
+
+fn format_timestamp_ntz_string(ts: &chrono::NaiveDateTime) -> String {
+    ts.format(TIMESTAMP_NTZ_FORMAT).to_string()
+}
+
+fn format_binary_base64(bytes: &[u8]) -> String {
+    general_purpose::STANDARD.encode(bytes)
+}
+
+/// Converts a Variant to JSON and writes it to the provided `Write`
+///
+/// This function writes JSON directly to any type that implements [`Write`],
+/// making it efficient for streaming or when you want to control the output 
destination.
+///
+/// # Arguments
+///
+/// * `json_buffer` - Writer to output JSON to
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` with error details if conversion fails
+///
+/// # Examples
+///
+/// ```rust
+/// # use parquet_variant::{Variant, variant_to_json};
+/// # use arrow_schema::ArrowError;
+/// let variant = Variant::Int32(42);
+/// let mut buffer = Vec::new();
+/// variant_to_json(&mut buffer, &variant)?;
+/// assert_eq!(String::from_utf8(buffer).unwrap(), "42");
+/// # Ok::<(), ArrowError>(())
+/// ```
+///
+/// ```rust
+/// # use parquet_variant::{Variant, variant_to_json};
+/// # use arrow_schema::ArrowError;
+/// let variant = Variant::String("Hello, World!");
+/// let mut buffer = Vec::new();
+/// variant_to_json(&mut buffer, &variant)?;
+/// assert_eq!(String::from_utf8(buffer).unwrap(), "\"Hello, World!\"");
+/// # Ok::<(), ArrowError>(())
+/// ```
+pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> 
Result<(), ArrowError> {
+    match variant {
+        Variant::Null => {
+            write!(json_buffer, "null")?;
+        }
+        Variant::BooleanTrue => {
+            write!(json_buffer, "true")?;
+        }
+        Variant::BooleanFalse => {
+            write!(json_buffer, "false")?;
+        }
+        Variant::Int8(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int16(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int32(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Int64(i) => {
+            write!(json_buffer, "{}", i)?;
+        }
+        Variant::Float(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Double(f) => {
+            write!(json_buffer, "{}", f)?;
+        }
+        Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
+            // Convert decimal to string representation using integer 
arithmetic
+            if *scale == 0 {
+                write!(json_buffer, "{}", integer)?;
+            } else {
+                let divisor = 10_i32.pow(*scale as u32);
+                let quotient = integer / divisor;
+                let remainder = (integer % divisor).abs();
+                let formatted_remainder = format!("{:0width$}", remainder, 
width = *scale as usize);
+                let trimmed_remainder = 
formatted_remainder.trim_end_matches('0');
+                if trimmed_remainder.is_empty() {
+                    write!(json_buffer, "{}", quotient)?;
+                } else {
+                    write!(json_buffer, "{}.{}", quotient, trimmed_remainder)?;
+                }
+            }
+        }
+        Variant::Decimal8(VariantDecimal8 { integer, scale }) => {
+            // Convert decimal to string representation using integer 
arithmetic
+            if *scale == 0 {
+                write!(json_buffer, "{}", integer)?;
+            } else {
+                let divisor = 10_i64.pow(*scale as u32);
+                let quotient = integer / divisor;
+                let remainder = (integer % divisor).abs();
+                let formatted_remainder = format!("{:0width$}", remainder, 
width = *scale as usize);
+                let trimmed_remainder = 
formatted_remainder.trim_end_matches('0');
+                if trimmed_remainder.is_empty() {
+                    write!(json_buffer, "{}", quotient)?;
+                } else {
+                    write!(json_buffer, "{}.{}", quotient, trimmed_remainder)?;
+                }
+            }
+        }
+        Variant::Decimal16(VariantDecimal16 { integer, scale }) => {
+            // Convert decimal to string representation using integer 
arithmetic
+            if *scale == 0 {
+                write!(json_buffer, "{}", integer)?;
+            } else {
+                let divisor = 10_i128.pow(*scale as u32);
+                let quotient = integer / divisor;
+                let remainder = (integer % divisor).abs();
+                let formatted_remainder = format!("{:0width$}", remainder, 
width = *scale as usize);
+                let trimmed_remainder = 
formatted_remainder.trim_end_matches('0');
+                if trimmed_remainder.is_empty() {
+                    write!(json_buffer, "{}", quotient)?;
+                } else {
+                    write!(json_buffer, "{}.{}", quotient, trimmed_remainder)?;
+                }
+            }
+        }
+        Variant::Date(date) => {
+            write!(json_buffer, "\"{}\"", format_date_string(date))?;
+        }
+        Variant::TimestampMicros(ts) => {
+            write!(json_buffer, "\"{}\"", ts.to_rfc3339())?;
+        }
+        Variant::TimestampNtzMicros(ts) => {
+            write!(json_buffer, "\"{}\"", format_timestamp_ntz_string(ts))?;
+        }
+        Variant::Binary(bytes) => {
+            // Encode binary as base64 string
+            let base64_str = format_binary_base64(bytes);
+            let json_str = serde_json::to_string(&base64_str).map_err(|e| {
+                ArrowError::InvalidArgumentError(format!("JSON encoding error: 
{}", e))
+            })?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::String(s) => {
+            // Use serde_json to properly escape the string
+            let json_str = serde_json::to_string(s).map_err(|e| {
+                ArrowError::InvalidArgumentError(format!("JSON encoding error: 
{}", e))
+            })?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::ShortString(s) => {
+            // Use serde_json to properly escape the string
+            let json_str = serde_json::to_string(s.as_str()).map_err(|e| {
+                ArrowError::InvalidArgumentError(format!("JSON encoding error: 
{}", e))
+            })?;
+            write!(json_buffer, "{}", json_str)?;
+        }
+        Variant::Object(obj) => {
+            convert_object_to_json(json_buffer, obj)?;
+        }
+        Variant::List(arr) => {
+            convert_array_to_json(json_buffer, arr)?;
+        }
+    }
+    Ok(())
+}
+
+/// Convert object fields to JSON
+fn convert_object_to_json(buffer: &mut impl Write, obj: &VariantObject) -> 
Result<(), ArrowError> {
+    write!(buffer, "{{")?;
+
+    // Get all fields from the object
+    let mut first = true;
+
+    for (key, value) in obj.iter() {
+        if !first {
+            write!(buffer, ",")?;
+        }
+        first = false;
+
+        // Write the key (properly escaped)
+        let json_key = serde_json::to_string(key).map_err(|e| {
+            ArrowError::InvalidArgumentError(format!("JSON key encoding error: 
{}", e))
+        })?;
+        write!(buffer, "{}:", json_key)?;
+
+        // Recursively convert the value
+        variant_to_json(buffer, &value)?;
+    }
+
+    write!(buffer, "}}")?;
+    Ok(())
+}
+
+/// Convert array elements to JSON
+fn convert_array_to_json(buffer: &mut impl Write, arr: &VariantList) -> 
Result<(), ArrowError> {
+    write!(buffer, "[")?;
+
+    let len = arr.len();
+    for i in 0..len {
+        if i > 0 {
+            write!(buffer, ",")?;
+        }
+
+        let element = arr.get(i)?;
+        variant_to_json(buffer, &element)?;
+    }
+
+    write!(buffer, "]")?;
+    Ok(())
+}
+
+/// Convert Variant to JSON string
+///
+/// This is a convenience function that converts a Variant to a JSON string.
+/// This is the same as calling variant_to_json with a Vec
+/// It's the simplest way to get a JSON representation when you just need a 
String result.
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(String)` containing the JSON representation
+/// * `Err` with error details if conversion fails
+///
+/// # Examples
+///
+/// ```rust
+/// # use parquet_variant::{Variant, variant_to_json_string};
+/// # use arrow_schema::ArrowError;
+/// let variant = Variant::Int32(42);
+/// let json = variant_to_json_string(&variant)?;
+/// assert_eq!(json, "42");
+/// # Ok::<(), ArrowError>(())
+/// ```
+///
+/// ```rust
+/// # use parquet_variant::{Variant, variant_to_json_string};
+/// # use arrow_schema::ArrowError;
+/// let variant = Variant::String("Hello, World!");
+/// let json = variant_to_json_string(&variant)?;
+/// assert_eq!(json, "\"Hello, World!\"");
+/// # Ok::<(), ArrowError>(())
+/// ```
+///
+/// # Example: Create a [`Variant::Object`] and convert to JSON
+///
+/// This example shows how to create an object with two fields and convert it 
to JSON:
+/// ```json
+/// {
+///   "first_name": "Jiaying",
+///   "last_name": "Li"
+/// }
+/// ```
+///
+/// ```rust
+/// # use parquet_variant::{Variant, VariantBuilder, variant_to_json_string};
+/// # use arrow_schema::ArrowError;
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // Create the Variant and convert to JSON
+/// let variant = Variant::try_new(&metadata, &value)?;
+/// let json = variant_to_json_string(&variant)?;
+/// assert!(json.contains("\"first_name\":\"Jiaying\""));
+/// assert!(json.contains("\"last_name\":\"Li\""));
+/// # Ok::<(), ArrowError>(())
+/// ```
+pub fn variant_to_json_string(variant: &Variant) -> Result<String, ArrowError> 
{
+    let mut buffer = Vec::new();
+    variant_to_json(&mut buffer, variant)?;
+    String::from_utf8(buffer)
+        .map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 
conversion error: {}", e)))
+}
+
+/// Convert Variant to serde_json::Value
+///
+/// This function converts a Variant to a [`serde_json::Value`], which is 
useful
+/// when you need to work with the JSON data programmatically or integrate with
+/// other serde-based JSON processing.
+///
+/// # Arguments
+///
+/// * `variant` - The Variant value to convert
+///
+/// # Returns
+///
+/// * `Ok(Value)` containing the JSON value
+/// * `Err` with error details if conversion fails
+///
+/// # Examples
+///
+/// ```rust
+/// # use parquet_variant::{Variant, variant_to_json_value};
+/// # use serde_json::Value;
+/// # use arrow_schema::ArrowError;
+/// let variant = Variant::Int32(42);
+/// let json_value = variant_to_json_value(&variant)?;
+/// assert_eq!(json_value, Value::Number(42.into()));
+/// # Ok::<(), ArrowError>(())
+/// ```
+///
+/// ```rust
+/// # use parquet_variant::{Variant, variant_to_json_value};
+/// # use serde_json::Value;
+/// # use arrow_schema::ArrowError;
+/// let variant = Variant::String("hello");
+/// let json_value = variant_to_json_value(&variant)?;
+/// assert_eq!(json_value, Value::String("hello".to_string()));
+/// # Ok::<(), ArrowError>(())
+/// ```
+pub fn variant_to_json_value(variant: &Variant) -> Result<Value, ArrowError> {
+    match variant {
+        Variant::Null => Ok(Value::Null),
+        Variant::BooleanTrue => Ok(Value::Bool(true)),
+        Variant::BooleanFalse => Ok(Value::Bool(false)),
+        Variant::Int8(i) => Ok(Value::Number((*i).into())),
+        Variant::Int16(i) => Ok(Value::Number((*i).into())),
+        Variant::Int32(i) => Ok(Value::Number((*i).into())),
+        Variant::Int64(i) => Ok(Value::Number((*i).into())),
+        Variant::Float(f) => serde_json::Number::from_f64(*f as f64)
+            .map(Value::Number)
+            .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid float 
value".to_string())),
+        Variant::Double(f) => serde_json::Number::from_f64(*f)
+            .map(Value::Number)
+            .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid double 
value".to_string())),
+        Variant::Decimal4(VariantDecimal4 { integer, scale }) => {
+            // Use integer arithmetic to avoid f64 precision loss
+            if *scale == 0 {
+                Ok(Value::Number((*integer).into()))
+            } else {
+                let divisor = 10_i32.pow(*scale as u32);
+                let quotient = integer / divisor;
+                let remainder = (integer % divisor).abs();
+                let formatted_remainder = format!("{:0width$}", remainder, 
width = *scale as usize);
+                let trimmed_remainder = 
formatted_remainder.trim_end_matches('0');
+
+                let decimal_str = if trimmed_remainder.is_empty() {
+                    quotient.to_string()
+                } else {
+                    format!("{}.{}", quotient, trimmed_remainder)
+                };
+
+                // Parse as serde_json::Number to preserve precision

Review Comment:
   This is an interesting conundrum... I don't think parsing actually preserves 
any precision, other than choosing between `f64` and `i64`. Also, 
`serde_json::Value` provides an extensive set of `impl From`, so the value can 
be built directly instead of going through a string: 
   ```rust
   let integer = if *scale == 0 {
       *integer
   } else {
       let divisor = ...;
       if integer % divisor != 0 {
           // fall back to floating point
           return Ok(Value::from(*integer as f64 / divisor as f64));
       }
       integer / divisor
   };
   Ok(Value::from(integer))
   ```
   
   The above works for i32 and i64, but i128 would need somewhat different 
handling of the final `integer`:
   ```rust
   // Prefer to emit as i64, but fall back to u64 or even f64 (lossy) if
   // necessary
   let value = i64::try_from(integer)
       .map(Value::from)
       .or_else(|_| u64::try_from(integer).map(Value::from))
       .unwrap_or_else(|_| Value::from(integer as f64));
   Ok(value)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to