This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 4bb9127c8b [Variant] Add human-readable impl Debug for Variant (#8140)
4bb9127c8b is described below

commit 4bb9127c8b5e3c1bb6697253d02aa378ece9e917
Author: Ryan Johnson <[email protected]>
AuthorDate: Thu Aug 14 10:35:47 2025 -0700

    [Variant] Add human-readable impl Debug for Variant (#8140)
    
    # Which issue does this PR close?
    
    - Related to https://github.com/apache/arrow-rs/issues/8136
    
    # Rationale for this change
    
    Unit tests need a way to verify two `Variant` are logically equivalent,
    and `Debug` is a good way to achieve that without wading into the
    complexities of a proper `PartialEq` implementation that would become
    part of the public API.
    
    More generally, byte slices are not very easy for humans to interpret,
    so it makes sense for `Debug` to do something nicer.
    
    # What changes are included in this PR?
    
    Manually `impl Debug for Variant`, maintaining a traditional look but
    with nicer handling of `Variant::Binary`, `Variant::Object` and
    `Variant::List` subtypes. The latter two use fallible iteration to avoid
    potential panics, since `Debug` is likely to be used when formatting
    error messages.
    
    # Are these changes tested?
    
    New unit test
    
    # Are there any user-facing changes?
    
    The debug formatting of `Variant` has changed.
---
 parquet-variant/src/variant.rs | 286 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 285 insertions(+), 1 deletion(-)

diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 82de637b06..eabf0ebffb 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -211,7 +211,7 @@ impl Deref for ShortString<'_> {
 /// [metadata]: VariantMetadata#Validation
 /// [object]: VariantObject#Validation
 /// [array]: VariantList#Validation
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Clone, PartialEq)]
 pub enum Variant<'m, 'v> {
     /// Primitive type: Null
     Null,
@@ -1286,6 +1286,77 @@ impl TryFrom<(i128, u8)> for Variant<'_, '_> {
     }
 }
 
+// helper to print <invalid> instead of "<invalid>" in debug mode when a 
VariantObject or VariantList contains invalid values.
+struct InvalidVariant;
+
+impl std::fmt::Debug for InvalidVariant {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "<invalid>")
+    }
+}
+
+// helper to print binary data in hex format in debug mode, as space-separated 
hex byte values.
+struct HexString<'a>(&'a [u8]);
+
+impl<'a> std::fmt::Debug for HexString<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some((first, rest)) = self.0.split_first() {
+            write!(f, "{:02x}", first)?;
+            for b in rest {
+                write!(f, " {:02x}", b)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Debug for Variant<'_, '_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Variant::Null => write!(f, "Null"),
+            Variant::BooleanTrue => write!(f, "BooleanTrue"),
+            Variant::BooleanFalse => write!(f, "BooleanFalse"),
+            Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
+            Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
+            Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
+            Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
+            Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
+            Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
+            Variant::Decimal4(d) => 
f.debug_tuple("Decimal4").field(d).finish(),
+            Variant::Decimal8(d) => 
f.debug_tuple("Decimal8").field(d).finish(),
+            Variant::Decimal16(d) => 
f.debug_tuple("Decimal16").field(d).finish(),
+            Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
+            Variant::TimestampMicros(ts) => 
f.debug_tuple("TimestampMicros").field(ts).finish(),
+            Variant::TimestampNtzMicros(ts) => {
+                f.debug_tuple("TimestampNtzMicros").field(ts).finish()
+            }
+            Variant::Binary(bytes) => write!(f, "Binary({:?})", 
HexString(bytes)),
+            Variant::String(s) => f.debug_tuple("String").field(s).finish(),
+            Variant::ShortString(s) => 
f.debug_tuple("ShortString").field(s).finish(),
+            Variant::Object(obj) => {
+                let mut map = f.debug_map();
+                for res in obj.iter_try() {
+                    match res {
+                        Ok((k, v)) => map.entry(&k, &v),
+                        Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
+                    };
+                }
+                map.finish()
+            }
+            Variant::List(arr) => {
+                let mut list = f.debug_list();
+                for res in arr.iter_try() {
+                    match res {
+                        Ok(v) => list.entry(&v),
+                        Err(_) => list.entry(&InvalidVariant),
+                    };
+                }
+                list.finish()
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
 
@@ -1326,4 +1397,217 @@ mod tests {
         let variant = Variant::from(decimal16);
         assert_eq!(variant.as_decimal16(), Some(decimal16));
     }
+
+    #[test]
+    fn test_variant_all_subtypes_debug() {
+        use crate::VariantBuilder;
+
+        let mut builder = VariantBuilder::new();
+
+        // Create a root object that contains one of every variant subtype
+        let mut root_obj = builder.new_object();
+
+        // Add primitive types
+        root_obj.insert("null", ());
+        root_obj.insert("boolean_true", true);
+        root_obj.insert("boolean_false", false);
+        root_obj.insert("int8", 42i8);
+        root_obj.insert("int16", 1234i16);
+        root_obj.insert("int32", 123456i32);
+        root_obj.insert("int64", 1234567890123456789i64);
+        root_obj.insert("float", 1.234f32);
+        root_obj.insert("double", 1.23456789f64);
+
+        // Add date and timestamp types
+        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
+        root_obj.insert("date", date);
+
+        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
+            .unwrap()
+            .and_hms_milli_opt(15, 30, 45, 123)
+            .unwrap()
+            .and_utc();
+        root_obj.insert("timestamp_micros", 
Variant::TimestampMicros(timestamp_utc));
+
+        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
+            .unwrap()
+            .and_hms_milli_opt(15, 30, 45, 123)
+            .unwrap();
+        root_obj.insert(
+            "timestamp_ntz_micros",
+            Variant::TimestampNtzMicros(timestamp_ntz),
+        );
+
+        // Add decimal types
+        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
+        root_obj.insert("decimal4", decimal4);
+
+        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
+        root_obj.insert("decimal8", decimal8);
+
+        let decimal16 = 
VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
+        root_obj.insert("decimal16", decimal16);
+
+        // Add binary and string types
+        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
+        root_obj.insert("binary", binary_data.as_slice());
+
+        let long_string =
+            "This is a long string that exceeds the short string limit and 
contains emoji 🦀";
+        root_obj.insert("string", long_string);
+        root_obj.insert("short_string", "Short string with emoji 🎉");
+
+        // Add nested object
+        let mut nested_obj = root_obj.new_object("nested_object");
+        nested_obj.insert("inner_key1", "inner_value1");
+        nested_obj.insert("inner_key2", 999i32);
+        nested_obj.finish().unwrap();
+
+        // Add list with mixed types
+        let mut mixed_list = root_obj.new_list("mixed_list");
+        mixed_list.append_value(1i32);
+        mixed_list.append_value("two");
+        mixed_list.append_value(true);
+        mixed_list.append_value(4.0f32);
+        mixed_list.append_value(());
+
+        // Add nested list inside the mixed list
+        let mut nested_list = mixed_list.new_list();
+        nested_list.append_value("nested");
+        nested_list.append_value(10i8);
+        nested_list.finish();
+
+        mixed_list.finish();
+
+        root_obj.finish().unwrap();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        // Test Debug formatter (?)
+        let debug_output = format!("{:?}", variant);
+
+        // Verify that the debug output contains all the expected types
+        assert!(debug_output.contains("\"null\": Null"));
+        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
+        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
+        assert!(debug_output.contains("\"int8\": Int8(42)"));
+        assert!(debug_output.contains("\"int16\": Int16(1234)"));
+        assert!(debug_output.contains("\"int32\": Int32(123456)"));
+        assert!(debug_output.contains("\"int64\": 
Int64(1234567890123456789)"));
+        assert!(debug_output.contains("\"float\": Float(1.234)"));
+        assert!(debug_output.contains("\"double\": Double(1.23456789"));
+        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
+        assert!(debug_output.contains("\"timestamp_micros\": 
TimestampMicros("));
+        assert!(debug_output.contains("\"timestamp_ntz_micros\": 
TimestampNtzMicros("));
+        assert!(debug_output.contains("\"decimal4\": Decimal4("));
+        assert!(debug_output.contains("\"decimal8\": Decimal8("));
+        assert!(debug_output.contains("\"decimal16\": Decimal16("));
+        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be 
ef)"));
+        assert!(debug_output.contains("\"string\": String("));
+        assert!(debug_output.contains("\"short_string\": ShortString("));
+        assert!(debug_output.contains("\"nested_object\":"));
+        assert!(debug_output.contains("\"mixed_list\":"));
+
+        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), 
"boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": 
Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 
123456789012345678901234567890, scale: 4 }), "decimal4": 
Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": 
Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": 
Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": 
Int32(123456), "i [...]
+        assert_eq!(debug_output, expected);
+
+        // Test alternate Debug formatter (#?)
+        let alt_debug_output = format!("{:#?}", variant);
+        let expected = r#"{
+    "binary": Binary(01 02 03 04 de ad be ef),
+    "boolean_false": BooleanFalse,
+    "boolean_true": BooleanTrue,
+    "date": Date(
+        2024-12-25,
+    ),
+    "decimal16": Decimal16(
+        VariantDecimal16 {
+            integer: 123456789012345678901234567890,
+            scale: 4,
+        },
+    ),
+    "decimal4": Decimal4(
+        VariantDecimal4 {
+            integer: 1234,
+            scale: 2,
+        },
+    ),
+    "decimal8": Decimal8(
+        VariantDecimal8 {
+            integer: 123456789,
+            scale: 3,
+        },
+    ),
+    "double": Double(
+        1.23456789,
+    ),
+    "float": Float(
+        1.234,
+    ),
+    "int16": Int16(
+        1234,
+    ),
+    "int32": Int32(
+        123456,
+    ),
+    "int64": Int64(
+        1234567890123456789,
+    ),
+    "int8": Int8(
+        42,
+    ),
+    "mixed_list": [
+        Int32(
+            1,
+        ),
+        ShortString(
+            ShortString(
+                "two",
+            ),
+        ),
+        BooleanTrue,
+        Float(
+            4.0,
+        ),
+        Null,
+        [
+            ShortString(
+                ShortString(
+                    "nested",
+                ),
+            ),
+            Int8(
+                10,
+            ),
+        ],
+    ],
+    "nested_object": {
+        "inner_key1": ShortString(
+            ShortString(
+                "inner_value1",
+            ),
+        ),
+        "inner_key2": Int32(
+            999,
+        ),
+    },
+    "null": Null,
+    "short_string": ShortString(
+        ShortString(
+            "Short string with emoji 🎉",
+        ),
+    ),
+    "string": String(
+        "This is a long string that exceeds the short string limit and 
contains emoji 🦀",
+    ),
+    "timestamp_micros": TimestampMicros(
+        2024-12-25T15:30:45.123Z,
+    ),
+    "timestamp_ntz_micros": TimestampNtzMicros(
+        2024-12-25T15:30:45.123,
+    ),
+}"#;
+        assert_eq!(alt_debug_output, expected);
+    }
 }

Reply via email to