This is an automated email from the ASF dual-hosted git repository.

blackmwk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b8e78c9c fix(spec): support JSON binary literals (#2476)
5b8e78c9c is described below

commit 5b8e78c9c7a71d8569715c4a97ce9f87bf3fb550
Author: Minh Vu <[email protected]>
AuthorDate: Thu May 21 11:13:13 2026 +0200

    fix(spec): support JSON binary literals (#2476)
    
    ### Summary
    
    - implement JSON hex parsing for `binary` and `fixed` literals
    - serialize binary-like literals as two-character-per-byte lowercase hex
    - validate `fixed[N]` JSON values have exactly `N` decoded bytes
    - add coverage for valid hex, uppercase input, invalid hex, and
    fixed-size mismatches
    
    ### Why
    
    The previous JSON literal parser used `todo!()` for `binary` and
    `fixed`, so valid JSON values could panic. Serialization also emitted
    bytes without zero padding, which could collapse byte boundaries for
    values containing `0x00` through `0x0f`.
    
    Fixes #2475.
    
    ### Tests
    
    - `cargo fmt --check`
    - `cargo test -p iceberg json`
    - `cargo test -p iceberg spec::values`
    - `cargo test -p iceberg`
---
 crates/iceberg/src/spec/values/literal.rs | 77 +++++++++++++++++++++++++++----
 crates/iceberg/src/spec/values/tests.rs   | 72 +++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 9 deletions(-)

diff --git a/crates/iceberg/src/spec/values/literal.rs b/crates/iceberg/src/spec/values/literal.rs
index e82fa197c..69e5cc02a 100644
--- a/crates/iceberg/src/spec/values/literal.rs
+++ b/crates/iceberg/src/spec/values/literal.rs
@@ -499,8 +499,14 @@ impl Literal {
                 (PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
                     PrimitiveLiteral::UInt128(Uuid::parse_str(&s)?.as_u128()),
                 ))),
-                (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(),
-                (PrimitiveType::Binary, JsonValue::String(_)) => todo!(),
+                (PrimitiveType::Fixed(size), JsonValue::String(s)) => {
+                    let bytes = decode_hex_bytes(&s)?;
+                    validate_fixed_size(bytes.len(), *size)?;
+                    Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(bytes))))
+                }
+                (PrimitiveType::Binary, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
+                    PrimitiveLiteral::Binary(decode_hex_bytes(&s)?),
+                ))),
                 (
                     PrimitiveType::Decimal {
                         precision: _,
@@ -659,13 +665,13 @@ impl Literal {
                 (_, PrimitiveLiteral::UInt128(val)) => {
                     Ok(JsonValue::String(Uuid::from_u128(val).to_string()))
                 }
-                (_, PrimitiveLiteral::Binary(val)) => Ok(JsonValue::String(val.iter().fold(
-                    String::new(),
-                    |mut acc, x| {
-                        acc.push_str(&format!("{x:x}"));
-                        acc
-                    },
-                ))),
+                (PrimitiveType::Fixed(size), PrimitiveLiteral::Binary(val)) => {
+                    validate_fixed_size(val.len(), *size)?;
+                    Ok(JsonValue::String(encode_hex_bytes(&val)))
+                }
+                (PrimitiveType::Binary, PrimitiveLiteral::Binary(val)) => {
+                    Ok(JsonValue::String(encode_hex_bytes(&val)))
+                }
                 (_, PrimitiveLiteral::Int128(val)) => match r#type {
                     Type::Primitive(PrimitiveType::Decimal {
                         precision: _precision,
@@ -744,3 +750,56 @@ impl Literal {
         }
     }
 }
+
+fn decode_hex_bytes(value: &str) -> Result<Vec<u8>> {
+    if !value.len().is_multiple_of(2) {
+        return Err(Error::new(
+            ErrorKind::DataInvalid,
+            format!("Hex string must have an even number of characters: {value:?}"),
+        ));
+    }
+
+    value
+        .as_bytes()
+        .chunks_exact(2)
+        .map(|chunk| {
+            let high = decode_hex_digit(chunk[0], value)?;
+            let low = decode_hex_digit(chunk[1], value)?;
+            Ok((high << 4) | low)
+        })
+        .collect()
+}
+
+fn decode_hex_digit(digit: u8, value: &str) -> Result<u8> {
+    match digit {
+        b'0'..=b'9' => Ok(digit - b'0'),
+        b'a'..=b'f' => Ok(digit - b'a' + 10),
+        b'A'..=b'F' => Ok(digit - b'A' + 10),
+        _ => Err(Error::new(
+            ErrorKind::DataInvalid,
+            format!("Hex string contains invalid character: {value:?}"),
+        )),
+    }
+}
+
+fn encode_hex_bytes(bytes: &[u8]) -> String {
+    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
+
+    let mut output = String::with_capacity(bytes.len() * 2);
+    for byte in bytes {
+        output.push(HEX_DIGITS[(byte >> 4) as usize] as char);
+        output.push(HEX_DIGITS[(byte & 0x0f) as usize] as char);
+    }
+    output
+}
+
+fn validate_fixed_size(actual: usize, expected: u64) -> Result<()> {
+    if actual as u64 == expected {
+        Ok(())
+    } else {
+        Err(Error::new(
+            ErrorKind::DataInvalid,
+            format!("Fixed type must be exactly {expected} bytes, got {actual}"),
+        ))
+    }
+}
diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs
index 41238ed89..d3e0a455c 100644
--- a/crates/iceberg/src/spec/values/tests.rs
+++ b/crates/iceberg/src/spec/values/tests.rs
@@ -251,6 +251,78 @@ fn json_decimal() {
     );
 }
 
+#[test]
+fn json_binary() {
+    let record = r#""00010fff""#;
+
+    check_json_serde(
+        record,
+        Literal::Primitive(PrimitiveLiteral::Binary(vec![0, 1, 15, 255])),
+        &Type::Primitive(PrimitiveType::Binary),
+    );
+}
+
+#[test]
+fn json_fixed() {
+    let record = r#""00010fff""#;
+
+    check_json_serde(
+        record,
+        Literal::Primitive(PrimitiveLiteral::Binary(vec![0, 1, 15, 255])),
+        &Type::Primitive(PrimitiveType::Fixed(4)),
+    );
+}
+
+#[test]
+fn test_should_parse_json_binary_if_hex_uses_uppercase_digits() {
+    let result = Literal::try_from_json(
+        serde_json::json!("00010FFF"),
+        &Type::Primitive(PrimitiveType::Binary),
+    )
+    .unwrap();
+
+    assert_eq!(
+        result,
+        Some(Literal::Primitive(PrimitiveLiteral::Binary(vec![
+            0, 1, 15, 255
+        ])))
+    );
+}
+
+#[test]
+fn test_should_reject_json_binary_if_hex_is_invalid() {
+    assert!(
+        Literal::try_from_json(
+            serde_json::json!("f"),
+            &Type::Primitive(PrimitiveType::Binary),
+        )
+        .is_err()
+    );
+    assert!(
+        Literal::try_from_json(
+            serde_json::json!("fg"),
+            &Type::Primitive(PrimitiveType::Binary),
+        )
+        .is_err()
+    );
+}
+
+#[test]
+fn test_should_reject_json_fixed_if_length_does_not_match() {
+    assert!(
+        Literal::try_from_json(
+            serde_json::json!("ff"),
+            &Type::Primitive(PrimitiveType::Fixed(2)),
+        )
+        .is_err()
+    );
+    assert!(
+        Literal::Primitive(PrimitiveLiteral::Binary(vec![255]))
+            .try_into_json(&Type::Primitive(PrimitiveType::Fixed(2)))
+            .is_err()
+    );
+}
+
 #[test]
 fn json_struct() {
     let record = r#"{"1": 1, "2": "bar", "3": null}"#;

Reply via email to