This is an automated email from the ASF dual-hosted git repository.
blackmwk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 5b8e78c9c fix(spec): support JSON binary literals (#2476)
5b8e78c9c is described below
commit 5b8e78c9c7a71d8569715c4a97ce9f87bf3fb550
Author: Minh Vu <[email protected]>
AuthorDate: Thu May 21 11:13:13 2026 +0200
fix(spec): support JSON binary literals (#2476)
### Summary
- implement JSON hex parsing for `binary` and `fixed` literals
- serialize binary-like literals as two-character-per-byte lowercase hex
- validate `fixed[N]` JSON values have exactly `N` decoded bytes
- add coverage for valid hex, uppercase input, invalid hex, and
fixed-size mismatches
### Why
The previous JSON literal parser used `todo!()` for `binary` and
`fixed`, so valid JSON values could panic. Serialization also emitted
bytes without zero padding, which could collapse byte boundaries for
values containing `0x00` through `0x0f`.
Fixes #2475.
### Tests
- `cargo fmt --check`
- `cargo test -p iceberg json`
- `cargo test -p iceberg spec::values`
- `cargo test -p iceberg`
---
crates/iceberg/src/spec/values/literal.rs | 77 +++++++++++++++++++++++++++----
crates/iceberg/src/spec/values/tests.rs | 72 +++++++++++++++++++++++++++++
2 files changed, 140 insertions(+), 9 deletions(-)
diff --git a/crates/iceberg/src/spec/values/literal.rs b/crates/iceberg/src/spec/values/literal.rs
index e82fa197c..69e5cc02a 100644
--- a/crates/iceberg/src/spec/values/literal.rs
+++ b/crates/iceberg/src/spec/values/literal.rs
@@ -499,8 +499,14 @@ impl Literal {
(PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::UInt128(Uuid::parse_str(&s)?.as_u128()),
))),
- (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(),
- (PrimitiveType::Binary, JsonValue::String(_)) => todo!(),
+ (PrimitiveType::Fixed(size), JsonValue::String(s)) => {
+ let bytes = decode_hex_bytes(&s)?;
+ validate_fixed_size(bytes.len(), *size)?;
+ Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(bytes))))
+ }
+ (PrimitiveType::Binary, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
+ PrimitiveLiteral::Binary(decode_hex_bytes(&s)?),
+ ))),
(
PrimitiveType::Decimal {
precision: _,
@@ -659,13 +665,13 @@ impl Literal {
(_, PrimitiveLiteral::UInt128(val)) => {
Ok(JsonValue::String(Uuid::from_u128(val).to_string()))
}
- (_, PrimitiveLiteral::Binary(val)) => Ok(JsonValue::String(val.iter().fold(
- String::new(),
- |mut acc, x| {
- acc.push_str(&format!("{x:x}"));
- acc
- },
- ))),
+ (PrimitiveType::Fixed(size), PrimitiveLiteral::Binary(val)) => {
+ validate_fixed_size(val.len(), *size)?;
+ Ok(JsonValue::String(encode_hex_bytes(&val)))
+ }
+ (PrimitiveType::Binary, PrimitiveLiteral::Binary(val)) => {
+ Ok(JsonValue::String(encode_hex_bytes(&val)))
+ }
(_, PrimitiveLiteral::Int128(val)) => match r#type {
Type::Primitive(PrimitiveType::Decimal {
precision: _precision,
@@ -744,3 +750,56 @@ impl Literal {
}
}
}
+
+fn decode_hex_bytes(value: &str) -> Result<Vec<u8>> {
+ if !value.len().is_multiple_of(2) {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Hex string must have an even number of characters: {value:?}"),
+ ));
+ }
+
+ value
+ .as_bytes()
+ .chunks_exact(2)
+ .map(|chunk| {
+ let high = decode_hex_digit(chunk[0], value)?;
+ let low = decode_hex_digit(chunk[1], value)?;
+ Ok((high << 4) | low)
+ })
+ .collect()
+}
+
+fn decode_hex_digit(digit: u8, value: &str) -> Result<u8> {
+ match digit {
+ b'0'..=b'9' => Ok(digit - b'0'),
+ b'a'..=b'f' => Ok(digit - b'a' + 10),
+ b'A'..=b'F' => Ok(digit - b'A' + 10),
+ _ => Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Hex string contains invalid character: {value:?}"),
+ )),
+ }
+}
+
+fn encode_hex_bytes(bytes: &[u8]) -> String {
+ const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
+
+ let mut output = String::with_capacity(bytes.len() * 2);
+ for byte in bytes {
+ output.push(HEX_DIGITS[(byte >> 4) as usize] as char);
+ output.push(HEX_DIGITS[(byte & 0x0f) as usize] as char);
+ }
+ output
+}
+
+fn validate_fixed_size(actual: usize, expected: u64) -> Result<()> {
+ if actual as u64 == expected {
+ Ok(())
+ } else {
+ Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Fixed type must be exactly {expected} bytes, got {actual}"),
+ ))
+ }
+}
diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs
index 41238ed89..d3e0a455c 100644
--- a/crates/iceberg/src/spec/values/tests.rs
+++ b/crates/iceberg/src/spec/values/tests.rs
@@ -251,6 +251,78 @@ fn json_decimal() {
);
}
+#[test]
+fn json_binary() {
+ let record = r#""00010fff""#;
+
+ check_json_serde(
+ record,
+ Literal::Primitive(PrimitiveLiteral::Binary(vec![0, 1, 15, 255])),
+ &Type::Primitive(PrimitiveType::Binary),
+ );
+}
+
+#[test]
+fn json_fixed() {
+ let record = r#""00010fff""#;
+
+ check_json_serde(
+ record,
+ Literal::Primitive(PrimitiveLiteral::Binary(vec![0, 1, 15, 255])),
+ &Type::Primitive(PrimitiveType::Fixed(4)),
+ );
+}
+
+#[test]
+fn test_should_parse_json_binary_if_hex_uses_uppercase_digits() {
+ let result = Literal::try_from_json(
+ serde_json::json!("00010FFF"),
+ &Type::Primitive(PrimitiveType::Binary),
+ )
+ .unwrap();
+
+ assert_eq!(
+ result,
+ Some(Literal::Primitive(PrimitiveLiteral::Binary(vec![
+ 0, 1, 15, 255
+ ])))
+ );
+}
+
+#[test]
+fn test_should_reject_json_binary_if_hex_is_invalid() {
+ assert!(
+ Literal::try_from_json(
+ serde_json::json!("f"),
+ &Type::Primitive(PrimitiveType::Binary),
+ )
+ .is_err()
+ );
+ assert!(
+ Literal::try_from_json(
+ serde_json::json!("fg"),
+ &Type::Primitive(PrimitiveType::Binary),
+ )
+ .is_err()
+ );
+}
+
+#[test]
+fn test_should_reject_json_fixed_if_length_does_not_match() {
+ assert!(
+ Literal::try_from_json(
+ serde_json::json!("ff"),
+ &Type::Primitive(PrimitiveType::Fixed(2)),
+ )
+ .is_err()
+ );
+ assert!(
+ Literal::Primitive(PrimitiveLiteral::Binary(vec![255]))
+ .try_into_json(&Type::Primitive(PrimitiveType::Fixed(2)))
+ .is_err()
+ );
+}
+
#[test]
fn json_struct() {
let record = r#"{"1": 1, "2": "bar", "3": null}"#;