This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new bb607dc5 Support deserializing bytes (#1820)
bb607dc5 is described below

commit bb607dc5f550b694dc1741b1d95294500b5959c6
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Nov 10 01:38:14 2025 +0000

    Support deserializing bytes (#1820)
    
    ## Which issue does this PR close?
    
    - Closes #86
    
    ## What changes are included in this PR?
    
    ## Are these changes tested?
---
 crates/iceberg/src/spec/values.rs | 303 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 297 insertions(+), 6 deletions(-)

diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs
index 4c763a39..2b4ac927 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -2682,12 +2682,71 @@ mod _serde {
                     Type::Primitive(PrimitiveType::String) => Ok(Some(Literal::string(v))),
                     _ => Err(invalid_err("string")),
                 },
-                // # TODO:https://github.com/apache/iceberg-rust/issues/86
-                // rust avro don't support deserialize any bytes representation now.
-                RawLiteralEnum::Bytes(_) => Err(invalid_err_with_reason(
-                    "bytes",
-                    "todo: rust avro doesn't support deserialize any bytes representation now",
-                )),
+                RawLiteralEnum::Bytes(v) => match ty {
+                    Type::Primitive(PrimitiveType::Binary) => Ok(Some(Literal::binary(v.to_vec()))),
+                    Type::Primitive(PrimitiveType::Fixed(expected_len)) => {
+                        if v.len() == *expected_len as usize {
+                            Ok(Some(Literal::fixed(v.to_vec())))
+                        } else {
+                            Err(invalid_err_with_reason(
+                                "bytes",
+                                &format!(
+                                    "Fixed type must be exactly {} bytes, got {}",
+                                    expected_len,
+                                    v.len()
+                                ),
+                            ))
+                        }
+                    }
+                    Type::Primitive(PrimitiveType::Uuid) => {
+                        if v.len() == 16 {
+                            let bytes: [u8; 16] = v.as_slice().try_into().map_err(|_| {
+                                invalid_err_with_reason("bytes", "UUID must be exactly 16 bytes")
+                            })?;
+                            Ok(Some(Literal::uuid(uuid::Uuid::from_bytes(bytes))))
+                        } else {
+                            Err(invalid_err_with_reason(
+                                "bytes",
+                                "UUID must be exactly 16 bytes",
+                            ))
+                        }
+                    }
+                    Type::Primitive(PrimitiveType::Decimal { precision, .. }) => {
+                        let required_bytes = Type::decimal_required_bytes(*precision)? as usize;
+
+                        if v.len() == required_bytes {
+                            // Pad the bytes to 16 bytes (i128 size) with sign extension
+                            let mut padded_bytes = [0u8; 16];
+                            let start_idx = 16 - v.len();
+
+                            // Copy the input bytes to the end of the array
+                            padded_bytes[start_idx..].copy_from_slice(&v);
+
+                            // Sign extend if the number is negative (MSB is 1)
+                            if !v.is_empty() && (v[0] & 0x80) != 0 {
+                                // Fill the padding with 0xFF for negative numbers
+                                for byte in &mut padded_bytes[..start_idx] {
+                                    *byte = 0xFF;
+                                }
+                            }
+
+                            Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128(
+                                i128::from_be_bytes(padded_bytes),
+                            ))))
+                        } else {
+                            Err(invalid_err_with_reason(
+                                "bytes",
+                                &format!(
+                                    "Decimal with precision {} must be exactly {} bytes, got {}",
+                                    precision,
+                                    required_bytes,
+                                    v.len()
+                                ),
+                            ))
+                        }
+                    }
+                    _ => Err(invalid_err("bytes")),
+                },
                 RawLiteralEnum::List(v) => match ty {
                     Type::List(ty) => Ok(Some(Literal::List(
                         v.list
@@ -3309,6 +3368,238 @@ mod tests {
         }
     }
 
+    fn check_raw_literal_bytes_serde_via_avro(
+        input_bytes: Vec<u8>,
+        expected_literal: Literal,
+        expected_type: &Type,
+    ) {
+        use apache_avro::types::Value;
+
+        // Create an Avro bytes value and deserialize it through the RawLiteral path
+        let avro_value = Value::Bytes(input_bytes);
+        let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap();
+        let result = raw_literal.try_into(expected_type).unwrap();
+        assert_eq!(result, Some(expected_literal));
+    }
+
+    fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec<u8>, expected_type: &Type) {
+        use apache_avro::types::Value;
+
+        let avro_value = Value::Bytes(input_bytes);
+        let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap();
+        let result = raw_literal.try_into(expected_type);
+        assert!(result.is_err(), "Expected error but got: {:?}", result);
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_binary() {
+        let bytes = vec![1u8, 2u8, 3u8, 4u8, 5u8];
+        check_raw_literal_bytes_serde_via_avro(
+            bytes.clone(),
+            Literal::binary(bytes),
+            &Type::Primitive(PrimitiveType::Binary),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_binary_empty() {
+        let bytes = vec![];
+        check_raw_literal_bytes_serde_via_avro(
+            bytes.clone(),
+            Literal::binary(bytes),
+            &Type::Primitive(PrimitiveType::Binary),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_fixed_correct_length() {
+        let bytes = vec![1u8, 2u8, 3u8, 4u8];
+        check_raw_literal_bytes_serde_via_avro(
+            bytes.clone(),
+            Literal::fixed(bytes),
+            &Type::Primitive(PrimitiveType::Fixed(4)),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_fixed_wrong_length() {
+        let bytes = vec![1u8, 2u8, 3u8]; // 3 bytes, but expecting 4
+        check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Fixed(4)));
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_fixed_empty_correct_length() {
+        let bytes = vec![];
+        check_raw_literal_bytes_serde_via_avro(
+            bytes.clone(),
+            Literal::fixed(bytes),
+            &Type::Primitive(PrimitiveType::Fixed(0)),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_uuid_correct_length() {
+        let uuid_bytes = vec![
+            0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab,
+            0xcd, 0xef,
+        ];
+        let expected_uuid = u128::from_be_bytes([
+            0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab,
+            0xcd, 0xef,
+        ]);
+        check_raw_literal_bytes_serde_via_avro(
+            uuid_bytes,
+            Literal::Primitive(PrimitiveLiteral::UInt128(expected_uuid)),
+            &Type::Primitive(PrimitiveType::Uuid),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_uuid_wrong_length() {
+        let bytes = vec![1u8, 2u8, 3u8]; // 3 bytes, but UUID needs 16
+        check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Uuid));
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_4_scale_2() {
+        // Precision 4 requires 2 bytes
+        let decimal_bytes = vec![0x04, 0xd2]; // 1234 in 2 bytes
+        let expected_decimal = 1234i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 4,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_4_negative() {
+        // Precision 4 requires 2 bytes, negative number
+        let decimal_bytes = vec![0xfb, 0x2e]; // -1234 in 2 bytes
+        let expected_decimal = -1234i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 4,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_9_scale_2() {
+        // Precision 9 requires 4 bytes
+        let decimal_bytes = vec![0x00, 0x12, 0xd6, 0x87]; // 1234567 in 4 bytes
+        let expected_decimal = 1234567i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 9,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_18_scale_2() {
+        // Precision 18 requires 8 bytes
+        let decimal_bytes = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xd2]; // 1234 in 8 bytes
+        let expected_decimal = 1234i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 18,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_38_scale_2() {
+        // Precision 38 requires 16 bytes (maximum precision)
+        let decimal_bytes = vec![
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x04, 0xd2, // 1234 in 16 bytes
+        ];
+        let expected_decimal = 1234i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 38,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_1_scale_0() {
+        // Precision 1 requires 1 byte
+        let decimal_bytes = vec![0x07]; // 7 in 1 byte
+        let expected_decimal = 7i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 1,
+                scale: 0,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_precision_1_negative() {
+        // Precision 1 requires 1 byte, negative number
+        let decimal_bytes = vec![0xf9]; // -7 in 1 byte (two's complement)
+        let expected_decimal = -7i128;
+        check_raw_literal_bytes_serde_via_avro(
+            decimal_bytes,
+            Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)),
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 1,
+                scale: 0,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_wrong_length() {
+        // 3 bytes provided, but precision 4 requires 2 bytes
+        let bytes = vec![1u8, 2u8, 3u8];
+        check_raw_literal_bytes_error_via_avro(
+            bytes,
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 4,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_decimal_wrong_length_too_few() {
+        // 1 byte provided, but precision 9 requires 4 bytes
+        let bytes = vec![0x42];
+        check_raw_literal_bytes_error_via_avro(
+            bytes,
+            &Type::Primitive(PrimitiveType::Decimal {
+                precision: 9,
+                scale: 2,
+            }),
+        );
+    }
+
+    #[test]
+    fn test_raw_literal_bytes_unsupported_type() {
+        let bytes = vec![1u8, 2u8, 3u8, 4u8];
+        check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Int));
+    }
+
     #[test]
     fn avro_convert_test_int() {
         check_convert_with_avro(

Reply via email to