This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 08c46fb  remove binary serialize in literal (#456)
08c46fb is described below

commit 08c46fbb98c55ae284b31977e6307f40ef5f381e
Author: ZENOTME <[email protected]>
AuthorDate: Mon Jul 15 20:47:29 2024 +0800

    remove binary serialize in literal (#456)
    
    Co-authored-by: ZENOTME <[email protected]>
---
 crates/iceberg/src/spec/manifest.rs      |  17 +--
 crates/iceberg/src/spec/manifest_list.rs |   6 +-
 crates/iceberg/src/spec/values.rs        | 172 +++++++++----------------------
 3 files changed, 57 insertions(+), 138 deletions(-)

diff --git a/crates/iceberg/src/spec/manifest.rs 
b/crates/iceberg/src/spec/manifest.rs
index f5a5984..dd14823 100644
--- a/crates/iceberg/src/spec/manifest.rs
+++ b/crates/iceberg/src/spec/manifest.rs
@@ -1203,13 +1203,11 @@ impl std::fmt::Display for DataFileFormat {
 mod _serde {
     use std::collections::HashMap;
 
-    use serde_bytes::ByteBuf;
     use serde_derive::{Deserialize, Serialize};
     use serde_with::serde_as;
 
     use crate::spec::Datum;
     use crate::spec::Literal;
-    use crate::spec::PrimitiveLiteral;
     use crate::spec::RawLiteral;
     use crate::spec::Schema;
     use crate::spec::Struct;
@@ -1333,12 +1331,8 @@ mod _serde {
                 value_counts: Some(to_i64_entry(value.value_counts)?),
                 null_value_counts: 
Some(to_i64_entry(value.null_value_counts)?),
                 nan_value_counts: Some(to_i64_entry(value.nan_value_counts)?),
-                lower_bounds: Some(to_bytes_entry(
-                    value.lower_bounds.into_iter().map(|(k, v)| (k, v.into())),
-                )),
-                upper_bounds: Some(to_bytes_entry(
-                    value.upper_bounds.into_iter().map(|(k, v)| (k, v.into())),
-                )),
+                lower_bounds: Some(to_bytes_entry(value.lower_bounds)),
+                upper_bounds: Some(to_bytes_entry(value.upper_bounds)),
                 key_metadata: 
Some(serde_bytes::ByteBuf::from(value.key_metadata)),
                 split_offsets: Some(value.split_offsets),
                 equality_ids: Some(value.equality_ids),
@@ -1442,11 +1436,11 @@ mod _serde {
         Ok(m)
     }
 
-    fn to_bytes_entry(v: impl IntoIterator<Item = (i32, PrimitiveLiteral)>) -> 
Vec<BytesEntry> {
+    fn to_bytes_entry(v: impl IntoIterator<Item = (i32, Datum)>) -> 
Vec<BytesEntry> {
         v.into_iter()
             .map(|e| BytesEntry {
                 key: e.0,
-                value: Into::<ByteBuf>::into(e.1),
+                value: e.1.to_bytes(),
             })
             .collect()
     }
@@ -1906,8 +1900,7 @@ mod tests {
                         partition: Struct::from_iter(
                             vec![
                                 Some(
-                                    Literal::try_from_bytes(&[120], 
&Type::Primitive(PrimitiveType::String))
-                                        .unwrap()
+                                    Literal::string("x"),
                                 ),
                             ]
                                 .into_iter()
diff --git a/crates/iceberg/src/spec/manifest_list.rs 
b/crates/iceberg/src/spec/manifest_list.rs
index ec7e2d8..688bdef 100644
--- a/crates/iceberg/src/spec/manifest_list.rs
+++ b/crates/iceberg/src/spec/manifest_list.rs
@@ -675,7 +675,7 @@ pub struct FieldSummary {
 /// [ManifestFileV1] and [ManifestFileV2] are internal struct that are only 
used for serialization and deserialization.
 pub(super) mod _serde {
     use crate::{
-        spec::{Datum, PrimitiveLiteral, PrimitiveType, StructType},
+        spec::{Datum, PrimitiveType, StructType},
         Error,
     };
     pub use serde_bytes::ByteBuf;
@@ -965,8 +965,8 @@ pub(super) mod _serde {
                     .map(|v| FieldSummary {
                         contains_null: v.contains_null,
                         contains_nan: v.contains_nan,
-                        lower_bound: v.lower_bound.map(|v| 
PrimitiveLiteral::from(v).into()),
-                        upper_bound: v.upper_bound.map(|v| 
PrimitiveLiteral::from(v).into()),
+                        lower_bound: v.lower_bound.map(|v| v.to_bytes()),
+                        upper_bound: v.upper_bound.map(|v| v.to_bytes()),
                     })
                     .collect(),
             )
diff --git a/crates/iceberg/src/spec/values.rs 
b/crates/iceberg/src/spec/values.rs
index a905903..310bec1 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -382,7 +382,9 @@ impl Datum {
         Datum { r#type, literal }
     }
 
-    /// Create iceberg value from bytes
+    /// Create iceberg value from bytes.
+    ///
+    /// See [this 
spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for 
reference.
     pub fn try_from_bytes(bytes: &[u8], data_type: PrimitiveType) -> 
Result<Self> {
         let literal = match data_type {
             PrimitiveType::Boolean => {
@@ -424,6 +426,34 @@ impl Datum {
         Ok(Datum::new(data_type, literal))
     }
 
+    /// Convert the value to bytes
+    ///
+    /// See [this 
spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for 
reference.
+    pub fn to_bytes(&self) -> ByteBuf {
+        match &self.literal {
+            PrimitiveLiteral::Boolean(val) => {
+                if *val {
+                    ByteBuf::from([1u8])
+                } else {
+                    ByteBuf::from([0u8])
+                }
+            }
+            PrimitiveLiteral::Int(val) => ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Long(val) => ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Float(val) => ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Double(val) => ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Date(val) => ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Time(val) => ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Timestamp(val) => 
ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::Timestamptz(val) => 
ByteBuf::from(val.to_le_bytes()),
+            PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()),
+            PrimitiveLiteral::UUID(val) => 
ByteBuf::from(val.as_u128().to_be_bytes()),
+            PrimitiveLiteral::Fixed(val) => ByteBuf::from(val.as_slice()),
+            PrimitiveLiteral::Binary(val) => ByteBuf::from(val.as_slice()),
+            PrimitiveLiteral::Decimal(_) => todo!(),
+        }
+    }
+
     /// Creates a boolean value.
     ///
     /// Example:
@@ -1448,78 +1478,6 @@ impl Literal {
     }
 }
 
-impl From<PrimitiveLiteral> for ByteBuf {
-    fn from(value: PrimitiveLiteral) -> Self {
-        match value {
-            PrimitiveLiteral::Boolean(val) => {
-                if val {
-                    ByteBuf::from([1u8])
-                } else {
-                    ByteBuf::from([0u8])
-                }
-            }
-            PrimitiveLiteral::Int(val) => ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Long(val) => ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Float(val) => ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Double(val) => ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Date(val) => ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Time(val) => ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Timestamp(val) => 
ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::Timestamptz(val) => 
ByteBuf::from(val.to_le_bytes()),
-            PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()),
-            PrimitiveLiteral::UUID(val) => 
ByteBuf::from(val.as_u128().to_be_bytes()),
-            PrimitiveLiteral::Fixed(val) => ByteBuf::from(val),
-            PrimitiveLiteral::Binary(val) => ByteBuf::from(val),
-            PrimitiveLiteral::Decimal(_) => todo!(),
-        }
-    }
-}
-
-impl From<Literal> for ByteBuf {
-    fn from(value: Literal) -> Self {
-        match value {
-            Literal::Primitive(val) => val.into(),
-            _ => unimplemented!(),
-        }
-    }
-}
-
-impl From<PrimitiveLiteral> for Vec<u8> {
-    fn from(value: PrimitiveLiteral) -> Self {
-        match value {
-            PrimitiveLiteral::Boolean(val) => {
-                if val {
-                    Vec::from([1u8])
-                } else {
-                    Vec::from([0u8])
-                }
-            }
-            PrimitiveLiteral::Int(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Long(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Float(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Double(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Date(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Time(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Timestamp(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::Timestamptz(val) => Vec::from(val.to_le_bytes()),
-            PrimitiveLiteral::String(val) => Vec::from(val.as_bytes()),
-            PrimitiveLiteral::UUID(val) => 
Vec::from(val.as_u128().to_be_bytes()),
-            PrimitiveLiteral::Fixed(val) => val,
-            PrimitiveLiteral::Binary(val) => val,
-            PrimitiveLiteral::Decimal(_) => todo!(),
-        }
-    }
-}
-
-impl From<Literal> for Vec<u8> {
-    fn from(value: Literal) -> Self {
-        match value {
-            Literal::Primitive(val) => val.into(),
-            _ => unimplemented!(),
-        }
-    }
-}
-
 /// The partition struct stores the tuple of partition values for each file.
 /// Its type is derived from the partition fields of the partition spec used 
to write the manifest file.
 /// In v2, the partition struct’s field ids must match the ids from the 
partition spec.
@@ -1622,21 +1580,9 @@ impl FromIterator<Option<Literal>> for Struct {
 }
 
 impl Literal {
-    /// Create iceberg value from bytes
-    pub fn try_from_bytes(bytes: &[u8], data_type: &Type) -> Result<Self> {
-        match data_type {
-            Type::Primitive(primitive_type) => {
-                let datum = Datum::try_from_bytes(bytes, 
primitive_type.clone())?;
-                Ok(Literal::Primitive(datum.literal))
-            }
-            _ => Err(Error::new(
-                crate::ErrorKind::DataInvalid,
-                "Converting bytes to non-primitive types is not supported.",
-            )),
-        }
-    }
-
     /// Create iceberg value from a json value
+    ///
+    /// See [this 
spec](https://iceberg.apache.org/spec/#json-single-value-serialization) for 
reference.
     pub fn try_from_json(value: JsonValue, data_type: &Type) -> 
Result<Option<Self>> {
         match data_type {
             Type::Primitive(primitive) => match (primitive, value) {
@@ -2714,23 +2660,27 @@ mod tests {
         assert_eq!(parsed_json_value, raw_json_value);
     }
 
-    fn check_avro_bytes_serde(input: Vec<u8>, expected_literal: Literal, 
expected_type: &Type) {
+    fn check_avro_bytes_serde(
+        input: Vec<u8>,
+        expected_datum: Datum,
+        expected_type: &PrimitiveType,
+    ) {
         let raw_schema = r#""bytes""#;
         let schema = apache_avro::Schema::parse_str(raw_schema).unwrap();
 
         let bytes = ByteBuf::from(input);
-        let literal = Literal::try_from_bytes(&bytes, expected_type).unwrap();
-        assert_eq!(literal, expected_literal);
+        let datum = Datum::try_from_bytes(&bytes, 
expected_type.clone()).unwrap();
+        assert_eq!(datum, expected_datum);
 
         let mut writer = apache_avro::Writer::new(&schema, Vec::new());
-        writer.append_ser(ByteBuf::from(literal)).unwrap();
+        writer.append_ser(datum.to_bytes()).unwrap();
         let encoded = writer.into_inner().unwrap();
         let reader = apache_avro::Reader::with_schema(&schema, 
&*encoded).unwrap();
 
         for record in reader {
             let result = 
apache_avro::from_value::<ByteBuf>(&record.unwrap()).unwrap();
-            let desered_literal = Literal::try_from_bytes(&result, 
expected_type).unwrap();
-            assert_eq!(desered_literal, expected_literal);
+            let desered_datum = Datum::try_from_bytes(&result, 
expected_type.clone()).unwrap();
+            assert_eq!(desered_datum, expected_datum);
         }
     }
 
@@ -2999,66 +2949,42 @@ mod tests {
     fn avro_bytes_boolean() {
         let bytes = vec![1u8];
 
-        check_avro_bytes_serde(
-            bytes,
-            Literal::Primitive(PrimitiveLiteral::Boolean(true)),
-            &Type::Primitive(PrimitiveType::Boolean),
-        );
+        check_avro_bytes_serde(bytes, Datum::bool(true), 
&PrimitiveType::Boolean);
     }
 
     #[test]
     fn avro_bytes_int() {
         let bytes = vec![32u8, 0u8, 0u8, 0u8];
 
-        check_avro_bytes_serde(
-            bytes,
-            Literal::Primitive(PrimitiveLiteral::Int(32)),
-            &Type::Primitive(PrimitiveType::Int),
-        );
+        check_avro_bytes_serde(bytes, Datum::int(32), &PrimitiveType::Int);
     }
 
     #[test]
     fn avro_bytes_long() {
         let bytes = vec![32u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
 
-        check_avro_bytes_serde(
-            bytes,
-            Literal::Primitive(PrimitiveLiteral::Long(32)),
-            &Type::Primitive(PrimitiveType::Long),
-        );
+        check_avro_bytes_serde(bytes, Datum::long(32), &PrimitiveType::Long);
     }
 
     #[test]
     fn avro_bytes_float() {
         let bytes = vec![0u8, 0u8, 128u8, 63u8];
 
-        check_avro_bytes_serde(
-            bytes,
-            Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(1.0))),
-            &Type::Primitive(PrimitiveType::Float),
-        );
+        check_avro_bytes_serde(bytes, Datum::float(1.0), 
&PrimitiveType::Float);
     }
 
     #[test]
     fn avro_bytes_double() {
         let bytes = vec![0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8];
 
-        check_avro_bytes_serde(
-            bytes,
-            Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(1.0))),
-            &Type::Primitive(PrimitiveType::Double),
-        );
+        check_avro_bytes_serde(bytes, Datum::double(1.0), 
&PrimitiveType::Double);
     }
 
     #[test]
     fn avro_bytes_string() {
         let bytes = vec![105u8, 99u8, 101u8, 98u8, 101u8, 114u8, 103u8];
 
-        check_avro_bytes_serde(
-            bytes,
-            
Literal::Primitive(PrimitiveLiteral::String("iceberg".to_string())),
-            &Type::Primitive(PrimitiveType::String),
-        );
+        check_avro_bytes_serde(bytes, Datum::string("iceberg"), 
&PrimitiveType::String);
     }
 
     #[test]

Reply via email to