This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 811fd1d  feat: Add expression builder and display. (#169)
811fd1d is described below

commit 811fd1d04230883e7288675116da32917f6591b6
Author: Renjie Liu <[email protected]>
AuthorDate: Fri Mar 1 19:58:05 2024 +0800

    feat: Add expression builder and display. (#169)
    
    * feat: Add expression builder and display.
    
    * Fix comments
    
    * Fix doc test
    
    * Fix name of op
    
    * Fix comments
    
    * Fix timestamp
---
 crates/iceberg/src/expr/mod.rs       | 132 ++++++-
 crates/iceberg/src/expr/predicate.rs | 205 ++++++++++-
 crates/iceberg/src/expr/term.rs      |  74 +++-
 crates/iceberg/src/spec/values.rs    | 654 ++++++++++++++++++++++++++++++++++-
 4 files changed, 1021 insertions(+), 44 deletions(-)

diff --git a/crates/iceberg/src/expr/mod.rs b/crates/iceberg/src/expr/mod.rs
index aef1444..ef3d2a6 100644
--- a/crates/iceberg/src/expr/mod.rs
+++ b/crates/iceberg/src/expr/mod.rs
@@ -18,25 +18,129 @@
 //! This module contains expressions.
 
 mod term;
+
+use std::fmt::{Display, Formatter};
+
 pub use term::*;
 mod predicate;
 pub use predicate::*;
 
 /// Predicate operators used in expressions.
+///
+/// The discriminant of this enum is used for determining the type of the 
operator, see
+/// [`PredicateOperator::is_unary`], [`PredicateOperator::is_binary`], 
[`PredicateOperator::is_set`]
 #[allow(missing_docs)]
+#[derive(Debug, Clone, Copy)]
+#[repr(u16)]
 pub enum PredicateOperator {
-    IsNull,
-    NotNull,
-    IsNan,
-    NotNan,
-    LessThan,
-    LessThanOrEq,
-    GreaterThan,
-    GreaterThanOrEq,
-    Eq,
-    NotEq,
-    In,
-    NotIn,
-    StartsWith,
-    NotStartsWith,
+    // Unary operators
+    IsNull = 101,
+    NotNull = 102,
+    IsNan = 103,
+    NotNan = 104,
+
+    // Binary operators
+    LessThan = 201,
+    LessThanOrEq = 202,
+    GreaterThan = 203,
+    GreaterThanOrEq = 204,
+    Eq = 205,
+    NotEq = 206,
+    StartsWith = 207,
+    NotStartsWith = 208,
+
+    // Set operators
+    In = 301,
+    NotIn = 302,
+}
+
+impl Display for PredicateOperator {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PredicateOperator::IsNull => write!(f, "IS NULL"),
+            PredicateOperator::NotNull => write!(f, "IS NOT NULL"),
+            PredicateOperator::IsNan => write!(f, "IS NAN"),
+            PredicateOperator::NotNan => write!(f, "IS NOT NAN"),
+            PredicateOperator::LessThan => write!(f, "<"),
+            PredicateOperator::LessThanOrEq => write!(f, "<="),
+            PredicateOperator::GreaterThan => write!(f, ">"),
+            PredicateOperator::GreaterThanOrEq => write!(f, ">="),
+            PredicateOperator::Eq => write!(f, "="),
+            PredicateOperator::NotEq => write!(f, "!="),
+            PredicateOperator::In => write!(f, "IN"),
+            PredicateOperator::NotIn => write!(f, "NOT IN"),
+            PredicateOperator::StartsWith => write!(f, "STARTS WITH"),
+            PredicateOperator::NotStartsWith => write!(f, "NOT STARTS WITH"),
+        }
+    }
+}
+
+impl PredicateOperator {
+    /// Check if this operator is unary operator.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use iceberg::expr::PredicateOperator;
+    /// assert!(PredicateOperator::IsNull.is_unary());
+    /// ```
+    pub fn is_unary(self) -> bool {
+        (self as u16) < (PredicateOperator::LessThan as u16)
+    }
+
+    /// Check if this operator is binary operator.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use iceberg::expr::PredicateOperator;
+    /// assert!(PredicateOperator::LessThan.is_binary());
+    /// ```
+    pub fn is_binary(self) -> bool {
+        ((self as u16) > (PredicateOperator::NotNan as u16))
+            && ((self as u16) < (PredicateOperator::In as u16))
+    }
+
+    /// Check if this operator is set operator.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use iceberg::expr::PredicateOperator;
+    /// assert!(PredicateOperator::In.is_set());
+    /// ```
+    pub fn is_set(self) -> bool {
+        (self as u16) > (PredicateOperator::NotStartsWith as u16)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::expr::PredicateOperator;
+
+    #[test]
+    fn test_unary() {
+        assert!(PredicateOperator::IsNull.is_unary());
+        assert!(PredicateOperator::NotNull.is_unary());
+        assert!(PredicateOperator::IsNan.is_unary());
+        assert!(PredicateOperator::NotNan.is_unary());
+    }
+
+    #[test]
+    fn test_binary() {
+        assert!(PredicateOperator::LessThan.is_binary());
+        assert!(PredicateOperator::LessThanOrEq.is_binary());
+        assert!(PredicateOperator::GreaterThan.is_binary());
+        assert!(PredicateOperator::GreaterThanOrEq.is_binary());
+        assert!(PredicateOperator::Eq.is_binary());
+        assert!(PredicateOperator::NotEq.is_binary());
+        assert!(PredicateOperator::StartsWith.is_binary());
+        assert!(PredicateOperator::NotStartsWith.is_binary());
+    }
+
+    #[test]
+    fn test_set() {
+        assert!(PredicateOperator::In.is_set());
+        assert!(PredicateOperator::NotIn.is_set());
+    }
 }
diff --git a/crates/iceberg/src/expr/predicate.rs 
b/crates/iceberg/src/expr/predicate.rs
index 9d6bf86..c9c047e 100644
--- a/crates/iceberg/src/expr/predicate.rs
+++ b/crates/iceberg/src/expr/predicate.rs
@@ -19,15 +19,40 @@
 //! Predicate expressions are used to filter data, and evaluates to a boolean 
value. For example,
 //! `a > 10` is a predicate expression, and it evaluates to `true` if `a` is 
greater than `10`,
 
-use crate::expr::{BoundReference, PredicateOperator, UnboundReference};
-use crate::spec::Literal;
+use crate::expr::{BoundReference, PredicateOperator, Reference};
+use crate::spec::Datum;
 use std::collections::HashSet;
+use std::fmt::{Debug, Display, Formatter};
+use std::ops::Not;
 
 /// Logical expression, such as `AND`, `OR`, `NOT`.
 pub struct LogicalExpression<T, const N: usize> {
     inputs: [Box<T>; N],
 }
 
+impl<T: Debug, const N: usize> Debug for LogicalExpression<T, N> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("LogicalExpression")
+            .field("inputs", &self.inputs)
+            .finish()
+    }
+}
+
+impl<T, const N: usize> LogicalExpression<T, N> {
+    /// Creates a logical expression that takes ownership of the boxed inputs.
+    fn new(inputs: [Box<T>; N]) -> Self {
+        Self { inputs }
+    }
+
+    /// Return inputs of this logical expression.
+    ///
+    /// The returned array borrows every input, in the same order they are stored.
+    pub fn inputs(&self) -> [&T; N] {
+        // Build the array element by element instead of seeding it from
+        // `self.inputs[0]`, which would index out of bounds when `N == 0`.
+        std::array::from_fn(|i| self.inputs[i].as_ref())
+    }
+}
+
 /// Unary predicate, for example, `a IS NULL`.
 pub struct UnaryExpression<T> {
     /// Operator of this predicate, must be single operand operator.
@@ -36,6 +61,28 @@ pub struct UnaryExpression<T> {
     term: T,
 }
 
+impl<T: Debug> Debug for UnaryExpression<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("UnaryExpression")
+            .field("op", &self.op)
+            .field("term", &self.term)
+            .finish()
+    }
+}
+
+impl<T: Display> Display for UnaryExpression<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} {}", self.term, self.op)
+    }
+}
+
+impl<T> UnaryExpression<T> {
+    pub(crate) fn new(op: PredicateOperator, term: T) -> Self {
+        debug_assert!(op.is_unary());
+        Self { op, term }
+    }
+}
+
 /// Binary predicate, for example, `a > 10`.
 pub struct BinaryExpression<T> {
     /// Operator of this predicate, must be binary operator, such as `=`, `>`, 
`<`, etc.
@@ -43,7 +90,30 @@ pub struct BinaryExpression<T> {
     /// Term of this predicate, for example, `a` in `a > 10`.
     term: T,
     /// Literal of this predicate, for example, `10` in `a > 10`.
-    literal: Literal,
+    literal: Datum,
+}
+
+impl<T: Debug> Debug for BinaryExpression<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BinaryExpression")
+            .field("op", &self.op)
+            .field("term", &self.term)
+            .field("literal", &self.literal)
+            .finish()
+    }
+}
+
+impl<T> BinaryExpression<T> {
+    pub(crate) fn new(op: PredicateOperator, term: T, literal: Datum) -> Self {
+        debug_assert!(op.is_binary());
+        Self { op, term, literal }
+    }
+}
+
+impl<T: Display> Display for BinaryExpression<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} {} {}", self.term, self.op, self.literal)
+    }
 }
 
 /// Set predicates, for example, `a in (1, 2, 3)`.
@@ -53,26 +123,139 @@ pub struct SetExpression<T> {
     /// Term of this predicate, for example, `a` in `a in (1, 2, 3)`.
     term: T,
     /// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2, 
3)`.
-    literals: HashSet<Literal>,
+    literals: HashSet<Datum>,
+}
+
+impl<T: Debug> Debug for SetExpression<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // Field labels mirror the struct's field names (`op`, `term`, `literals`).
+        f.debug_struct("SetExpression")
+            .field("op", &self.op)
+            .field("term", &self.term)
+            .field("literals", &self.literals)
+            .finish()
+    }
 }
 
 /// Unbound predicate expression before binding to a schema.
-pub enum UnboundPredicate {
+#[derive(Debug)]
+pub enum Predicate {
     /// And predicate, for example, `a > 10 AND b < 20`.
-    And(LogicalExpression<UnboundPredicate, 2>),
+    And(LogicalExpression<Predicate, 2>),
     /// Or predicate, for example, `a > 10 OR b < 20`.
-    Or(LogicalExpression<UnboundPredicate, 2>),
+    Or(LogicalExpression<Predicate, 2>),
     /// Not predicate, for example, `NOT (a > 10)`.
-    Not(LogicalExpression<UnboundPredicate, 1>),
+    Not(LogicalExpression<Predicate, 1>),
     /// Unary expression, for example, `a IS NULL`.
-    Unary(UnaryExpression<UnboundReference>),
+    Unary(UnaryExpression<Reference>),
     /// Binary expression, for example, `a > 10`.
-    Binary(BinaryExpression<UnboundReference>),
+    Binary(BinaryExpression<Reference>),
     /// Set predicates, for example, `a in (1, 2, 3)`.
-    Set(SetExpression<UnboundReference>),
+    Set(SetExpression<Reference>),
+}
+
+impl Display for Predicate {
+    /// Renders the predicate in a SQL-like, human readable form.
+    ///
+    /// Operands of logical operators are always parenthesized, for example
+    /// `(a < 10) AND (b < 20)` or `NOT (a < 10)`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Predicate::And(expr) => {
+                let [lhs, rhs] = expr.inputs();
+                write!(f, "({}) AND ({})", lhs, rhs)
+            }
+            Predicate::Or(expr) => {
+                let [lhs, rhs] = expr.inputs();
+                write!(f, "({}) OR ({})", lhs, rhs)
+            }
+            Predicate::Not(expr) => {
+                let [inner] = expr.inputs();
+                write!(f, "NOT ({})", inner)
+            }
+            Predicate::Unary(expr) => {
+                // The operator must be printed too, otherwise `a IS NULL` is rendered as `a`.
+                write!(f, "{} {}", expr.term, expr.op)
+            }
+            Predicate::Binary(expr) => {
+                write!(f, "{} {} {}", expr.term, expr.op, expr.literal)
+            }
+            Predicate::Set(expr) => {
+                // Literals are rendered with `Display` (not `Debug`) so they look the same as
+                // in binary predicates. `HashSet` iteration order is unspecified, so the order
+                // of the rendered literals is not stable.
+                let literals = expr
+                    .literals
+                    .iter()
+                    .map(|l| l.to_string())
+                    .collect::<Vec<String>>()
+                    .join(", ");
+                write!(f, "{} {} ({})", expr.term, expr.op, literals)
+            }
+        }
+    }
+}
+
+impl Predicate {
+    /// Combines two predicates with `AND`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use std::ops::Bound::Unbounded;
+    /// use iceberg::expr::BoundPredicate::Unary;
+    /// use iceberg::expr::Reference;
+    /// use iceberg::spec::Datum;
+    /// let expr1 = Reference::new("a").less_than(Datum::long(10));
+    ///
+    /// let expr2 = Reference::new("b").less_than(Datum::long(20));
+    ///
+    /// let expr = expr1.and(expr2);
+    ///
+    /// assert_eq!(&format!("{expr}"), "(a < 10) AND (b < 20)");
+    /// ```
+    pub fn and(self, other: Predicate) -> Predicate {
+        Predicate::And(LogicalExpression::new([Box::new(self), 
Box::new(other)]))
+    }
+
+    /// Combines two predicates with `OR`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use std::ops::Bound::Unbounded;
+    /// use iceberg::expr::BoundPredicate::Unary;
+    /// use iceberg::expr::Reference;
+    /// use iceberg::spec::Datum;
+    /// let expr1 = Reference::new("a").less_than(Datum::long(10));
+    ///
+    /// let expr2 = Reference::new("b").less_than(Datum::long(20));
+    ///
+    /// let expr = expr1.or(expr2);
+    ///
+    /// assert_eq!(&format!("{expr}"), "(a < 10) OR (b < 20)");
+    /// ```
+    pub fn or(self, other: Predicate) -> Predicate {
+        Predicate::Or(LogicalExpression::new([Box::new(self), 
Box::new(other)]))
+    }
+}
+
+impl Not for Predicate {
+    type Output = Predicate;
+
+    /// Create a predicate which is the logical negation of this predicate. For example: `NOT (a > 10)`
+    /// # Example
+    ///     
+    ///```rust
+    ///use std::ops::Bound::Unbounded;
+    ///use iceberg::expr::BoundPredicate::Unary;
+    ///use iceberg::expr::Reference;
+    ///use iceberg::spec::Datum;
+    ///let expr1 = Reference::new("a").less_than(Datum::long(10));
+    ///     
+    ///let expr = !expr1;
+    ///     
+    ///assert_eq!(&format!("{expr}"), "NOT (a < 10)");
+    ///```
+    fn not(self) -> Self::Output {
+        Predicate::Not(LogicalExpression::new([Box::new(self)]))
+    }
 }
 
 /// Bound predicate expression after binding to a schema.
+#[derive(Debug)]
 pub enum BoundPredicate {
     /// An expression always evaluates to true.
     AlwaysTrue,
diff --git a/crates/iceberg/src/expr/term.rs b/crates/iceberg/src/expr/term.rs
index 5a81ecd..a4338a3 100644
--- a/crates/iceberg/src/expr/term.rs
+++ b/crates/iceberg/src/expr/term.rs
@@ -17,21 +17,89 @@
 
 //! Term definition.
 
-use crate::spec::NestedFieldRef;
+use crate::expr::{BinaryExpression, Predicate, PredicateOperator};
+use crate::spec::{Datum, NestedField, NestedFieldRef};
+use std::fmt::{Display, Formatter};
 
 /// Unbound term before binding to a schema.
-pub type UnboundTerm = UnboundReference;
+pub type Term = Reference;
 
 /// A named reference in an unbound expression.
 /// For example, `a` in `a > 10`.
-pub struct UnboundReference {
+#[derive(Debug, Clone)]
+pub struct Reference {
     name: String,
 }
 
+impl Reference {
+    /// Create a new unbound reference.
+    pub fn new(name: impl Into<String>) -> Self {
+        Self { name: name.into() }
+    }
+
+    /// Return the name of this reference.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+}
+
+impl Reference {
+    /// Creates a less than expression. For example, `a < 10`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    ///
+    /// use iceberg::expr::Reference;
+    /// use iceberg::spec::Datum;
+    /// let expr = Reference::new("a").less_than(Datum::long(10));
+    ///
+    /// assert_eq!(&format!("{expr}"), "a < 10");
+    /// ```
+    pub fn less_than(self, datum: Datum) -> Predicate {
+        Predicate::Binary(BinaryExpression::new(
+            PredicateOperator::LessThan,
+            self,
+            datum,
+        ))
+    }
+}
+
+impl Display for Reference {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.name)
+    }
+}
+
 /// A named reference in a bound expression after binding to a schema.
+#[derive(Debug, Clone)]
 pub struct BoundReference {
+    // This may be different from the [`name`] field in [`NestedField`] since this contains the full path.
+    // For example, if the field is `a.b.c`, then `field.name` is `c`, but `column_name` is `a.b.c`.
+    column_name: String,
     field: NestedFieldRef,
 }
 
+impl BoundReference {
+    /// Creates a new bound reference.
+    pub fn new(name: impl Into<String>, field: NestedFieldRef) -> Self {
+        Self {
+            column_name: name.into(),
+            field,
+        }
+    }
+
+    /// Return the field of this reference.
+    pub fn field(&self) -> &NestedField {
+        &self.field
+    }
+}
+
+impl Display for BoundReference {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.column_name)
+    }
+}
+
 /// Bound term after binding to a schema.
 pub type BoundTerm = BoundReference;
diff --git a/crates/iceberg/src/spec/values.rs 
b/crates/iceberg/src/spec/values.rs
index a8a748d..688bce4 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -19,6 +19,7 @@
  * Value in iceberg
  */
 
+use std::fmt::{Display, Formatter};
 use std::str::FromStr;
 use std::{any::Any, collections::BTreeMap};
 
@@ -31,12 +32,20 @@ use serde_bytes::ByteBuf;
 use serde_json::{Map as JsonMap, Number, Value as JsonValue};
 use uuid::Uuid;
 
-use crate::{Error, ErrorKind};
+use crate::{ensure_data_valid, Error, ErrorKind};
 
 use super::datatypes::{PrimitiveType, Type};
 
+use crate::spec::values::date::{date_from_naive_date, days_to_date, 
unix_epoch};
+use crate::spec::values::time::microseconds_to_time;
+use crate::spec::values::timestamp::microseconds_to_datetime;
+use crate::spec::values::timestamptz::microseconds_to_datetimetz;
+use crate::spec::MAX_DECIMAL_PRECISION;
 pub use _serde::RawLiteral;
 
+/// Maximum value for [`PrimitiveType::Time`] type in microseconds, i.e. 23 hours 59 minutes 59 seconds 999999 microseconds.
+const MAX_TIME_VALUE: i64 = 24 * 60 * 60 * 1_000_000i64 - 1;
+
 /// Values present in iceberg type
 #[derive(Clone, Debug, PartialEq, Hash, Eq, PartialOrd, Ord)]
 pub enum PrimitiveLiteral {
@@ -70,6 +79,587 @@ pub enum PrimitiveLiteral {
     Decimal(i128),
 }
 
+/// Literal associated with its type. The value and type pair is checked at construction, so the type and value are
+/// guaranteed to be correct when used.
+///
+/// By default, we decouple the type and value of a literal, so we can avoid the cost of storing extra type info
+/// for each literal. But associating a type with a literal can be useful in some cases, for example, in unbound expressions.
+#[derive(Debug)]
+pub struct Datum {
+    r#type: PrimitiveType,
+    literal: PrimitiveLiteral,
+}
+
+impl Display for Datum {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match (&self.r#type, &self.literal) {
+            (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Int(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Long(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Float(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Double(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Date(val)) => write!(f, "{}", 
days_to_date(*val)),
+            (_, PrimitiveLiteral::Time(val)) => write!(f, "{}", 
microseconds_to_time(*val)),
+            (_, PrimitiveLiteral::Timestamp(val)) => {
+                write!(f, "{}", microseconds_to_datetime(*val))
+            }
+            (_, PrimitiveLiteral::TimestampTZ(val)) => {
+                write!(f, "{}", microseconds_to_datetimetz(*val))
+            }
+            (_, PrimitiveLiteral::String(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::UUID(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Fixed(val)) => display_bytes(val, f),
+            (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),
+            (
+                PrimitiveType::Decimal {
+                    precision: _,
+                    scale,
+                },
+                PrimitiveLiteral::Decimal(val),
+            ) => {
+                write!(f, "{}", Decimal::from_i128_with_scale(*val, *scale))
+            }
+            (_, _) => {
+                unreachable!()
+            }
+        }
+    }
+}
+
+/// Writes `bytes` to `f` as uppercase hexadecimal, two digits per byte and no separators.
+fn display_bytes(bytes: &[u8], f: &mut Formatter<'_>) -> std::fmt::Result {
+    // Format each byte straight into the formatter rather than allocating a
+    // temporary `String` per byte plus another one for the joined result.
+    bytes.iter().try_for_each(|b| write!(f, "{b:02X}"))
+}
+
+impl From<Datum> for Literal {
+    fn from(value: Datum) -> Self {
+        Literal::Primitive(value.literal)
+    }
+}
+
+impl Datum {
+    /// Creates a boolean value.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::bool(true);
+    ///
+    /// assert_eq!(format!("{}", t), "true".to_string());
+    /// assert_eq!(Literal::from(t), 
Literal::Primitive(PrimitiveLiteral::Boolean(true)));
+    /// ```
+    pub fn bool<T: Into<bool>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Boolean,
+            literal: PrimitiveLiteral::Boolean(t.into()),
+        }
+    }
+
+    /// Creates a boolean value from string.
+    /// See [Parse bool from 
str](https://doc.rust-lang.org/stable/std/primitive.bool.html#impl-FromStr-for-bool)
 for reference.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::bool_from_str("false").unwrap();
+    ///
+    /// assert_eq!(&format!("{}", t), "false");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Boolean(false)), 
t.into());
+    /// ```
+    pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let v = s.as_ref().parse::<bool>().map_err(|e| {
+            Error::new(ErrorKind::DataInvalid, "Can't parse string to 
bool.").with_source(e)
+        })?;
+        Ok(Self::bool(v))
+    }
+
+    /// Creates a 32bit integer.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::int(23i8);
+    ///
+    /// assert_eq!(&format!("{}", t), "23");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(23)), t.into());
+    /// ```
+    pub fn int<T: Into<i32>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Int,
+            literal: PrimitiveLiteral::Int(t.into()),
+        }
+    }
+
+    /// Creates a 64bit integer.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::long(24i8);
+    ///
+    /// assert_eq!(&format!("{t}"), "24");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Long(24)), t.into());
+    /// ```
+    pub fn long<T: Into<i64>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Long,
+            literal: PrimitiveLiteral::Long(t.into()),
+        }
+    }
+
+    /// Creates a 32bit floating point number.
+    ///
+    /// Example:
+    /// ```rust
+    /// use ordered_float::OrderedFloat;
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::float( 32.1f32 );
+    ///
+    /// assert_eq!(&format!("{t}"), "32.1");
+    /// 
assert_eq!(Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(32.1))), 
t.into());
+    /// ```
+    pub fn float<T: Into<f32>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Float,
+            literal: PrimitiveLiteral::Float(OrderedFloat(t.into())),
+        }
+    }
+
+    /// Creates a 64bit floating point number.
+    ///
+    /// Example:
+    /// ```rust
+    /// use ordered_float::OrderedFloat;
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::double( 32.1f64 );
+    ///
+    /// assert_eq!(&format!("{t}"), "32.1");
+    /// 
assert_eq!(Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(32.1))), 
t.into());
+    /// ```
+    pub fn double<T: Into<f64>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Double,
+            literal: PrimitiveLiteral::Double(OrderedFloat(t.into())),
+        }
+    }
+
+    /// Creates date literal from number of days from unix epoch directly.
+    ///
+    /// Example:
+    /// ```rust
+    ///
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// // 2 days after 1970-01-01
+    /// let t = Datum::date(2);
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-03");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Date(2)), t.into());
+    /// ```
+    pub fn date(days: i32) -> Self {
+        Self {
+            r#type: PrimitiveType::Date,
+            literal: PrimitiveLiteral::Date(days),
+        }
+    }
+
+    /// Creates a date in `%Y-%m-%d` format, assume in utc timezone.
+    ///
+    /// See [`NaiveDate::from_str`].
+    ///
+    /// Example
+    /// ```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::date_from_str("1970-01-05").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-05");
+    /// assert_eq!(Literal::date(4), t.into());
+    /// ```
+    pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't parse date from string: {}", s.as_ref()),
+            )
+            .with_source(e)
+        })?;
+
+        Ok(Self::date(date_from_naive_date(t)))
+    }
+
+    /// Create a date from calendar date (year, month and day).
+    ///
+    /// See [`NaiveDate::from_ymd_opt`].
+    ///
+    /// Example:
+    ///
+    ///```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::date_from_ymd(1970, 1, 5).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-05");
+    /// assert_eq!(Literal::date(4), t.into());
+    /// ```
+    pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
+        let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't create date from year: {year}, month: {month}, 
day: {day}"),
+            )
+        })?;
+
+        Ok(Self::date(date_from_naive_date(t)))
+    }
+
+    /// Creates time in microseconds directly.
+    ///
+    /// It returns an error when the value is negative or too large to fit in 24 hours.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let micro_secs = {
+    ///     1 * 3600 * 1_000_000 + // 1 hour
+    ///     2 * 60 * 1_000_000 +   // 2 minutes
+    ///     1 * 1_000_000 + // 1 second
+    ///     888999  // microseconds
+    ///  };
+    ///
+    /// let t = Datum::time_micros(micro_secs).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
+    /// assert_eq!(Literal::time(micro_secs), t.into());
+    ///
+    /// let negative_value = -100;
+    /// assert!(Datum::time_micros(negative_value).is_err());
+    ///
+    /// let too_large_value = 36 * 60 * 60 * 1_000_000; // Too large to fit in 
24 hours.
+    /// assert!(Datum::time_micros(too_large_value).is_err());
+    /// ```
+    pub fn time_micros(value: i64) -> Result<Self> {
+        ensure_data_valid!(
+            (0..=MAX_TIME_VALUE).contains(&value),
+            "Invalid value for Time type: {}",
+            value
+        );
+
+        Ok(Self {
+            r#type: PrimitiveType::Time,
+            literal: PrimitiveLiteral::Time(value),
+        })
+    }
+
+    /// Creates time literal from [`chrono::NaiveTime`].
+    fn time_from_naive_time(t: NaiveTime) -> Self {
+        let duration = t - unix_epoch().time();
+        // It's safe to unwrap here since less than 24 hours will never 
overflow.
+        let micro_secs = duration.num_microseconds().unwrap();
+
+        Self {
+            r#type: PrimitiveType::Time,
+            literal: PrimitiveLiteral::Time(micro_secs),
+        }
+    }
+
+    /// Creates time in microseconds in `%H:%M:%S%.f` format.
+    ///
+    /// See [`NaiveTime::from_str`] for details.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::time_from_str("01:02:01.888999777").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
+    /// ```
+    pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't parse time from string: {}", s.as_ref()),
+            )
+            .with_source(e)
+        })?;
+
+        Ok(Self::time_from_naive_time(t))
+    }
+
+    /// Creates time literal from hour, minute, second, and microseconds.
+    ///
+    /// See [`NaiveTime::from_hms_micro_opt`].
+    ///
+    /// Example:
+    /// ```rust
+    ///
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::time_from_hms_micro(22, 15, 33, 111).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "22:15:33.000111");
+    /// ```
+    pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) -> 
Result<Self> {
+        let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
+            .ok_or_else(|| Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't create time from hour: {hour}, min: {min}, 
second: {sec}, microsecond: {micro}"),
+            ))?;
+        Ok(Self::time_from_naive_time(t))
+    }
+
+    /// Creates a timestamp from unix epoch in microseconds.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    ///
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::timestamp_micros(1000);
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001");
+    /// ```
+    pub fn timestamp_micros(value: i64) -> Self {
+        Self {
+            r#type: PrimitiveType::Timestamp,
+            literal: PrimitiveLiteral::Timestamp(value),
+        }
+    }
+
+    /// Creates a timestamp from [`NaiveDateTime`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    ///
+    /// use chrono::{NaiveDate, NaiveDateTime, TimeZone, Utc};
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::timestamp_from_datetime(
+    ///     NaiveDate::from_ymd_opt(1992, 3, 1)
+    ///         .unwrap()
+    ///         .and_hms_micro_opt(1, 2, 3, 88)
+    ///         .unwrap());
+    ///
+    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
+    /// ```
+    pub fn timestamp_from_datetime(dt: NaiveDateTime) -> Self {
+        Self::timestamp_micros(dt.timestamp_micros())
+    }
+
+    /// Parse a timestamp in `%Y-%m-%dT%H:%M:%S%.f` format.
+    ///
+    /// See [`NaiveDateTime::from_str`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, 
NaiveTime};
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = 
Datum::timestamp_from_str("1992-03-01T01:02:03.000088").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
+    /// ```
+    pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let dt = s.as_ref().parse::<NaiveDateTime>().map_err(|e| {
+            Error::new(ErrorKind::DataInvalid, "Can't parse 
timestamp.").with_source(e)
+        })?;
+
+        Ok(Self::timestamp_from_datetime(dt))
+    }
+
+    /// Creates a timestamp with timezone from unix epoch in microseconds.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    ///
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::timestamptz_micros(1000);
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001 UTC");
+    /// ```
+    pub fn timestamptz_micros(value: i64) -> Self {
+        Self {
+            r#type: PrimitiveType::Timestamptz,
+            literal: PrimitiveLiteral::TimestampTZ(value),
+        }
+    }
+
+    /// Creates a timestamp with timezone from [`DateTime`].
+    /// Example:
+    ///
+    /// ```rust
+    ///
+    /// use chrono::{TimeZone, Utc};
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::timestamptz_from_datetime(Utc.timestamp_opt(1000, 0).unwrap());
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-01 00:16:40 UTC");
+    /// ```
+    pub fn timestamptz_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
+        Self::timestamptz_micros(dt.with_timezone(&Utc).timestamp_micros())
+    }
+
+    /// Parse timestamp with timezone in RFC3339 format.
+    ///
+    /// See [`DateTime::from_str`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::timestamptz_from_str("1992-03-01T01:02:03.000088+08:00").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1992-02-29 17:02:03.000088 UTC");
+    /// ```
+    pub fn timestamptz_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
+            Error::new(ErrorKind::DataInvalid, "Can't parse datetime.").with_source(e)
+        })?;
+
+        Ok(Self::timestamptz_from_datetime(dt))
+    }
+
+    /// Creates a string literal.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::string("ss");
+    ///
+    /// assert_eq!(&format!("{t}"), "ss");
+    /// ```
+    pub fn string<S: ToString>(s: S) -> Self {
+        Self {
+            r#type: PrimitiveType::String,
+            literal: PrimitiveLiteral::String(s.to_string()),
+        }
+    }
+
+    /// Creates a uuid literal.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use uuid::uuid;
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::uuid(uuid!("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"));
+    ///
+    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
+    /// ```
+    pub fn uuid(uuid: Uuid) -> Self {
+        Self {
+            r#type: PrimitiveType::Uuid,
+            literal: PrimitiveLiteral::UUID(uuid),
+        }
+    }
+
+    /// Creates uuid from str. See [`Uuid::parse_str`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use iceberg::spec::{Datum};
+    /// let t = Datum::uuid_from_str("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
+    /// ```
+    pub fn uuid_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let uuid = Uuid::parse_str(s.as_ref()).map_err(|e| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't parse uuid from string: {}", s.as_ref()),
+            )
+            .with_source(e)
+        })?;
+        Ok(Self::uuid(uuid))
+    }
+
+    /// Creates a fixed literal from bytes.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::fixed(vec![1u8, 2u8]);
+    ///
+    /// assert_eq!(&format!("{t}"), "0102");
+    /// ```
+    pub fn fixed<I: IntoIterator<Item = u8>>(input: I) -> Self {
+        let value: Vec<u8> = input.into_iter().collect();
+        Self {
+            r#type: PrimitiveType::Fixed(value.len() as u64),
+            literal: PrimitiveLiteral::Fixed(value),
+        }
+    }
+
+    /// Creates a binary literal from bytes.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::binary(vec![1u8, 100u8]);
+    ///
+    /// assert_eq!(&format!("{t}"), "0164");
+    /// ```
+    pub fn binary<I: IntoIterator<Item = u8>>(input: I) -> Self {
+        Self {
+            r#type: PrimitiveType::Binary,
+            literal: PrimitiveLiteral::Binary(input.into_iter().collect()),
+        }
+    }
+
+    /// Creates a decimal literal from a string. See [`Decimal::from_str_exact`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use itertools::assert_equal;
+    /// use rust_decimal::Decimal;
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::decimal_from_str("123.45").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "123.45");
+    /// ```
+    pub fn decimal_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let decimal = Decimal::from_str_exact(s.as_ref()).map_err(|e| {
+            Error::new(ErrorKind::DataInvalid, "Can't parse decimal.").with_source(e)
+        })?;
+
+        Self::decimal(decimal)
+    }
+
+    /// Try to create a decimal literal from [`Decimal`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use rust_decimal::Decimal;
+    /// use iceberg::spec::Datum;
+    ///
+    /// let t = Datum::decimal(Decimal::new(123, 2)).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1.23");
+    /// ```
+    pub fn decimal(value: impl Into<Decimal>) -> Result<Self> {
+        let decimal = value.into();
+        let scale = decimal.scale();
+
+        let r#type = Type::decimal(MAX_DECIMAL_PRECISION, scale)?;
+        if let Type::Primitive(p) = r#type {
+            Ok(Self {
+                r#type: p,
+                literal: PrimitiveLiteral::Decimal(decimal.mantissa()),
+            })
+        } else {
+            unreachable!("Decimal type must be primitive.")
+        }
+    }
+}
+
 /// Values present in iceberg type
 #[derive(Clone, Debug, PartialEq, Hash, Eq, PartialOrd, Ord)]
 pub enum Literal {
@@ -174,22 +764,11 @@ impl Literal {
         Self::Primitive(PrimitiveLiteral::Double(OrderedFloat(t.into())))
     }
 
-    /// Returns unix epoch.
-    pub fn unix_epoch() -> DateTime<Utc> {
-        Utc.timestamp_nanos(0)
-    }
-
     /// Creates date literal from number of days from unix epoch directly.
     pub fn date(days: i32) -> Self {
         Self::Primitive(PrimitiveLiteral::Date(days))
     }
 
-    /// Creates date literal from `NaiveDate`, assuming it's utc timezone.
-    fn date_from_naive_date(date: NaiveDate) -> Self {
-        let days = (date - Self::unix_epoch().date_naive()).num_days();
-        Self::date(days as i32)
-    }
-
     /// Creates a date in `%Y-%m-%d` format, assume in utc timezone.
     ///
     /// See [`NaiveDate::from_str`].
@@ -210,7 +789,7 @@ impl Literal {
             .with_source(e)
         })?;
 
-        Ok(Self::date_from_naive_date(t))
+        Ok(Self::date(date_from_naive_date(t)))
     }
 
     /// Create a date from calendar date (year, month and day).
@@ -233,7 +812,7 @@ impl Literal {
             )
         })?;
 
-        Ok(Self::date_from_naive_date(t))
+        Ok(Self::date(date_from_naive_date(t)))
     }
 
     /// Creates time in microseconds directly
@@ -243,7 +822,7 @@ impl Literal {
 
     /// Creates time literal from [`chrono::NaiveTime`].
     fn time_from_naive_time(t: NaiveTime) -> Self {
-        let duration = t - Self::unix_epoch().time();
+        let duration = t - unix_epoch().time();
         // It's safe to unwrap here since less than 24 hours will never overflow.
         let micro_secs = duration.num_microseconds().unwrap();
 
@@ -951,7 +1530,7 @@ impl Literal {
 }
 
 mod date {
-    use chrono::{NaiveDate, NaiveDateTime};
+    use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc};
 
     pub(crate) fn date_to_days(date: &NaiveDate) -> i32 {
         date.signed_duration_since(
@@ -967,6 +1546,16 @@ mod date {
             .unwrap()
             .date()
     }
+
+    /// Returns unix epoch.
+    pub(crate) fn unix_epoch() -> DateTime<Utc> {
+        Utc.timestamp_nanos(0)
+    }
+
+    /// Returns the number of days from the unix epoch to `date`, assuming it's in utc timezone.
+    pub(crate) fn date_from_naive_date(date: NaiveDate) -> i32 {
+        (date - unix_epoch().date_naive()).num_days() as i32
+    }
 }
 
 mod time {
@@ -2234,4 +2823,37 @@ mod tests {
     // rust avro can't support to convert any byte-like type to fixed in avro now.
     // - uuid ser/de
     // - fixed ser/de
+
+    #[test]
+    fn test_parse_timestamp() {
+        let value = Datum::timestamp_from_str("2021-08-01T01:09:00.0899").unwrap();
+        assert_eq!(&format!("{value}"), "2021-08-01 01:09:00.089900");
+
+        let value = Datum::timestamp_from_str("2021-08-01T01:09:00.0899+0800");
+        assert!(value.is_err(), "Parse timestamp with timezone should fail!");
+
+        let value = Datum::timestamp_from_str("dfa");
+        assert!(
+            value.is_err(),
+            "Parse timestamp with invalid input should fail!"
+        );
+    }
+
+    #[test]
+    fn test_parse_timestamptz() {
+        let value = Datum::timestamptz_from_str("2021-08-01T09:09:00.0899+0800").unwrap();
+        assert_eq!(&format!("{value}"), "2021-08-01 01:09:00.089900 UTC");
+
+        let value = Datum::timestamptz_from_str("2021-08-01T01:09:00.0899");
+        assert!(
+            value.is_err(),
+            "Parse timestamptz without timezone should fail!"
+        );
+
+        let value = Datum::timestamptz_from_str("dfa");
+        assert!(
+            value.is_err(),
+            "Parse timestamptz with invalid input should fail!"
+        );
+    }
 }


Reply via email to