liurenjie1024 commented on code in PR #309:
URL: https://github.com/apache/iceberg-rust/pull/309#discussion_r1547488931


##########
crates/iceberg/src/spec/transform.rs:
##########
@@ -261,6 +269,300 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose, we have row filter `a = 10`, and a partition spec
+    /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition
+    /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to
+    /// `bs = bucket(10, 37)`
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> 
Result<Option<Predicate>> {
+        let func = create_transform_function(self)?;
+
+        let projection = match predicate {
+            BoundPredicate::Unary(expr) => match self {
+                Transform::Identity
+                | Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => 
Some(Predicate::Unary(UnaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                ))),
+                _ => None,
+            },
+            BoundPredicate::Binary(expr) => match self {
+                Transform::Identity => 
Some(Predicate::Binary(BinaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literal().to_owned(),
+                ))),
+                Transform::Bucket(_) => {
+                    if expr.op() != PredicateOperator::Eq || 
!self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Binary(BinaryExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        func.transform_literal_result(expr.literal())?,
+                    )))
+                }
+                Transform::Truncate(width) => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        Some(*width),
+                    )?
+                }
+                Transform::Year | Transform::Month | Transform::Day | 
Transform::Hour => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        None,
+                    )?
+                }
+                _ => None,
+            },
+            BoundPredicate::Set(expr) => match self {
+                Transform::Identity => Some(Predicate::Set(SetExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literals().to_owned(),
+                ))),
+                Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => {
+                    if expr.op() != PredicateOperator::In
+                        || expr.literals().iter().any(|d| 
!self.can_transform(d))
+                    {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Set(SetExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        self.transform_set(expr.literals(), &func)?,
+                    )))
+                }
+                _ => None,
+            },
+            _ => None,
+        };
+
+        Ok(projection)
+    }
+
+    /// Check if `Transform` is applicable on datum's `PrimitiveType`
+    fn can_transform(&self, datum: &Datum) -> bool {
+        let input_type = datum.data_type().clone();
+        self.result_type(&Type::Primitive(input_type)).is_ok()
+    }
+
+    /// Transform each literal value of `FnvHashSet<Datum>`
+    fn transform_set(
+        &self,
+        literals: &FnvHashSet<Datum>,
+        func: &BoxedTransformFunction,
+    ) -> Result<FnvHashSet<Datum>> {
+        let mut new_set = FnvHashSet::default();
+
+        for lit in literals {
+            let datum = func.transform_literal_result(lit)?;
+
+            if let Some(AdjustedProjection::Single(d)) =
+                self.adjust_projection(&PredicateOperator::In, lit, &datum)
+            {
+                new_set.insert(d);
+            };
+
+            new_set.insert(datum);
+        }
+
+        Ok(new_set)
+    }
+
+    /// Apply transform on `Datum` with adjusted boundaries.
+    /// Returns Predicate with projection and possibly
+    /// rewritten `PredicateOperator`
+    fn transform_projected_boundary(
+        &self,

Review Comment:
   ```suggestion
   ```
   Ditto



##########
crates/iceberg/src/spec/transform.rs:
##########
@@ -261,6 +269,300 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose, we have row filter `a = 10`, and a partition spec
+    /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition
+    /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to
+    /// `bs = bucket(10, 37)`
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> 
Result<Option<Predicate>> {
+        let func = create_transform_function(self)?;
+
+        let projection = match predicate {
+            BoundPredicate::Unary(expr) => match self {
+                Transform::Identity
+                | Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => 
Some(Predicate::Unary(UnaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                ))),
+                _ => None,
+            },
+            BoundPredicate::Binary(expr) => match self {
+                Transform::Identity => 
Some(Predicate::Binary(BinaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literal().to_owned(),
+                ))),
+                Transform::Bucket(_) => {
+                    if expr.op() != PredicateOperator::Eq || 
!self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Binary(BinaryExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        func.transform_literal_result(expr.literal())?,
+                    )))
+                }
+                Transform::Truncate(width) => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        Some(*width),
+                    )?
+                }
+                Transform::Year | Transform::Month | Transform::Day | 
Transform::Hour => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        None,
+                    )?
+                }
+                _ => None,
+            },
+            BoundPredicate::Set(expr) => match self {
+                Transform::Identity => Some(Predicate::Set(SetExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literals().to_owned(),
+                ))),
+                Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => {
+                    if expr.op() != PredicateOperator::In
+                        || expr.literals().iter().any(|d| 
!self.can_transform(d))
+                    {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Set(SetExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        self.transform_set(expr.literals(), &func)?,
+                    )))
+                }
+                _ => None,
+            },
+            _ => None,
+        };
+
+        Ok(projection)
+    }
+
+    /// Check if `Transform` is applicable on datum's `PrimitiveType`
+    fn can_transform(&self, datum: &Datum) -> bool {
+        let input_type = datum.data_type().clone();
+        self.result_type(&Type::Primitive(input_type)).is_ok()
+    }
+
+    /// Transform each literal value of `FnvHashSet<Datum>`
+    fn transform_set(
+        &self,
+        literals: &FnvHashSet<Datum>,
+        func: &BoxedTransformFunction,
+    ) -> Result<FnvHashSet<Datum>> {
+        let mut new_set = FnvHashSet::default();
+
+        for lit in literals {
+            let datum = func.transform_literal_result(lit)?;
+
+            if let Some(AdjustedProjection::Single(d)) =
+                self.adjust_projection(&PredicateOperator::In, lit, &datum)
+            {
+                new_set.insert(d);
+            };
+
+            new_set.insert(datum);
+        }
+
+        Ok(new_set)
+    }
+
+    /// Apply transform on `Datum` with adjusted boundaries.
+    /// Returns Predicate with projection and possibly
+    /// rewritten `PredicateOperator`
+    fn transform_projected_boundary(
+        &self,
+        name: String,
+        datum: &Datum,
+        op: &PredicateOperator,
+        func: &BoxedTransformFunction,
+        width: Option<u32>,
+    ) -> Result<Option<Predicate>> {
+        if let Some(boundary) = self.projected_boundary(op, datum)? {
+            let transformed = func.transform_literal_result(&boundary)?;
+            let adjusted = self.adjust_projection(op, datum, &transformed);
+            let op = self.projected_operator(op, datum, width);
+
+            if let Some(op) = op {
+                let predicate = match adjusted {
+                    None => Predicate::Binary(BinaryExpression::new(
+                        op,
+                        Reference::new(name),
+                        transformed,
+                    )),
+                    Some(AdjustedProjection::Single(d)) => {
+                        Predicate::Binary(BinaryExpression::new(op, 
Reference::new(name), d))
+                    }
+                    Some(AdjustedProjection::Set(d)) => 
Predicate::Set(SetExpression::new(
+                        PredicateOperator::In,
+                        Reference::new(name),
+                        d,
+                    )),
+                };
+                return Ok(Some(predicate));
+            }
+        };
+
+        Ok(None)
+    }
+
+    /// Create a new `Datum` with adjusted projection boundary.
+    /// Returns `None` if `PredicateOperator` and `PrimitiveLiteral`
+    /// can not be projected
+    fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> 
Result<Option<Datum>> {

Review Comment:
   ```suggestion
       fn projected_boundary(op: &PredicateOperator, datum: &Datum) -> 
Result<Option<Datum>> {
   ```
   We don't need `self` here?



##########
crates/iceberg/src/spec/transform.rs:
##########
@@ -261,6 +269,300 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose, we have row filter `a = 10`, and a partition spec
+    /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition
+    /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to
+    /// `bs = bucket(10, 37)`
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> 
Result<Option<Predicate>> {
+        let func = create_transform_function(self)?;
+
+        let projection = match predicate {
+            BoundPredicate::Unary(expr) => match self {

Review Comment:
   Would you mind rewriting this as follows:
   ```rust
   match self {
      Transform::Identity => {
        match predicate {
          BoundPredicate::Unary(expr) => { ... }
          BoundPredicate::Binary(expr) => {...}
       }
      }
   }
   ```
   
   I know the results are the same, but rewriting it this way makes it easier 
to read and to check against the Java implementation, since they are organized by 
transform in each file.



##########
crates/iceberg/src/transform/temporal.rs:
##########
@@ -269,11 +294,1995 @@ mod test {
     use chrono::{NaiveDate, NaiveDateTime};
     use std::sync::Arc;
 
+    use crate::spec::PrimitiveType::{
+        Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, 
Timestamp,
+        Timestamptz, Uuid,
+    };
+    use crate::spec::StructType;
+    use crate::spec::Type::{Primitive, Struct};
+
+    use crate::transform::test::TestTransformFixture;
     use crate::{
-        spec::Datum,
-        transform::{BoxedTransformFunction, TransformFunction},
+        expr::PredicateOperator,
+        spec::{Datum, NestedField, PrimitiveType, Transform, Type},
+        transform::{test::TestProjectionFixture, BoxedTransformFunction, 
TransformFunction},
+        Result,
     };
 
+    #[test]
+    fn test_year_transform() {
+        let trans = Transform::Year;
+
+        let fixture = TestTransformFixture {
+            display: "year".to_string(),
+            json: r#""year""#.to_string(),
+            dedup_name: "time".to_string(),
+            preserves_order: true,
+            satisfies_order_of: vec![
+                (Transform::Year, true),
+                (Transform::Month, false),
+                (Transform::Day, false),
+                (Transform::Hour, false),
+                (Transform::Void, false),
+                (Transform::Identity, false),
+            ],
+            trans_types: vec![
+                (Primitive(Binary), None),
+                (Primitive(Date), Some(Primitive(Int))),
+                (
+                    Primitive(Decimal {
+                        precision: 8,
+                        scale: 5,
+                    }),
+                    None,
+                ),
+                (Primitive(Fixed(8)), None),
+                (Primitive(Int), None),
+                (Primitive(Long), None),
+                (Primitive(StringType), None),
+                (Primitive(Uuid), None),
+                (Primitive(Time), None),
+                (Primitive(Timestamp), Some(Primitive(Int))),
+                (Primitive(Timestamptz), Some(Primitive(Int))),
+                (
+                    Struct(StructType::new(vec![NestedField::optional(
+                        1,
+                        "a",
+                        Primitive(Timestamp),
+                    )
+                    .into()])),
+                    None,
+                ),
+            ],
+        };
+
+        fixture.assert_transform(trans);
+    }
+
+    #[test]
+    fn test_month_transform() {
+        let trans = Transform::Month;
+
+        let fixture = TestTransformFixture {
+            display: "month".to_string(),
+            json: r#""month""#.to_string(),
+            dedup_name: "time".to_string(),
+            preserves_order: true,
+            satisfies_order_of: vec![
+                (Transform::Year, true),
+                (Transform::Month, true),
+                (Transform::Day, false),
+                (Transform::Hour, false),
+                (Transform::Void, false),
+                (Transform::Identity, false),
+            ],
+            trans_types: vec![
+                (Primitive(Binary), None),
+                (Primitive(Date), Some(Primitive(Int))),
+                (
+                    Primitive(Decimal {
+                        precision: 8,
+                        scale: 5,
+                    }),
+                    None,
+                ),
+                (Primitive(Fixed(8)), None),
+                (Primitive(Int), None),
+                (Primitive(Long), None),
+                (Primitive(StringType), None),
+                (Primitive(Uuid), None),
+                (Primitive(Time), None),
+                (Primitive(Timestamp), Some(Primitive(Int))),
+                (Primitive(Timestamptz), Some(Primitive(Int))),
+                (
+                    Struct(StructType::new(vec![NestedField::optional(
+                        1,
+                        "a",
+                        Primitive(Timestamp),
+                    )
+                    .into()])),
+                    None,
+                ),
+            ],
+        };
+
+        fixture.assert_transform(trans);
+    }
+
+    #[test]
+    fn test_day_transform() {
+        let trans = Transform::Day;
+
+        let fixture = TestTransformFixture {
+            display: "day".to_string(),
+            json: r#""day""#.to_string(),
+            dedup_name: "time".to_string(),
+            preserves_order: true,
+            satisfies_order_of: vec![
+                (Transform::Year, true),
+                (Transform::Month, true),
+                (Transform::Day, true),
+                (Transform::Hour, false),
+                (Transform::Void, false),
+                (Transform::Identity, false),
+            ],
+            trans_types: vec![
+                (Primitive(Binary), None),
+                (Primitive(Date), Some(Primitive(Int))),
+                (
+                    Primitive(Decimal {
+                        precision: 8,
+                        scale: 5,
+                    }),
+                    None,
+                ),
+                (Primitive(Fixed(8)), None),
+                (Primitive(Int), None),
+                (Primitive(Long), None),
+                (Primitive(StringType), None),
+                (Primitive(Uuid), None),
+                (Primitive(Time), None),
+                (Primitive(Timestamp), Some(Primitive(Int))),
+                (Primitive(Timestamptz), Some(Primitive(Int))),
+                (
+                    Struct(StructType::new(vec![NestedField::optional(
+                        1,
+                        "a",
+                        Primitive(Timestamp),
+                    )
+                    .into()])),
+                    None,
+                ),
+            ],
+        };
+
+        fixture.assert_transform(trans);
+    }
+
+    #[test]
+    fn test_hour_transform() {
+        let trans = Transform::Hour;
+
+        let fixture = TestTransformFixture {
+            display: "hour".to_string(),
+            json: r#""hour""#.to_string(),
+            dedup_name: "time".to_string(),
+            preserves_order: true,
+            satisfies_order_of: vec![
+                (Transform::Year, true),
+                (Transform::Month, true),
+                (Transform::Day, true),
+                (Transform::Hour, true),
+                (Transform::Void, false),
+                (Transform::Identity, false),
+            ],
+            trans_types: vec![
+                (Primitive(Binary), None),
+                (Primitive(Date), None),
+                (
+                    Primitive(Decimal {
+                        precision: 8,
+                        scale: 5,
+                    }),
+                    None,
+                ),
+                (Primitive(Fixed(8)), None),
+                (Primitive(Int), None),
+                (Primitive(Long), None),
+                (Primitive(StringType), None),
+                (Primitive(Uuid), None),
+                (Primitive(Time), None),
+                (Primitive(Timestamp), Some(Primitive(Int))),
+                (Primitive(Timestamptz), Some(Primitive(Int))),
+                (
+                    Struct(StructType::new(vec![NestedField::optional(
+                        1,
+                        "a",
+                        Primitive(Timestamp),
+                    )
+                    .into()])),
+                    None,
+                ),
+            ],
+        };
+
+        fixture.assert_transform(trans);
+    }
+
+    #[test]
+    fn test_projection_timestamp_hour_upper_bound() -> Result<()> {
+        // 420034
+        let value = "2017-12-01T10:59:59.999999";
+        // 412007
+        let another = "2016-12-31T23:59:59.999999";
+
+        let fixture = TestProjectionFixture::new(
+            Transform::Hour,
+            "name",
+            NestedField::required(1, "value", 
Type::Primitive(PrimitiveType::Timestamp)),
+        );
+
+        fixture.assert_projection(
+            &fixture.binary_predicate(
+                PredicateOperator::LessThan,
+                Datum::timestamp_from_str(value)?,
+            ),
+            Some("name <= 420034"),
+        )?;
+
+        fixture.assert_projection(
+            &fixture.binary_predicate(
+                PredicateOperator::LessThanOrEq,
+                Datum::timestamp_from_str(value)?,
+            ),
+            Some("name <= 420034"),
+        )?;
+
+        fixture.assert_projection(
+            &fixture.binary_predicate(
+                PredicateOperator::GreaterThan,
+                Datum::timestamp_from_str(value)?,
+            ),
+            Some("name >= 420035"),
+        )?;
+
+        fixture.assert_projection(
+            &fixture.binary_predicate(
+                PredicateOperator::GreaterThanOrEq,
+                Datum::timestamp_from_str(value)?,
+            ),
+            Some("name >= 420034"),
+        )?;
+
+        fixture.assert_projection(
+            &fixture.binary_predicate(PredicateOperator::Eq, 
Datum::timestamp_from_str(value)?),
+            Some("name = 420034"),
+        )?;
+
+        fixture.assert_projection(
+            &fixture.binary_predicate(PredicateOperator::NotEq, 
Datum::timestamp_from_str(value)?),
+            None,
+        )?;
+
+        fixture.assert_projection(
+            &fixture.set_predicate(
+                PredicateOperator::In,
+                vec![
+                    Datum::timestamp_from_str(value)?,
+                    Datum::timestamp_from_str(another)?,
+                ],
+            ),
+            Some("name IN (420034, 412007)"),
+        )?;
+
+        fixture.assert_projection(
+            &fixture.set_predicate(
+                PredicateOperator::NotIn,
+                vec![
+                    Datum::timestamp_from_str(value)?,
+                    Datum::timestamp_from_str(another)?,
+                ],
+            ),
+            None,
+        )?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_projection_timestamp_hour_lower_bound() -> Result<()> {

Review Comment:
   Really grateful for these tests!



##########
crates/iceberg/src/spec/transform.rs:
##########
@@ -261,6 +269,300 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose, we have row filter `a = 10`, and a partition spec
+    /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition
+    /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to
+    /// `bs = bucket(10, 37)`
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> 
Result<Option<Predicate>> {
+        let func = create_transform_function(self)?;
+
+        let projection = match predicate {
+            BoundPredicate::Unary(expr) => match self {
+                Transform::Identity
+                | Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => 
Some(Predicate::Unary(UnaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                ))),
+                _ => None,
+            },
+            BoundPredicate::Binary(expr) => match self {
+                Transform::Identity => 
Some(Predicate::Binary(BinaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literal().to_owned(),
+                ))),
+                Transform::Bucket(_) => {
+                    if expr.op() != PredicateOperator::Eq || 
!self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Binary(BinaryExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        func.transform_literal_result(expr.literal())?,
+                    )))
+                }
+                Transform::Truncate(width) => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        Some(*width),
+                    )?
+                }
+                Transform::Year | Transform::Month | Transform::Day | 
Transform::Hour => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        None,
+                    )?
+                }
+                _ => None,
+            },
+            BoundPredicate::Set(expr) => match self {
+                Transform::Identity => Some(Predicate::Set(SetExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literals().to_owned(),
+                ))),
+                Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => {
+                    if expr.op() != PredicateOperator::In
+                        || expr.literals().iter().any(|d| 
!self.can_transform(d))
+                    {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Set(SetExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        self.transform_set(expr.literals(), &func)?,
+                    )))
+                }
+                _ => None,
+            },
+            _ => None,
+        };
+
+        Ok(projection)
+    }
+
+    /// Check if `Transform` is applicable on datum's `PrimitiveType`
+    fn can_transform(&self, datum: &Datum) -> bool {
+        let input_type = datum.data_type().clone();
+        self.result_type(&Type::Primitive(input_type)).is_ok()
+    }
+
+    /// Transform each literal value of `FnvHashSet<Datum>`
+    fn transform_set(
+        &self,
+        literals: &FnvHashSet<Datum>,
+        func: &BoxedTransformFunction,
+    ) -> Result<FnvHashSet<Datum>> {
+        let mut new_set = FnvHashSet::default();
+
+        for lit in literals {
+            let datum = func.transform_literal_result(lit)?;
+
+            if let Some(AdjustedProjection::Single(d)) =
+                self.adjust_projection(&PredicateOperator::In, lit, &datum)
+            {
+                new_set.insert(d);
+            };
+
+            new_set.insert(datum);
+        }
+
+        Ok(new_set)
+    }
+
+    /// Apply transform on `Datum` with adjusted boundaries.
+    /// Returns Predicate with projection and possibly
+    /// rewritten `PredicateOperator`
+    fn transform_projected_boundary(
+        &self,
+        name: String,
+        datum: &Datum,
+        op: &PredicateOperator,
+        func: &BoxedTransformFunction,
+        width: Option<u32>,
+    ) -> Result<Option<Predicate>> {
+        if let Some(boundary) = self.projected_boundary(op, datum)? {
+            let transformed = func.transform_literal_result(&boundary)?;
+            let adjusted = self.adjust_projection(op, datum, &transformed);
+            let op = self.projected_operator(op, datum, width);
+
+            if let Some(op) = op {
+                let predicate = match adjusted {
+                    None => Predicate::Binary(BinaryExpression::new(
+                        op,
+                        Reference::new(name),
+                        transformed,
+                    )),
+                    Some(AdjustedProjection::Single(d)) => {
+                        Predicate::Binary(BinaryExpression::new(op, 
Reference::new(name), d))
+                    }
+                    Some(AdjustedProjection::Set(d)) => 
Predicate::Set(SetExpression::new(
+                        PredicateOperator::In,
+                        Reference::new(name),
+                        d,
+                    )),
+                };
+                return Ok(Some(predicate));
+            }
+        };
+
+        Ok(None)
+    }
+
+    /// Create a new `Datum` with adjusted projection boundary.
+    /// Returns `None` if `PredicateOperator` and `PrimitiveLiteral`
+    /// can not be projected
+    fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> 
Result<Option<Datum>> {
+        let literal = datum.literal();
+
+        let projected_boundary = match op {
+            PredicateOperator::LessThan => match literal {
+                PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)),
+                PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)),
+                PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?),
+                PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)),
+                PrimitiveLiteral::Timestamp(v) => 
Some(Datum::timestamp_micros(v - 1)),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::GreaterThan => match literal {
+                PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)),
+                PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)),
+                PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?),
+                PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)),
+                PrimitiveLiteral::Timestamp(v) => 
Some(Datum::timestamp_micros(v + 1)),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::Eq
+            | PredicateOperator::LessThanOrEq
+            | PredicateOperator::GreaterThanOrEq
+            | PredicateOperator::StartsWith
+            | PredicateOperator::NotStartsWith => Some(datum.to_owned()),
+            _ => None,
+        };
+
+        Ok(projected_boundary)
+    }
+
+    /// Create a new `PredicateOperator`, rewritten for projection
+    fn projected_operator(
+        &self,

Review Comment:
   ```suggestion
   ```
   Ditto.



##########
crates/iceberg/src/spec/transform.rs:
##########
@@ -261,6 +269,300 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose, we have row filter `a = 10`, and a partition spec
+    /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition
+    /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to
+    /// `bs = bucket(10, 37)`
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> 
Result<Option<Predicate>> {
+        let func = create_transform_function(self)?;
+
+        let projection = match predicate {
+            BoundPredicate::Unary(expr) => match self {
+                Transform::Identity
+                | Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => 
Some(Predicate::Unary(UnaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                ))),
+                _ => None,
+            },
+            BoundPredicate::Binary(expr) => match self {
+                Transform::Identity => 
Some(Predicate::Binary(BinaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literal().to_owned(),
+                ))),
+                Transform::Bucket(_) => {
+                    if expr.op() != PredicateOperator::Eq || 
!self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Binary(BinaryExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        func.transform_literal_result(expr.literal())?,
+                    )))
+                }
+                Transform::Truncate(width) => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        Some(*width),
+                    )?
+                }
+                Transform::Year | Transform::Month | Transform::Day | 
Transform::Hour => {
+                    if !self.can_transform(expr.literal()) {
+                        return Ok(None);
+                    }
+
+                    self.transform_projected_boundary(
+                        name,
+                        expr.literal(),
+                        &expr.op(),
+                        &func,
+                        None,
+                    )?
+                }
+                _ => None,
+            },
+            BoundPredicate::Set(expr) => match self {
+                Transform::Identity => Some(Predicate::Set(SetExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literals().to_owned(),
+                ))),
+                Transform::Bucket(_)
+                | Transform::Truncate(_)
+                | Transform::Year
+                | Transform::Month
+                | Transform::Day
+                | Transform::Hour => {
+                    if expr.op() != PredicateOperator::In
+                        || expr.literals().iter().any(|d| 
!self.can_transform(d))
+                    {
+                        return Ok(None);
+                    }
+
+                    Some(Predicate::Set(SetExpression::new(
+                        expr.op(),
+                        Reference::new(name),
+                        self.transform_set(expr.literals(), &func)?,
+                    )))
+                }
+                _ => None,
+            },
+            _ => None,
+        };
+
+        Ok(projection)
+    }
+
+    /// Check if `Transform` is applicable on datum's `PrimitiveType`
+    fn can_transform(&self, datum: &Datum) -> bool {
+        let input_type = datum.data_type().clone();
+        self.result_type(&Type::Primitive(input_type)).is_ok()
+    }
+
+    /// Transform each literal value of `FnvHashSet<Datum>`
+    fn transform_set(
+        &self,
+        literals: &FnvHashSet<Datum>,
+        func: &BoxedTransformFunction,
+    ) -> Result<FnvHashSet<Datum>> {
+        let mut new_set = FnvHashSet::default();
+
+        for lit in literals {
+            let datum = func.transform_literal_result(lit)?;
+
+            if let Some(AdjustedProjection::Single(d)) =
+                self.adjust_projection(&PredicateOperator::In, lit, &datum)
+            {
+                new_set.insert(d);
+            };
+
+            new_set.insert(datum);
+        }
+
+        Ok(new_set)
+    }
+
+    /// Apply transform on `Datum` with adjusted boundaries.
+    /// Returns Predicate with projection and possibly
+    /// rewritten `PredicateOperator`
+    fn transform_projected_boundary(
+        &self,
+        name: String,
+        datum: &Datum,
+        op: &PredicateOperator,
+        func: &BoxedTransformFunction,
+        width: Option<u32>,
+    ) -> Result<Option<Predicate>> {
+        if let Some(boundary) = self.projected_boundary(op, datum)? {
+            let transformed = func.transform_literal_result(&boundary)?;
+            let adjusted = self.adjust_projection(op, datum, &transformed);
+            let op = self.projected_operator(op, datum, width);
+
+            if let Some(op) = op {
+                let predicate = match adjusted {
+                    None => Predicate::Binary(BinaryExpression::new(
+                        op,
+                        Reference::new(name),
+                        transformed,
+                    )),
+                    Some(AdjustedProjection::Single(d)) => {
+                        Predicate::Binary(BinaryExpression::new(op, 
Reference::new(name), d))
+                    }
+                    Some(AdjustedProjection::Set(d)) => 
Predicate::Set(SetExpression::new(
+                        PredicateOperator::In,
+                        Reference::new(name),
+                        d,
+                    )),
+                };
+                return Ok(Some(predicate));
+            }
+        };
+
+        Ok(None)
+    }
+
+    /// Create a new `Datum` with adjusted projection boundary.
+    /// Returns `None` if `PredicateOperator` and `PrimitiveLiteral`
+    /// can not be projected
+    fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> 
Result<Option<Datum>> {
+        let literal = datum.literal();
+
+        let projected_boundary = match op {
+            PredicateOperator::LessThan => match literal {
+                PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)),
+                PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)),
+                PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?),
+                PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)),
+                PrimitiveLiteral::Timestamp(v) => 
Some(Datum::timestamp_micros(v - 1)),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::GreaterThan => match literal {
+                PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)),
+                PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)),
+                PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?),
+                PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)),
+                PrimitiveLiteral::Timestamp(v) => 
Some(Datum::timestamp_micros(v + 1)),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::Eq
+            | PredicateOperator::LessThanOrEq
+            | PredicateOperator::GreaterThanOrEq
+            | PredicateOperator::StartsWith
+            | PredicateOperator::NotStartsWith => Some(datum.to_owned()),
+            _ => None,
+        };
+
+        Ok(projected_boundary)
+    }
+
+    /// Create a new `PredicateOperator`, rewritten for projection
+    fn projected_operator(

Review Comment:
   ```suggestion
       fn projected_operator_for_truncate(
   ```
   It seems that this only applies to the truncate transform?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org


Reply via email to