marvinlanhenke commented on code in PR #309: URL: https://github.com/apache/iceberg-rust/pull/309#discussion_r1553271703
########## crates/iceberg/src/spec/transform.rs: ########## @@ -261,6 +269,323 @@ impl Transform { _ => self == other, } } + + /// Projects a given predicate according to the transformation + /// specified by the `Transform` instance. + /// + /// This allows predicates to be effectively applied to data + /// that has undergone transformation, enabling efficient querying + /// and filtering based on the original, untransformed data. + /// + /// # Example + /// Suppose, we have row filter `a = 10`, and a partition spec + /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition + /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to + /// `bs = bucket(10, 37)` + pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result<Option<Predicate>> { + let func = create_transform_function(self)?; + + match self { + Transform::Identity => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => Ok(Some(Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + expr.literal().to_owned(), + )))), + BoundPredicate::Set(expr) => Ok(Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + expr.literals().to_owned(), + )))), + _ => Ok(None), + }, + Transform::Bucket(_) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => self.project_binary(name, expr, &func), + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + }, + Transform::Truncate(width) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_with_adjusted_boundary(name, expr, &func, Some(*width)) + } + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + }, + Transform::Year | Transform::Month | Transform::Day | Transform::Hour => { + match 
predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_with_adjusted_boundary(name, expr, &func, None) + } + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + } + } + _ => Ok(None), + } + } + + /// Check if `Transform` is applicable on datum's `PrimitiveType` + fn can_transform(&self, datum: &Datum) -> bool { + let input_type = datum.data_type().clone(); + self.result_type(&Type::Primitive(input_type)).is_ok() + } + + /// Creates a unary predicate from a given operator and a reference name. + fn project_unary(op: PredicateOperator, name: String) -> Result<Option<Predicate>> { + Ok(Some(Predicate::Unary(UnaryExpression::new( + op, + Reference::new(name), + )))) + } + + /// Attempts to create a binary predicate based on a binary expression, + /// if applicable. + /// + /// This method evaluates a given binary expression and, if the operation + /// is equality (`Eq`) and the literal can be transformed, constructs a + /// `Predicate::Binary` variant representing the binary operation. + fn project_binary<T>( + &self, + name: String, + expr: &BinaryExpression<T>, + func: &BoxedTransformFunction, + ) -> Result<Option<Predicate>> { + if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) { + return Ok(None); + } + + Ok(Some(Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + func.transform_literal_result(expr.literal())?, + )))) + } + + /// Projects a binary expression to a predicate with an adjusted boundary. + /// + /// Checks if the literal within the given binary expression is + /// transformable. If transformable, it proceeds to potentially adjust + /// the boundary of the expression based on the comparison operator (`op`). + /// The potential adjustments involve incrementing or decrementing the + /// literal value and changing the `PredicateOperator` itself to its + /// inclusive variant. 
+ fn project_binary_with_adjusted_boundary<T>( + &self, + name: String, + expr: &BinaryExpression<T>, + func: &BoxedTransformFunction, + width: Option<u32>, + ) -> Result<Option<Predicate>> { + if !self.can_transform(expr.literal()) { + return Ok(None); + } + + let op = &expr.op(); + let datum = &expr.literal(); + + if let Some(boundary) = Self::adjust_boundary(op, datum)? { + let transformed_projection = func.transform_literal_result(&boundary)?; + + let adjusted_projection = + self.adjust_time_projection(op, datum, &transformed_projection); + + let adjusted_operator = Self::adjust_operator(op, datum, width); + + if let Some(op) = adjusted_operator { + let predicate = match adjusted_projection { + None => Predicate::Binary(BinaryExpression::new( + op, + Reference::new(name), + transformed_projection, + )), + Some(AdjustedProjection::Single(d)) => { + Predicate::Binary(BinaryExpression::new(op, Reference::new(name), d)) + } + Some(AdjustedProjection::Set(d)) => Predicate::Set(SetExpression::new( + PredicateOperator::In, + Reference::new(name), + d, + )), + }; + return Ok(Some(predicate)); + } + }; + + Ok(None) + } + + /// Projects a set expression to a predicate, + /// applying a transformation to each literal in the set. + fn project_set<T>( Review Comment: fixed ########## crates/iceberg/src/spec/transform.rs: ########## @@ -261,6 +269,323 @@ impl Transform { _ => self == other, } } + + /// Projects a given predicate according to the transformation + /// specified by the `Transform` instance. + /// + /// This allows predicates to be effectively applied to data + /// that has undergone transformation, enabling efficient querying + /// and filtering based on the original, untransformed data. 
+ /// + /// # Example + /// Suppose, we have row filter `a = 10`, and a partition spec + /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition + /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to + /// `bs = bucket(10, 37)` + pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result<Option<Predicate>> { + let func = create_transform_function(self)?; + + match self { + Transform::Identity => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => Ok(Some(Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + expr.literal().to_owned(), + )))), + BoundPredicate::Set(expr) => Ok(Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + expr.literals().to_owned(), + )))), + _ => Ok(None), + }, + Transform::Bucket(_) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => self.project_binary(name, expr, &func), + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + }, + Transform::Truncate(width) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_with_adjusted_boundary(name, expr, &func, Some(*width)) + } + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + }, + Transform::Year | Transform::Month | Transform::Day | Transform::Hour => { + match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_with_adjusted_boundary(name, expr, &func, None) + } + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + } + } + _ => Ok(None), + } + } + + /// Check if `Transform` is applicable on datum's `PrimitiveType` + fn can_transform(&self, datum: &Datum) -> bool { + 
let input_type = datum.data_type().clone(); + self.result_type(&Type::Primitive(input_type)).is_ok() + } + + /// Creates a unary predicate from a given operator and a reference name. + fn project_unary(op: PredicateOperator, name: String) -> Result<Option<Predicate>> { + Ok(Some(Predicate::Unary(UnaryExpression::new( + op, + Reference::new(name), + )))) + } + + /// Attempts to create a binary predicate based on a binary expression, + /// if applicable. + /// + /// This method evaluates a given binary expression and, if the operation + /// is equality (`Eq`) and the literal can be transformed, constructs a + /// `Predicate::Binary` variant representing the binary operation. + fn project_binary<T>( Review Comment: fixed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org