sdf-jkl commented on code in PR #19722:
URL: https://github.com/apache/datafusion/pull/19722#discussion_r2714030624
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1969,12 +1972,101 @@ impl TreeNodeRewriter for Simplifier<'_> {
}))
}
+ // =======================================
+ // preimage_in_comparison
+ // =======================================
+ //
+ // For case:
+ // date_part('YEAR', expr) op literal
+ //
+ // Background:
+ // Datasources such as Parquet can prune partitions using simple
predicates,
+ // but they cannot do so for complex expressions.
+ // For a complex predicate like `date_part('YEAR', c1) < 2000`,
pruning is not possible.
+ // After rewriting it to `c1 < 2000-01-01`, pruning becomes
feasible.
+ // Rewrites use inclusive lower and exclusive upper bounds when
+ // translating an equality into a range.
+ // NOTE: we only consider immutable UDFs with literal RHS values
and
+ // UDFs that provide both `preimage` and `column_expr`.
+ Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
+ use datafusion_expr::Operator::*;
+ let is_preimage_op = matches!(
+ op,
+ Eq | NotEq
+ | Lt
+ | LtEq
+ | Gt
+ | GtEq
+ | IsDistinctFrom
+ | IsNotDistinctFrom
+ );
+ if !is_preimage_op {
+ return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
+ left,
+ op,
+ right,
+ })));
+ }
+
+ if let (Some(interval), Some(col_expr)) =
+ get_preimage(left.as_ref(), right.as_ref(), info)?
+ {
+ rewrite_with_preimage(info, interval, op,
Box::new(col_expr))?
+ } else if let Some(swapped) = op.swap() {
+ if let (Some(interval), Some(col_expr)) =
+ get_preimage(right.as_ref(), left.as_ref(), info)?
+ {
+ rewrite_with_preimage(
+ info,
+ interval,
+ swapped,
+ Box::new(col_expr),
+ )?
+ } else {
+ Transformed::no(Expr::BinaryExpr(BinaryExpr { left,
op, right }))
+ }
+ } else {
+ Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op,
right }))
+ }
+ }
+
// no additional rewrites possible
expr => Transformed::no(expr),
})
}
}
+fn get_preimage(
+ left_expr: &Expr,
+ right_expr: &Expr,
+ info: &SimplifyContext,
+) -> Result<(Option<Interval>, Option<Expr>)> {
+ let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else {
+ return Ok((None, None));
+ };
+ if !is_literal_or_literal_cast(right_expr) {
Review Comment:
Do you have an example where a non-literal `expr` on the RHS could be used in a
comparison with `preimage`? I can't come up with one, but if there is one, we
could move the expression matching into the `preimage` impl.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]