pepijnve commented on code in PR #18821:
URL: https://github.com/apache/datafusion/pull/18821#discussion_r2542953009


##########
datafusion/expr/src/expr_rewriter/guarantees.rs:
##########
@@ -48,199 +34,282 @@ impl<'a> GuaranteeRewriter<'a> {
         guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
     ) -> Self {
         Self {
-            // TODO: Clippy wants the "map" call removed, but doing so 
generates
-            //       a compilation error. Remove the clippy directive once this
-            //       issue is fixed.
-            #[allow(clippy::map_identity)]
             guarantees: guarantees.into_iter().map(|(k, v)| (k, v)).collect(),
         }
     }
 }
 
+/// Rewrite expressions to incorporate guarantees.
+///
+/// Guarantees are a mapping from an expression (which currently is always a
+/// column reference) to a [NullableInterval] that represents the known 
possible
+/// values of the expression.
+///
+/// Rewriting expressions using this type of guarantee can make the work of 
other expression
+/// simplifications, like const evaluation, easier.
+///
+/// For example, if we know that a column is not null and has values in the
+/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
+///
+/// If the set of guarantees will be used to rewrite more than one expression, 
consider using
+/// [rewrite_with_guarantees_map] instead.
+///
+/// A full example of using this rewrite rule can be found in
+/// 
[`ExprSimplifier::with_guarantees()`](https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.ExprSimplifier.html#method.with_guarantees).
+pub fn rewrite_with_guarantees<'a>(
+    expr: Expr,
+    guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
+) -> Result<Transformed<Expr>> {
+    let guarantees_map: HashMap<&Expr, &NullableInterval> =
+        guarantees.into_iter().map(|(k, v)| (k, v)).collect();
+    rewrite_with_guarantees_map(expr, &guarantees_map)
+}
+
+/// Rewrite expressions to incorporate guarantees.
+///
+/// Guarantees are a mapping from an expression (which currently is always a
+/// column reference) to a [NullableInterval]. The interval represents the 
known
+/// possible values of the column.
+///
+/// For example, if we know that a column is not null and has values in the
+/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
+pub fn rewrite_with_guarantees_map<'a>(
+    expr: Expr,
+    guarantees: &'a HashMap<&'a Expr, &'a NullableInterval>,
+) -> Result<Transformed<Expr>> {
+    expr.transform_up(|e| rewrite_expr(e, guarantees))
+}
+
 impl TreeNodeRewriter for GuaranteeRewriter<'_> {
     type Node = Expr;
 
     fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
-        if self.guarantees.is_empty() {
-            return Ok(Transformed::no(expr));
-        }
+        rewrite_expr(expr, &self.guarantees)
+    }
+}
 
-        match &expr {
-            Expr::IsNull(inner) => match self.guarantees.get(inner.as_ref()) {
-                Some(NullableInterval::Null { .. }) => 
Ok(Transformed::yes(lit(true))),
-                Some(NullableInterval::NotNull { .. }) => {
-                    Ok(Transformed::yes(lit(false)))
-                }
-                _ => Ok(Transformed::no(expr)),
-            },
-            Expr::IsNotNull(inner) => match 
self.guarantees.get(inner.as_ref()) {
-                Some(NullableInterval::Null { .. }) => 
Ok(Transformed::yes(lit(false))),
-                Some(NullableInterval::NotNull { .. }) => 
Ok(Transformed::yes(lit(true))),
-                _ => Ok(Transformed::no(expr)),
-            },
-            Expr::Between(Between {
-                expr: inner,
-                negated,
-                low,
-                high,
-            }) => {
-                if let (Some(interval), Expr::Literal(low, _), 
Expr::Literal(high, _)) = (
-                    self.guarantees.get(inner.as_ref()),
-                    low.as_ref(),
-                    high.as_ref(),
-                ) {
-                    let expr_interval = NullableInterval::NotNull {
-                        values: Interval::try_new(low.clone(), high.clone())?,
-                    };
-
-                    let contains = expr_interval.contains(*interval)?;
-
-                    if contains.is_certainly_true() {
-                        Ok(Transformed::yes(lit(!negated)))
-                    } else if contains.is_certainly_false() {
-                        Ok(Transformed::yes(lit(*negated)))
-                    } else {
-                        Ok(Transformed::no(expr))
-                    }
-                } else {
-                    Ok(Transformed::no(expr))
-                }
-            }
+fn rewrite_expr(
+    expr: Expr,
+    guarantees: &HashMap<&Expr, &NullableInterval>,
+) -> Result<Transformed<Expr>> {
+    if guarantees.is_empty() {
+        return Ok(Transformed::no(expr));
+    }
 
-            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
-                // The left or right side of expression might either have a 
guarantee
-                // or be a literal. Either way, we can resolve them to a 
NullableInterval.
-                let left_interval = self
-                    .guarantees
-                    .get(left.as_ref())
-                    .map(|interval| Cow::Borrowed(*interval))
-                    .or_else(|| {
-                        if let Expr::Literal(value, _) = left.as_ref() {
-                            Some(Cow::Owned(value.clone().into()))
-                        } else {
-                            None
-                        }
-                    });
-                let right_interval = self
-                    .guarantees
-                    .get(right.as_ref())
-                    .map(|interval| Cow::Borrowed(*interval))
-                    .or_else(|| {
-                        if let Expr::Literal(value, _) = right.as_ref() {
-                            Some(Cow::Owned(value.clone().into()))
-                        } else {
-                            None
-                        }
-                    });
-
-                match (left_interval, right_interval) {
-                    (Some(left_interval), Some(right_interval)) => {
-                        let result =
-                            left_interval.apply_operator(op, 
right_interval.as_ref())?;
-                        if result.is_certainly_true() {
-                            Ok(Transformed::yes(lit(true)))
-                        } else if result.is_certainly_false() {
-                            Ok(Transformed::yes(lit(false)))
-                        } else {
-                            Ok(Transformed::no(expr))
-                        }
-                    }
-                    _ => Ok(Transformed::no(expr)),
-                }
+    let new_expr = match &expr {

Review Comment:
   I'll process this asap



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to