alamb commented on code in PR #18821:
URL: https://github.com/apache/datafusion/pull/18821#discussion_r2542539166


##########
datafusion/expr/src/expr_rewriter/guarantees.rs:
##########
@@ -15,30 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Simplifier implementation for [`ExprSimplifier::with_guarantees()`]
-//!
-//! [`ExprSimplifier::with_guarantees()`]: 
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
+//! Rewrite expressions based on external expression value range guarantees.
 
-use std::{borrow::Cow, collections::HashMap};
+use std::borrow::Cow;
 
-use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
-use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr};
+use crate::{expr::InList, lit, Between, BinaryExpr, Expr};
+use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
+use datafusion_common::{DataFusionError, HashMap, Result, ScalarValue};
+use datafusion_expr_common::interval_arithmetic::{Interval, NullableInterval};
 
 /// Rewrite expressions to incorporate guarantees.

Review Comment:
   Can we please add a doc link to point people at `rewrite_with_guarantees`?
   
   Something like
   
   ```suggestion
   /// Rewrite expressions to incorporate guarantees.
   ///
   /// See [`rewrite_with_guarantees`] for more information
   ```



##########
datafusion/expr/src/expr_rewriter/guarantees.rs:
##########
@@ -48,199 +34,282 @@ impl<'a> GuaranteeRewriter<'a> {
         guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
     ) -> Self {
         Self {
-            // TODO: Clippy wants the "map" call removed, but doing so 
generates
-            //       a compilation error. Remove the clippy directive once this
-            //       issue is fixed.
-            #[allow(clippy::map_identity)]
             guarantees: guarantees.into_iter().map(|(k, v)| (k, v)).collect(),
         }
     }
 }
 
+/// Rewrite expressions to incorporate guarantees.
+///
+/// Guarantees are a mapping from an expression (which currently is always a
+/// column reference) to a [NullableInterval] that represents the known 
possible
+/// values of the expression.
+///
+/// Rewriting expressions using this type of guarantee can make the work of 
other expression
+/// simplifications, like const evaluation, easier.
+///
+/// For example, if we know that a column is not null and has values in the
+/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
+///
+/// If the set of guarantees will be used to rewrite more than one expression, 
consider using
+/// [rewrite_with_guarantees_map] instead.
+///
+/// A full example of using this rewrite rule can be found in
+/// 
[`ExprSimplifier::with_guarantees()`](https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.ExprSimplifier.html#method.with_guarantees).
+pub fn rewrite_with_guarantees<'a>(
+    expr: Expr,
+    guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
+) -> Result<Transformed<Expr>> {
+    let guarantees_map: HashMap<&Expr, &NullableInterval> =
+        guarantees.into_iter().map(|(k, v)| (k, v)).collect();
+    rewrite_with_guarantees_map(expr, &guarantees_map)
+}
+
+/// Rewrite expressions to incorporate guarantees.
+///
+/// Guarantees are a mapping from an expression (which currently is always a
+/// column reference) to a [NullableInterval]. The interval represents the 
known
+/// possible values of the column.
+///
+/// For example, if we know that a column is not null and has values in the
+/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
+pub fn rewrite_with_guarantees_map<'a>(
+    expr: Expr,
+    guarantees: &'a HashMap<&'a Expr, &'a NullableInterval>,
+) -> Result<Transformed<Expr>> {
+    expr.transform_up(|e| rewrite_expr(e, guarantees))
+}
+
 impl TreeNodeRewriter for GuaranteeRewriter<'_> {
     type Node = Expr;
 
     fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
-        if self.guarantees.is_empty() {
-            return Ok(Transformed::no(expr));
-        }
+        rewrite_expr(expr, &self.guarantees)
+    }
+}
 
-        match &expr {
-            Expr::IsNull(inner) => match self.guarantees.get(inner.as_ref()) {
-                Some(NullableInterval::Null { .. }) => 
Ok(Transformed::yes(lit(true))),
-                Some(NullableInterval::NotNull { .. }) => {
-                    Ok(Transformed::yes(lit(false)))
-                }
-                _ => Ok(Transformed::no(expr)),
-            },
-            Expr::IsNotNull(inner) => match 
self.guarantees.get(inner.as_ref()) {
-                Some(NullableInterval::Null { .. }) => 
Ok(Transformed::yes(lit(false))),
-                Some(NullableInterval::NotNull { .. }) => 
Ok(Transformed::yes(lit(true))),
-                _ => Ok(Transformed::no(expr)),
-            },
-            Expr::Between(Between {
-                expr: inner,
-                negated,
-                low,
-                high,
-            }) => {
-                if let (Some(interval), Expr::Literal(low, _), 
Expr::Literal(high, _)) = (
-                    self.guarantees.get(inner.as_ref()),
-                    low.as_ref(),
-                    high.as_ref(),
-                ) {
-                    let expr_interval = NullableInterval::NotNull {
-                        values: Interval::try_new(low.clone(), high.clone())?,
-                    };
-
-                    let contains = expr_interval.contains(*interval)?;
-
-                    if contains.is_certainly_true() {
-                        Ok(Transformed::yes(lit(!negated)))
-                    } else if contains.is_certainly_false() {
-                        Ok(Transformed::yes(lit(*negated)))
-                    } else {
-                        Ok(Transformed::no(expr))
-                    }
-                } else {
-                    Ok(Transformed::no(expr))
-                }
-            }
+fn rewrite_expr(
+    expr: Expr,
+    guarantees: &HashMap<&Expr, &NullableInterval>,
+) -> Result<Transformed<Expr>> {
+    if guarantees.is_empty() {
+        return Ok(Transformed::no(expr));
+    }
 
-            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
-                // The left or right side of expression might either have a 
guarantee
-                // or be a literal. Either way, we can resolve them to a 
NullableInterval.
-                let left_interval = self
-                    .guarantees
-                    .get(left.as_ref())
-                    .map(|interval| Cow::Borrowed(*interval))
-                    .or_else(|| {
-                        if let Expr::Literal(value, _) = left.as_ref() {
-                            Some(Cow::Owned(value.clone().into()))
-                        } else {
-                            None
-                        }
-                    });
-                let right_interval = self
-                    .guarantees
-                    .get(right.as_ref())
-                    .map(|interval| Cow::Borrowed(*interval))
-                    .or_else(|| {
-                        if let Expr::Literal(value, _) = right.as_ref() {
-                            Some(Cow::Owned(value.clone().into()))
-                        } else {
-                            None
-                        }
-                    });
-
-                match (left_interval, right_interval) {
-                    (Some(left_interval), Some(right_interval)) => {
-                        let result =
-                            left_interval.apply_operator(op, 
right_interval.as_ref())?;
-                        if result.is_certainly_true() {
-                            Ok(Transformed::yes(lit(true)))
-                        } else if result.is_certainly_false() {
-                            Ok(Transformed::yes(lit(false)))
-                        } else {
-                            Ok(Transformed::no(expr))
-                        }
-                    }
-                    _ => Ok(Transformed::no(expr)),
-                }
+    let new_expr = match &expr {
+        Expr::IsNull(inner) => match guarantees.get(inner.as_ref()) {

Review Comment:
   this looks really nice now



##########
datafusion/expr/src/expr_rewriter/guarantees.rs:
##########
@@ -48,199 +34,282 @@ impl<'a> GuaranteeRewriter<'a> {
         guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
     ) -> Self {
         Self {
-            // TODO: Clippy wants the "map" call removed, but doing so 
generates
-            //       a compilation error. Remove the clippy directive once this
-            //       issue is fixed.
-            #[allow(clippy::map_identity)]
             guarantees: guarantees.into_iter().map(|(k, v)| (k, v)).collect(),
         }
     }
 }
 
+/// Rewrite expressions to incorporate guarantees.
+///
+/// Guarantees are a mapping from an expression (which currently is always a
+/// column reference) to a [NullableInterval] that represents the known 
possible
+/// values of the expression.
+///
+/// Rewriting expressions using this type of guarantee can make the work of 
other expression
+/// simplifications, like const evaluation, easier.
+///
+/// For example, if we know that a column is not null and has values in the
+/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
+///
+/// If the set of guarantees will be used to rewrite more than one expression, 
consider using
+/// [rewrite_with_guarantees_map] instead.
+///
+/// A full example of using this rewrite rule can be found in
+/// 
[`ExprSimplifier::with_guarantees()`](https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.ExprSimplifier.html#method.with_guarantees).
+pub fn rewrite_with_guarantees<'a>(
+    expr: Expr,
+    guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
+) -> Result<Transformed<Expr>> {
+    let guarantees_map: HashMap<&Expr, &NullableInterval> =
+        guarantees.into_iter().map(|(k, v)| (k, v)).collect();
+    rewrite_with_guarantees_map(expr, &guarantees_map)
+}
+
+/// Rewrite expressions to incorporate guarantees.
+///
+/// Guarantees are a mapping from an expression (which currently is always a
+/// column reference) to a [NullableInterval]. The interval represents the 
known
+/// possible values of the column.
+///
+/// For example, if we know that a column is not null and has values in the
+/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
+pub fn rewrite_with_guarantees_map<'a>(
+    expr: Expr,
+    guarantees: &'a HashMap<&'a Expr, &'a NullableInterval>,
+) -> Result<Transformed<Expr>> {
+    expr.transform_up(|e| rewrite_expr(e, guarantees))

Review Comment:
   We could probably make this more efficient by skipping the entire recursion 
/ tree walk in `rewrite_with_guarantees_map`  -- rewriting the entire tree tree 
is not free, even if no changes are made
   
   The same check is in `rewrite_expr` but that is called on each node in the 
tree always even if there are no guarantees
   
   ```suggestion
       if guarantees.is_empty() {
         return Ok(Transformed::no(expr));
       }
       expr.transform_up(|e| rewrite_expr(e, guarantees))
   ```
   
   It does look like the rewrite code is called for all plans (and the 
guarantees will be empty for almost all of them), so it might save non trivial 
time



##########
datafusion/expr/src/expr_rewriter/guarantees.rs:
##########
@@ -458,7 +527,9 @@ mod tests {
                     .collect(),
                 *negated,
             );
-            let output = expr.clone().rewrite(&mut rewriter).data().unwrap();
+            let output = rewrite_with_guarantees(expr.clone(), 
guarantees.iter())
+                .data()
+                .unwrap();
             let expected_list = expected_list
                 .iter()
                 .map(|v| lit(ScalarValue::Int32(Some(*v))))

Review Comment:
   Did you want to add a test for the newly fixed case too? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to