alamb commented on code in PR #17813:
URL: https://github.com/apache/datafusion/pull/17813#discussion_r2542022786
##########
datafusion/expr-common/src/interval_arithmetic.rs:
##########
@@ -1852,8 +1932,18 @@ impl NullableInterval {
}
}
- /// Compute the logical conjunction of this (boolean) interval with the
- /// given boolean interval.
+ /// Returns an interval representing the set of possible values after
applying SQL
+ /// three-valued logical AND on each combination of possible values from
`self` and `other`.
+ ///
+ /// This method uses the following truth table.
+ ///
+ /// ```text
+ /// A ∧ B │ F U T
+ /// ──────┼──────
+ /// F │ F F F
Review Comment:
this truth table seems to be missing the values for `A` (self)
##########
datafusion/expr-common/src/interval_arithmetic.rs:
##########
@@ -1880,8 +1970,18 @@ impl NullableInterval {
}
}
- /// Compute the logical disjunction of this (boolean) interval with the
- /// given boolean interval.
+ /// Returns an interval representing the set of possible values after
applying SQL three-valued
+ /// logical OR on each combination of possible values from `self` and
`other`.
+ ///
+ /// This method uses the following truth table.
+ ///
+ /// ```text
+ /// A ∨ B │ F U T
Review Comment:
likewise here this table is missing values for `A`
##########
datafusion/expr/src/expr_rewriter/guarantees.rs:
##########
@@ -15,46 +15,53 @@
// specific language governing permissions and limitations
// under the License.
-//! Simplifier implementation for [`ExprSimplifier::with_guarantees()`]
-//!
-//! [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
+//! Rewrite expressions based on external expression value range guarantees.
-use std::{borrow::Cow, collections::HashMap};
+use std::borrow::Cow;
-use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
-use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr};
+use crate::{expr::InList, lit, Between, BinaryExpr, Expr};
+use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
+use datafusion_common::{DataFusionError, HashMap, Result};
+use datafusion_expr_common::interval_arithmetic::{Interval, NullableInterval};
+
+struct GuaranteeRewriter<'a> {
Review Comment:
I think the `GuaranteeRewriter` is part of the public API, so making this
change would potentially cause breaking downstream changes:
https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.GuaranteeRewriter.html
I think we should leave the `GuaranteeRewriter` API in place (w/ comments
etc) and then make `rewrite_with_guarantees` a method or something
Perhaps
```rust
impl GuaranteeRewriter {
/// Create new guarantees from an iterator
pub fn new(
guarantees: impl IntoIterator<Item = &'a (Expr, NullableInterval)>,
)
/// Create new gurantees from a map
pub fn new(
guarantees: &'a HashMap<&'a Expr, &'a NullableInterval>,
)
}
```
🤔
##########
datafusion/optimizer/src/simplify_expressions/guarantees.rs:
##########
@@ -15,46 +15,53 @@
// specific language governing permissions and limitations
// under the License.
-//! Simplifier implementation for [`ExprSimplifier::with_guarantees()`]
-//!
-//! [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
+//! Rewrite expressions based on external expression value range guarantees.
-use std::{borrow::Cow, collections::HashMap};
+use std::borrow::Cow;
-use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
-use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr};
+use crate::{expr::InList, lit, Between, BinaryExpr, Expr};
+use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
+use datafusion_common::{DataFusionError, HashMap, Result};
+use datafusion_expr_common::interval_arithmetic::{Interval, NullableInterval};
+
+struct GuaranteeRewriter<'a> {
+ guarantees: &'a HashMap<&'a Expr, &'a NullableInterval>,
+}
/// Rewrite expressions to incorporate guarantees.
///
/// Guarantees are a mapping from an expression (which currently is always a
/// column reference) to a [NullableInterval]. The interval represents the
known
-/// possible values of the column. Using these known values, expressions are
-/// rewritten so they can be simplified using `ConstEvaluator` and
`Simplifier`.
+/// possible values of the column.
///
/// For example, if we know that a column is not null and has values in the
/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
///
-/// See a full example in [`ExprSimplifier::with_guarantees()`].
-///
-/// [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
Review Comment:
You probably removed this doc link b/c the code is now in a different module
that doesn't depend on optimizer.
However I think the link still adds value
What we have done in other places where we can't rely on auto links is to
use the direct HTML link:
https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.ExprSimplifier.html#method.with_guarantees
Which isn't as good as rustdoc doesn't check that the links don't get
broken, but I think it is better than just removing the link totally
##########
datafusion/optimizer/src/simplify_expressions/guarantees.rs:
##########
@@ -15,46 +15,53 @@
// specific language governing permissions and limitations
// under the License.
-//! Simplifier implementation for [`ExprSimplifier::with_guarantees()`]
-//!
-//! [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
+//! Rewrite expressions based on external expression value range guarantees.
-use std::{borrow::Cow, collections::HashMap};
+use std::borrow::Cow;
-use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
-use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr};
+use crate::{expr::InList, lit, Between, BinaryExpr, Expr};
+use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
+use datafusion_common::{DataFusionError, HashMap, Result};
+use datafusion_expr_common::interval_arithmetic::{Interval, NullableInterval};
+
+struct GuaranteeRewriter<'a> {
+ guarantees: &'a HashMap<&'a Expr, &'a NullableInterval>,
+}
/// Rewrite expressions to incorporate guarantees.
///
/// Guarantees are a mapping from an expression (which currently is always a
/// column reference) to a [NullableInterval]. The interval represents the
known
-/// possible values of the column. Using these known values, expressions are
-/// rewritten so they can be simplified using `ConstEvaluator` and
`Simplifier`.
+/// possible values of the column.
///
/// For example, if we know that a column is not null and has values in the
/// range [1, 10), we can rewrite `x IS NULL` to `false` or `x < 10` to `true`.
///
-/// See a full example in [`ExprSimplifier::with_guarantees()`].
-///
-/// [`ExprSimplifier::with_guarantees()`]:
crate::simplify_expressions::expr_simplifier::ExprSimplifier::with_guarantees
-pub struct GuaranteeRewriter<'a> {
Review Comment:
Unless there is a good reason, I think we should avoid removing this API as
it will cause unecessary churn on downstream crates
If you find `rewrite_with_guarantees` easier to work with, maybe you leave
`GuaranteeRewriter` and but implement `rewrite_with_guarantees` in terms of that
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]