This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7d87976e1f Avoid the need to rewrite expressions when evaluating
logical case nullability (#18849)
7d87976e1f is described below
commit 7d87976e1f74e7444a62f8d80651b67d733a57ce
Author: Pepijn Van Eeckhoudt <[email protected]>
AuthorDate: Sun Nov 23 15:39:33 2025 +0100
Avoid the need to rewrite expressions when evaluating logical case
nullability (#18849)
## Which issue does this PR close?
- None, follow up to PR #17813
## Rationale for this change
In #17813, the when expressions are rewritten using `GuaranteesRewrite`
before evaluating their bounds. This PR avoids the rewrite cost by
inlining the special case of `GuaranteesRewrite` with a single `Null`
guarantee into the `PredicateBounds::evaluate_bounds` logic.
## What changes are included in this PR?
Inlines guarantee rewriting into predicate bounds evaluation.
## Are these changes tested?
Covered by existing tests
## Are there any user-facing changes?
No
---
datafusion/expr/src/expr_schema.rs | 26 ++------------------------
datafusion/expr/src/predicate_bounds.rs | 26 +++++++++++++++++++-------
2 files changed, 21 insertions(+), 31 deletions(-)
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index 8f8720941f..94f5b0480b 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -21,7 +21,6 @@ use crate::expr::{
InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction,
WindowFunctionParams,
};
-use crate::expr_rewriter::rewrite_with_guarantees;
use crate::type_coercion::functions::{
data_types_with_scalar_udf, fields_with_aggregate_udf,
fields_with_window_udf,
};
@@ -34,7 +33,6 @@ use datafusion_common::{
not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError,
ExprSchema,
Result, ScalarValue, Spans, TableReference,
};
-use datafusion_expr_common::interval_arithmetic::NullableInterval;
use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use std::sync::Arc;
@@ -307,29 +305,9 @@ impl ExprSchemable for Expr {
// For branches with a nullable 'then' expression, try
to determine
// if the 'then' expression is ever reachable in the
situation where
// it would evaluate to null.
-
- // First, derive a variant of the 'when' expression,
where all occurrences
- // of the 'then' expression have been replaced by
'NULL'.
- let certainly_null_expr =
unwrap_certainly_null_expr(t).clone();
- let certainly_null_type =
- match certainly_null_expr.get_type(input_schema) {
- Err(e) => return Some(Err(e)),
- Ok(datatype) => datatype,
- };
- let null_interval = NullableInterval::Null {
- datatype: certainly_null_type,
- };
- let guarantees = vec![(certainly_null_expr,
null_interval)];
- let when_with_null =
- match rewrite_with_guarantees(*w.clone(),
&guarantees) {
- Err(e) => return Some(Err(e)),
- Ok(e) => e.data,
- };
-
- // Next, determine the bounds of the derived 'when'
expression to see if it
- // can ever evaluate to true.
let bounds = match predicate_bounds::evaluate_bounds(
- &when_with_null,
+ w,
+ Some(unwrap_certainly_null_expr(t)),
input_schema,
) {
Err(e) => return Some(Err(e)),
diff --git a/datafusion/expr/src/predicate_bounds.rs
b/datafusion/expr/src/predicate_bounds.rs
index e79e756a32..192b2929fd 100644
--- a/datafusion/expr/src/predicate_bounds.rs
+++ b/datafusion/expr/src/predicate_bounds.rs
@@ -48,14 +48,19 @@ use datafusion_expr_common::operator::Operator;
///
pub(super) fn evaluate_bounds(
predicate: &Expr,
+ certainly_null_expr: Option<&Expr>,
input_schema: &dyn ExprSchema,
) -> Result<NullableInterval> {
- let evaluator = PredicateBoundsEvaluator { input_schema };
+ let evaluator = PredicateBoundsEvaluator {
+ input_schema,
+ certainly_null_expr,
+ };
evaluator.evaluate_bounds(predicate)
}
struct PredicateBoundsEvaluator<'a> {
input_schema: &'a dyn ExprSchema,
+ certainly_null_expr: Option<&'a Expr>,
}
impl PredicateBoundsEvaluator<'_> {
@@ -165,6 +170,13 @@ impl PredicateBoundsEvaluator<'_> {
return NullableInterval::FALSE;
}
+ // Check if the expression is the `certainly_null_expr` that was
passed in.
+ if let Some(certainly_null_expr) = &self.certainly_null_expr {
+ if expr.eq(certainly_null_expr) {
+ return NullableInterval::TRUE;
+ }
+ }
+
// `expr` is nullable, so our default answer for `is null` is going to
be `{ TRUE, FALSE }`.
// Try to see if we can narrow it down to just one option.
match expr {
@@ -237,7 +249,7 @@ mod tests {
fn eval_bounds(predicate: &Expr) -> Result<NullableInterval> {
let schema = DFSchema::try_from(Schema::empty())?;
- evaluate_bounds(predicate, &schema)
+ evaluate_bounds(predicate, None, &schema)
}
#[test]
@@ -467,7 +479,7 @@ mod tests {
for case in cases {
assert_eq!(
- evaluate_bounds(&case.0, case.1).unwrap(),
+ evaluate_bounds(&case.0, None, case.1).unwrap(),
case.2,
"Failed for {}",
case.0
@@ -543,14 +555,14 @@ mod tests {
for case in cases {
assert_eq!(
- evaluate_bounds(&case.0, ¬_nullable_schema).unwrap(),
+ evaluate_bounds(&case.0, None, ¬_nullable_schema).unwrap(),
case.1,
"Failed for {}",
case.0
);
assert_eq!(
- evaluate_bounds(&case.0, &nullable_schema).unwrap(),
+ evaluate_bounds(&case.0, None, &nullable_schema).unwrap(),
NullableInterval::ANY_TRUTH_VALUE,
"Failed for {}",
case.0
@@ -623,14 +635,14 @@ mod tests {
for case in cases {
assert_eq!(
- evaluate_bounds(&case.0, ¬_nullable_schema).unwrap(),
+ evaluate_bounds(&case.0, None, ¬_nullable_schema).unwrap(),
case.1,
"Failed for {}",
case.0
);
assert_eq!(
- evaluate_bounds(&case.0, &nullable_schema).unwrap(),
+ evaluate_bounds(&case.0, None, &nullable_schema).unwrap(),
NullableInterval::ANY_TRUTH_VALUE,
"Failed for {}",
case.0
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]