(datafusion) branch main updated: Avoid the need to rewrite expressions when evaluating logical case nullability (#18849)

github-bot Sun, 23 Nov 2025 06:40:00 -0800

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 7d87976e1f Avoid the need to rewrite expressions when evaluating 
logical case nullability (#18849)
7d87976e1f is described below

commit 7d87976e1f74e7444a62f8d80651b67d733a57ce
Author: Pepijn Van Eeckhoudt <[email protected]>
AuthorDate: Sun Nov 23 15:39:33 2025 +0100

    Avoid the need to rewrite expressions when evaluating logical case 
nullability (#18849)
    
    ## Which issue does this PR close?
    
    - None, follow up to PR #17813
    
    ## Rationale for this change
    
    In #17813, the when expressions are rewritten using `GuaranteesRewrite`
    before evaluating their bounds. This PR avoids the rewrite cost by
    inlining the special case of `GuaranteesRewrite` with a single `Null`
    guarantee into the `PredicateBounds::evaluate_bounds` logic.
    
    ## What changes are included in this PR?
    
    Inlines guarantee rewriting into predicate bounds evaluation.
    
    ## Are these changes tested?
    
    Covered by existing tests
    
    ## Are there any user-facing changes?
    
    No
---
 datafusion/expr/src/expr_schema.rs      | 26 ++------------------------
 datafusion/expr/src/predicate_bounds.rs | 26 +++++++++++++++++++-------
 2 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs
index 8f8720941f..94f5b0480b 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -21,7 +21,6 @@ use crate::expr::{
     InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction,
     WindowFunctionParams,
 };
-use crate::expr_rewriter::rewrite_with_guarantees;
 use crate::type_coercion::functions::{
     data_types_with_scalar_udf, fields_with_aggregate_udf, 
fields_with_window_udf,
 };
@@ -34,7 +33,6 @@ use datafusion_common::{
     not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, 
ExprSchema,
     Result, ScalarValue, Spans, TableReference,
 };
-use datafusion_expr_common::interval_arithmetic::NullableInterval;
 use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use std::sync::Arc;
@@ -307,29 +305,9 @@ impl ExprSchemable for Expr {
                         // For branches with a nullable 'then' expression, try 
to determine
                         // if the 'then' expression is ever reachable in the 
situation where
                         // it would evaluate to null.
-
-                        // First, derive a variant of the 'when' expression, 
where all occurrences
-                        // of the 'then' expression have been replaced by 
'NULL'.
-                        let certainly_null_expr = 
unwrap_certainly_null_expr(t).clone();
-                        let certainly_null_type =
-                            match certainly_null_expr.get_type(input_schema) {
-                                Err(e) => return Some(Err(e)),
-                                Ok(datatype) => datatype,
-                            };
-                        let null_interval = NullableInterval::Null {
-                            datatype: certainly_null_type,
-                        };
-                        let guarantees = vec![(certainly_null_expr, 
null_interval)];
-                        let when_with_null =
-                            match rewrite_with_guarantees(*w.clone(), 
&guarantees) {
-                                Err(e) => return Some(Err(e)),
-                                Ok(e) => e.data,
-                            };
-
-                        // Next, determine the bounds of the derived 'when' 
expression to see if it
-                        // can ever evaluate to true.
                         let bounds = match predicate_bounds::evaluate_bounds(
-                            &when_with_null,
+                            w,
+                            Some(unwrap_certainly_null_expr(t)),
                             input_schema,
                         ) {
                             Err(e) => return Some(Err(e)),
diff --git a/datafusion/expr/src/predicate_bounds.rs 
b/datafusion/expr/src/predicate_bounds.rs
index e79e756a32..192b2929fd 100644
--- a/datafusion/expr/src/predicate_bounds.rs
+++ b/datafusion/expr/src/predicate_bounds.rs
@@ -48,14 +48,19 @@ use datafusion_expr_common::operator::Operator;
 ///
 pub(super) fn evaluate_bounds(
     predicate: &Expr,
+    certainly_null_expr: Option<&Expr>,
     input_schema: &dyn ExprSchema,
 ) -> Result<NullableInterval> {
-    let evaluator = PredicateBoundsEvaluator { input_schema };
+    let evaluator = PredicateBoundsEvaluator {
+        input_schema,
+        certainly_null_expr,
+    };
     evaluator.evaluate_bounds(predicate)
 }
 
 struct PredicateBoundsEvaluator<'a> {
     input_schema: &'a dyn ExprSchema,
+    certainly_null_expr: Option<&'a Expr>,
 }
 
 impl PredicateBoundsEvaluator<'_> {
@@ -165,6 +170,13 @@ impl PredicateBoundsEvaluator<'_> {
             return NullableInterval::FALSE;
         }
 
+        // Check if the expression is the `certainly_null_expr` that was 
passed in.
+        if let Some(certainly_null_expr) = &self.certainly_null_expr {
+            if expr.eq(certainly_null_expr) {
+                return NullableInterval::TRUE;
+            }
+        }
+
         // `expr` is nullable, so our default answer for `is null` is going to 
be `{ TRUE, FALSE }`.
         // Try to see if we can narrow it down to just one option.
         match expr {
@@ -237,7 +249,7 @@ mod tests {
 
     fn eval_bounds(predicate: &Expr) -> Result<NullableInterval> {
         let schema = DFSchema::try_from(Schema::empty())?;
-        evaluate_bounds(predicate, &schema)
+        evaluate_bounds(predicate, None, &schema)
     }
 
     #[test]
@@ -467,7 +479,7 @@ mod tests {
 
         for case in cases {
             assert_eq!(
-                evaluate_bounds(&case.0, case.1).unwrap(),
+                evaluate_bounds(&case.0, None, case.1).unwrap(),
                 case.2,
                 "Failed for {}",
                 case.0
@@ -543,14 +555,14 @@ mod tests {
 
         for case in cases {
             assert_eq!(
-                evaluate_bounds(&case.0, &not_nullable_schema).unwrap(),
+                evaluate_bounds(&case.0, None, &not_nullable_schema).unwrap(),
                 case.1,
                 "Failed for {}",
                 case.0
             );
 
             assert_eq!(
-                evaluate_bounds(&case.0, &nullable_schema).unwrap(),
+                evaluate_bounds(&case.0, None, &nullable_schema).unwrap(),
                 NullableInterval::ANY_TRUTH_VALUE,
                 "Failed for {}",
                 case.0
@@ -623,14 +635,14 @@ mod tests {
 
         for case in cases {
             assert_eq!(
-                evaluate_bounds(&case.0, &not_nullable_schema).unwrap(),
+                evaluate_bounds(&case.0, None, &not_nullable_schema).unwrap(),
                 case.1,
                 "Failed for {}",
                 case.0
             );
 
             assert_eq!(
-                evaluate_bounds(&case.0, &nullable_schema).unwrap(),
+                evaluate_bounds(&case.0, None, &nullable_schema).unwrap(),
                 NullableInterval::ANY_TRUTH_VALUE,
                 "Failed for {}",
                 case.0


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Avoid the need to rewrite expressions when evaluating logical case nullability (#18849)

Reply via email to