This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2aad9d2d91 functions: Remove NullHandling from scalar funcs (#14531)
2aad9d2d91 is described below

commit 2aad9d2d91f6c858ddd599c2f91ec6832fdf0f44
Author: Joseph Koshakow <[email protected]>
AuthorDate: Fri Feb 7 11:14:49 2025 -0500

    functions: Remove NullHandling from scalar funcs (#14531)
    
    3dfce7d33c19d6e7941b58cb7e83194c066347ca added an enum to all scalar
    functions called `NullHandling`, with two variants: `PassThrough` and
    `Propagate`. `PassThrough` would pass through null inputs to the
    function implementation. `Propagate` would cause the function to return
    null if any of the inputs were scalar and null; it would not do
    anything if an input was an array and null. Function implementors were
    responsible for handling null array inputs and making sure the behavior
    was consistent with the scalar null behavior caused by `NullHandling`.
    
    If the function signature correctly described the accepted types, then
    the null array input handling would also work for null scalar inputs.
    However, if the function signature was `VariadicAny`, then the null
    array input handling would not work for null scalar inputs. The reason
    is that when the signature is `VariadicAny`, null inputs are not
    properly typed (for example `ScalarValue::Null` instead of
    `ScalarValue::Int64(None)`). So it turns out that `NullHandling` was
    only useful for compensating for non-descriptive function signatures.
    
    Furthermore, many array functions use a signature of `VariadicAny`
    and reject invalid types within the function implementation. This does
    not work with `NullHandling::Propagate`, because any null input would
    skip the function implementation, which would skip the type validation.
    So, if a function wanted to use `NullHandling::Propagate`, then it
    would need to use a descriptive function signature. However, using a
    descriptive function signature removes the usefulness of
    `NullHandling::Propagate`. So as it turns out `NullHandling::Propagate`
    is never useful.
    
    For all the reasons stated above, this commit removes the
    `NullHandling` enum.
---
 datafusion/expr/src/lib.rs                      |  4 ++--
 datafusion/expr/src/udf.rs                      | 19 -------------------
 datafusion/functions-nested/src/extract.rs      | 14 +-------------
 datafusion/physical-expr/src/scalar_function.rs | 12 +-----------
 4 files changed, 4 insertions(+), 45 deletions(-)

diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index 46c0f5a3dd..aaa65c676a 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -95,8 +95,8 @@ pub use udaf::{
     SetMonotonicity, StatisticsArgs,
 };
 pub use udf::{
-    scalar_doc_sections, NullHandling, ReturnInfo, ReturnTypeArgs, 
ScalarFunctionArgs,
-    ScalarUDF, ScalarUDFImpl,
+    scalar_doc_sections, ReturnInfo, ReturnTypeArgs, ScalarFunctionArgs, 
ScalarUDF,
+    ScalarUDFImpl,
 };
 pub use udwf::{window_doc_sections, ReversedUDWF, WindowUDF, WindowUDFImpl};
 pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits};
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 7c91b6b3b4..aa6a5cddad 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -200,11 +200,6 @@ impl ScalarUDF {
         self.inner.return_type_from_args(args)
     }
 
-    /// Returns the behavior that this function has when any of the inputs are 
Null.
-    pub fn null_handling(&self) -> NullHandling {
-        self.inner.null_handling()
-    }
-
     /// Do the function rewrite
     ///
     /// See [`ScalarUDFImpl::simplify`] for more details.
@@ -422,15 +417,6 @@ impl ReturnInfo {
     }
 }
 
-/// A function's behavior when the input is Null.
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
-pub enum NullHandling {
-    /// Null inputs are passed into the function implementation.
-    PassThrough,
-    /// Any Null input causes the function to return Null.
-    Propagate,
-}
-
 /// Trait for implementing user defined scalar functions.
 ///
 /// This trait exposes the full API for implementing user defined functions and
@@ -603,11 +589,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
         true
     }
 
-    /// Returns the behavior that this function has when any of the inputs are 
Null.
-    fn null_handling(&self) -> NullHandling {
-        NullHandling::PassThrough
-    }
-
     /// Invoke the function on `args`, returning the appropriate result
     ///
     /// Note: This method is deprecated and will be removed in future releases.
diff --git a/datafusion/functions-nested/src/extract.rs 
b/datafusion/functions-nested/src/extract.rs
index b472316d03..33f7904c16 100644
--- a/datafusion/functions-nested/src/extract.rs
+++ b/datafusion/functions-nested/src/extract.rs
@@ -33,7 +33,7 @@ use datafusion_common::{
 };
 use datafusion_expr::{ArrayFunctionSignature, Expr, TypeSignature};
 use datafusion_expr::{
-    ColumnarValue, Documentation, NullHandling, ScalarUDFImpl, Signature, 
Volatility,
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
 };
 use datafusion_macros::user_doc;
 use std::any::Any;
@@ -385,10 +385,6 @@ impl ScalarUDFImpl for ArraySlice {
         Ok(arg_types[0].clone())
     }
 
-    fn null_handling(&self) -> NullHandling {
-        NullHandling::Propagate
-    }
-
     fn invoke_batch(
         &self,
         args: &[ColumnarValue],
@@ -690,10 +686,6 @@ impl ScalarUDFImpl for ArrayPopFront {
         Ok(arg_types[0].clone())
     }
 
-    fn null_handling(&self) -> NullHandling {
-        NullHandling::Propagate
-    }
-
     fn invoke_batch(
         &self,
         args: &[ColumnarValue],
@@ -794,10 +786,6 @@ impl ScalarUDFImpl for ArrayPopBack {
         Ok(arg_types[0].clone())
     }
 
-    fn null_handling(&self) -> NullHandling {
-        NullHandling::Propagate
-    }
-
     fn invoke_batch(
         &self,
         args: &[ColumnarValue],
diff --git a/datafusion/physical-expr/src/scalar_function.rs 
b/datafusion/physical-expr/src/scalar_function.rs
index 1cd4b673ce..936adbc098 100644
--- a/datafusion/physical-expr/src/scalar_function.rs
+++ b/datafusion/physical-expr/src/scalar_function.rs
@@ -45,8 +45,7 @@ use datafusion_expr::interval_arithmetic::Interval;
 use datafusion_expr::sort_properties::ExprProperties;
 use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf;
 use datafusion_expr::{
-    expr_vec_fmt, ColumnarValue, Expr, NullHandling, ReturnTypeArgs, 
ScalarFunctionArgs,
-    ScalarUDF,
+    expr_vec_fmt, ColumnarValue, Expr, ReturnTypeArgs, ScalarFunctionArgs, 
ScalarUDF,
 };
 
 /// Physical expression of a scalar function
@@ -187,15 +186,6 @@ impl PhysicalExpr for ScalarFunctionExpr {
             .map(|e| e.evaluate(batch))
             .collect::<Result<Vec<_>>>()?;
 
-        if self.fun.null_handling() == NullHandling::Propagate
-            && args.iter().any(
-                |arg| matches!(arg, ColumnarValue::Scalar(scalar) if 
scalar.is_null()),
-            )
-        {
-            let null_value = ScalarValue::try_from(&self.return_type)?;
-            return Ok(ColumnarValue::Scalar(null_value));
-        }
-
         let input_empty = args.is_empty();
         let input_all_scalar = args
             .iter()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to