This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0022d8e503 chore: Cleanup returning null arrays (#20423)
0022d8e503 is described below

commit 0022d8e503f0dc0ee40ead545114147fc703e263
Author: Neil Conway <[email protected]>
AuthorDate: Thu Feb 19 11:15:28 2026 -0500

    chore: Cleanup returning null arrays (#20423)
    
    Clean up a few places where the code returned a null array when it would
    be a bit cleaner and faster to return a typed scalar null instead.
    
    ## Which issue does this PR close?
    
    Does not close an issue; this cleanup was mentioned in the code review
    for #20361
    
    ## Rationale for this change
    
    Returning a typed scalar null should be preferred to returning a null
    array: it still has type information, and avoids materializing an
    all-null array. The downstream consumer can always materialize the
    equivalent array if they want to.
    
    ## What changes are included in this PR?
    
    Clean up five instances of this pattern.
    
    ## Are these changes tested?
    
    Yes. No new test cases possible/warranted.
    
    ## Are there any user-facing changes?
    
    No.
---
 datafusion/functions/src/math/gcd.rs               |  7 ++----
 datafusion/functions/src/unicode/find_in_set.rs    | 26 +++++++++++-----------
 datafusion/spark/src/function/datetime/next_day.rs |  7 ++----
 datafusion/spark/src/function/hash/sha2.rs         |  9 ++------
 4 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/datafusion/functions/src/math/gcd.rs b/datafusion/functions/src/math/gcd.rs
index baf52d7806..1f6a353a85 100644
--- a/datafusion/functions/src/math/gcd.rs
+++ b/datafusion/functions/src/math/gcd.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray, new_null_array};
+use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray};
 use arrow::compute::try_binary;
 use arrow::datatypes::{DataType, Int64Type};
 use arrow::error::ArrowError;
@@ -144,10 +144,7 @@ fn compute_gcd_with_scalar(arr: &ArrayRef, scalar: Option<i64>) -> Result<Column
 
             result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
         }
-        None => Ok(ColumnarValue::Array(new_null_array(
-            &DataType::Int64,
-            arr.len(),
-        ))),
+        None => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
     }
 }
 
diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs
index 0feb637924..0cf20584a6 100644
--- a/datafusion/functions/src/unicode/find_in_set.rs
+++ b/datafusion/functions/src/unicode/find_in_set.rs
@@ -20,7 +20,7 @@ use std::sync::Arc;
 
 use arrow::array::{
     ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait,
-    PrimitiveArray, new_null_array,
+    PrimitiveArray,
 };
 use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
 
@@ -138,9 +138,11 @@ impl ScalarUDFImpl for FindInSetFunc {
                     | ScalarValue::LargeUtf8(str_list_literal),
                 ),
             ) => {
-                let result_array = match str_list_literal {
+                match str_list_literal {
                     // find_in_set(column_a, null) = null
-                    None => new_null_array(return_field.data_type(), str_array.len()),
+                    None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
+                        return_field.data_type(),
+                    )?)),
                     Some(str_list_literal) => {
                         let str_list = str_list_literal.split(',').collect::<Vec<&str>>();
                         let result = match str_array.data_type() {
@@ -171,10 +173,9 @@ impl ScalarUDFImpl for FindInSetFunc {
                                 )
                             }
                         };
-                        Arc::new(result?)
+                        Ok(ColumnarValue::Array(Arc::new(result?)))
                     }
-                };
-                Ok(ColumnarValue::Array(result_array))
+                }
             }
 
             // `string` is scalar, `str_list` is an array
@@ -186,11 +187,11 @@ impl ScalarUDFImpl for FindInSetFunc {
                 ),
                 ColumnarValue::Array(str_list_array),
             ) => {
-                let res = match string_literal {
+                match string_literal {
                     // find_in_set(null, column_b) = null
-                    None => {
-                        new_null_array(return_field.data_type(), str_list_array.len())
-                    }
+                    None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
+                        return_field.data_type(),
+                    )?)),
                     Some(string) => {
                         let result = match str_list_array.data_type() {
                             DataType::Utf8 => {
@@ -217,10 +218,9 @@ impl ScalarUDFImpl for FindInSetFunc {
                                 )
                             }
                         };
-                        Arc::new(result?)
+                        Ok(ColumnarValue::Array(Arc::new(result?)))
                     }
-                };
-                Ok(ColumnarValue::Array(res))
+                }
             }
 
             // both inputs are arrays
diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs
index 2acd295f8f..375b8308d1 100644
--- a/datafusion/spark/src/function/datetime/next_day.rs
+++ b/datafusion/spark/src/function/datetime/next_day.rs
@@ -18,7 +18,7 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType, new_null_array};
+use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType};
 use arrow::datatypes::{DataType, Date32Type, Field, FieldRef};
 use chrono::{Datelike, Duration, Weekday};
 use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
@@ -129,10 +129,7 @@ impl ScalarUDFImpl for SparkNextDay {
                         } else {
                             // TODO: if spark.sql.ansi.enabled is false,
                             //  returns NULL instead of an error for a malformed dayOfWeek.
-                            Ok(ColumnarValue::Array(Arc::new(new_null_array(
-                                &DataType::Date32,
-                                date_array.len(),
-                            ))))
+                            Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
                         }
                     }
                     _ => exec_err!(
diff --git a/datafusion/spark/src/function/hash/sha2.rs b/datafusion/spark/src/function/hash/sha2.rs
index 2f01854d37..3fa41aba71 100644
--- a/datafusion/spark/src/function/hash/sha2.rs
+++ b/datafusion/spark/src/function/hash/sha2.rs
@@ -15,9 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::array::{
-    ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray, new_null_array,
-};
+use arrow::array::{ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray};
 use arrow::datatypes::{DataType, Int32Type};
 use datafusion_common::types::{
     NativeType, logical_binary, logical_int32, logical_string,
@@ -170,10 +168,7 @@ impl ScalarUDFImpl for SparkSha2 {
             (
                 ColumnarValue::Array(_),
                 ColumnarValue::Scalar(ScalarValue::Int32(None)),
-            ) => Ok(ColumnarValue::Array(new_null_array(
-                &DataType::Utf8,
-                args.number_rows,
-            ))),
+            ) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
             _ => {
                 // Fallback to existing behavior for any array/mixed cases
                 make_scalar_function(sha2_impl, vec![])(&args.args)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to