This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0022d8e503 chore: Cleanup returning null arrays (#20423)
0022d8e503 is described below
commit 0022d8e503f0dc0ee40ead545114147fc703e263
Author: Neil Conway <[email protected]>
AuthorDate: Thu Feb 19 11:15:28 2026 -0500
chore: Cleanup returning null arrays (#20423)
Clean up a few places where the code returned a null array when it would
be cleaner and faster to return a typed scalar null instead.
## Which issue does this PR close?
Does not close an issue; this cleanup was mentioned in the code review
for #20361.
## Rationale for this change
Returning a typed scalar null should be preferred to returning a null
array: it still has type information, and avoids materializing an
all-null array. The downstream consumer can always materialize the
equivalent array if they want to.
## What changes are included in this PR?
Clean up five instances of this pattern.
## Are these changes tested?
Yes. No new test cases are possible or warranted.
## Are there any user-facing changes?
No.
---
datafusion/functions/src/math/gcd.rs | 7 ++----
datafusion/functions/src/unicode/find_in_set.rs | 26 +++++++++++-----------
datafusion/spark/src/function/datetime/next_day.rs | 7 ++----
datafusion/spark/src/function/hash/sha2.rs | 9 ++------
4 files changed, 19 insertions(+), 30 deletions(-)
diff --git a/datafusion/functions/src/math/gcd.rs
b/datafusion/functions/src/math/gcd.rs
index baf52d7806..1f6a353a85 100644
--- a/datafusion/functions/src/math/gcd.rs
+++ b/datafusion/functions/src/math/gcd.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray,
new_null_array};
+use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray};
use arrow::compute::try_binary;
use arrow::datatypes::{DataType, Int64Type};
use arrow::error::ArrowError;
@@ -144,10 +144,7 @@ fn compute_gcd_with_scalar(arr: &ArrayRef, scalar:
Option<i64>) -> Result<Column
result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
}
- None => Ok(ColumnarValue::Array(new_null_array(
- &DataType::Int64,
- arr.len(),
- ))),
+ None => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
}
}
diff --git a/datafusion/functions/src/unicode/find_in_set.rs
b/datafusion/functions/src/unicode/find_in_set.rs
index 0feb637924..0cf20584a6 100644
--- a/datafusion/functions/src/unicode/find_in_set.rs
+++ b/datafusion/functions/src/unicode/find_in_set.rs
@@ -20,7 +20,7 @@ use std::sync::Arc;
use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray,
OffsetSizeTrait,
- PrimitiveArray, new_null_array,
+ PrimitiveArray,
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
@@ -138,9 +138,11 @@ impl ScalarUDFImpl for FindInSetFunc {
| ScalarValue::LargeUtf8(str_list_literal),
),
) => {
- let result_array = match str_list_literal {
+ match str_list_literal {
// find_in_set(column_a, null) = null
- None => new_null_array(return_field.data_type(),
str_array.len()),
+ None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
+ return_field.data_type(),
+ )?)),
Some(str_list_literal) => {
let str_list =
str_list_literal.split(',').collect::<Vec<&str>>();
let result = match str_array.data_type() {
@@ -171,10 +173,9 @@ impl ScalarUDFImpl for FindInSetFunc {
)
}
};
- Arc::new(result?)
+ Ok(ColumnarValue::Array(Arc::new(result?)))
}
- };
- Ok(ColumnarValue::Array(result_array))
+ }
}
// `string` is scalar, `str_list` is an array
@@ -186,11 +187,11 @@ impl ScalarUDFImpl for FindInSetFunc {
),
ColumnarValue::Array(str_list_array),
) => {
- let res = match string_literal {
+ match string_literal {
// find_in_set(null, column_b) = null
- None => {
- new_null_array(return_field.data_type(),
str_list_array.len())
- }
+ None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
+ return_field.data_type(),
+ )?)),
Some(string) => {
let result = match str_list_array.data_type() {
DataType::Utf8 => {
@@ -217,10 +218,9 @@ impl ScalarUDFImpl for FindInSetFunc {
)
}
};
- Arc::new(result?)
+ Ok(ColumnarValue::Array(Arc::new(result?)))
}
- };
- Ok(ColumnarValue::Array(res))
+ }
}
// both inputs are arrays
diff --git a/datafusion/spark/src/function/datetime/next_day.rs
b/datafusion/spark/src/function/datetime/next_day.rs
index 2acd295f8f..375b8308d1 100644
--- a/datafusion/spark/src/function/datetime/next_day.rs
+++ b/datafusion/spark/src/function/datetime/next_day.rs
@@ -18,7 +18,7 @@
use std::any::Any;
use std::sync::Arc;
-use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType,
new_null_array};
+use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType};
use arrow::datatypes::{DataType, Date32Type, Field, FieldRef};
use chrono::{Datelike, Duration, Weekday};
use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
@@ -129,10 +129,7 @@ impl ScalarUDFImpl for SparkNextDay {
} else {
// TODO: if spark.sql.ansi.enabled is false,
// returns NULL instead of an error for a
malformed dayOfWeek.
- Ok(ColumnarValue::Array(Arc::new(new_null_array(
- &DataType::Date32,
- date_array.len(),
- ))))
+
Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
}
}
_ => exec_err!(
diff --git a/datafusion/spark/src/function/hash/sha2.rs
b/datafusion/spark/src/function/hash/sha2.rs
index 2f01854d37..3fa41aba71 100644
--- a/datafusion/spark/src/function/hash/sha2.rs
+++ b/datafusion/spark/src/function/hash/sha2.rs
@@ -15,9 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{
- ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray,
new_null_array,
-};
+use arrow::array::{ArrayRef, AsArray, BinaryArrayType, Int32Array,
StringArray};
use arrow::datatypes::{DataType, Int32Type};
use datafusion_common::types::{
NativeType, logical_binary, logical_int32, logical_string,
@@ -170,10 +168,7 @@ impl ScalarUDFImpl for SparkSha2 {
(
ColumnarValue::Array(_),
ColumnarValue::Scalar(ScalarValue::Int32(None)),
- ) => Ok(ColumnarValue::Array(new_null_array(
- &DataType::Utf8,
- args.number_rows,
- ))),
+ ) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
_ => {
// Fallback to existing behavior for any array/mixed cases
make_scalar_function(sha2_impl, vec![])(&args.args)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]