This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3a0064d82d fix: custom nullability for length (#19175) (#19182)
3a0064d82d is described below
commit 3a0064d82d1c8511e8ad91406bf32aa70e38f258
Author: Kushagra S <[email protected]>
AuthorDate: Mon Dec 8 10:30:51 2025 +0530
fix: custom nullability for length (#19175) (#19182)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
- Closes #19175
## What changes are included in this PR?
- Adds custom nullability for the Spark `length` function: the output field is
nullable only when at least one of the input arguments is nullable.
---------
Co-authored-by: Raz Luvaton <[email protected]>
---
datafusion/spark/src/function/string/length.rs | 54 +++++++++++++++++++++++---
1 file changed, 49 insertions(+), 5 deletions(-)
diff --git a/datafusion/spark/src/function/string/length.rs b/datafusion/spark/src/function/string/length.rs
index ac6030770f..078b294cac 100644
--- a/datafusion/spark/src/function/string/length.rs
+++ b/datafusion/spark/src/function/string/length.rs
@@ -18,10 +18,11 @@
use arrow::array::{
Array, ArrayRef, AsArray, BinaryArrayType, PrimitiveArray, StringArrayType,
};
-use arrow::datatypes::{DataType, Int32Type};
+use arrow::datatypes::{DataType, Field, FieldRef, Int32Type};
use datafusion_common::exec_err;
use datafusion_expr::{
- ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
+ Volatility,
};
use datafusion_functions::utils::make_scalar_function;
use std::sync::Arc;
@@ -78,8 +79,9 @@ impl ScalarUDFImpl for SparkLengthFunc {
}
fn return_type(&self, _args: &[DataType]) -> datafusion_common::Result<DataType> {
- // spark length always returns Int32
- Ok(DataType::Int32)
+ datafusion_common::internal_err!(
+ "return_type should not be called, use return_field_from_args instead"
+ )
}
fn invoke_with_args(
@@ -92,6 +94,15 @@ impl ScalarUDFImpl for SparkLengthFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn return_field_from_args(
+ &self,
+ args: ReturnFieldArgs,
+ ) -> datafusion_common::Result<FieldRef> {
+ let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
+ // spark length always returns Int32
+ Ok(Arc::new(Field::new(self.name(), DataType::Int32, nullable)))
+ }
}
fn spark_length(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
@@ -193,8 +204,9 @@ mod tests {
use crate::function::utils::test::test_scalar_function;
use arrow::array::{Array, Int32Array};
use arrow::datatypes::DataType::Int32;
+ use arrow::datatypes::{Field, FieldRef};
use datafusion_common::{Result, ScalarValue};
- use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
+ use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarUDFImpl};
macro_rules! test_spark_length_string {
($INPUT:expr, $EXPECTED:expr) => {
@@ -279,4 +291,36 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_spark_length_nullability() -> Result<()> {
+ let func = SparkLengthFunc::new();
+
+ let nullable_field: FieldRef = Arc::new(Field::new("col", DataType::Utf8, true));
+
+ let out_nullable = func.return_field_from_args(ReturnFieldArgs {
+ arg_fields: &[nullable_field],
+ scalar_arguments: &[None],
+ })?;
+
+ assert!(
+ out_nullable.is_nullable(),
+ "length(col) should be nullable when child is nullable"
+ );
+
+ let non_nullable_field: FieldRef =
+ Arc::new(Field::new("col", DataType::Utf8, false));
+
+ let out_non_nullable = func.return_field_from_args(ReturnFieldArgs {
+ arg_fields: &[non_nullable_field],
+ scalar_arguments: &[None],
+ })?;
+
+ assert!(
+ !out_non_nullable.is_nullable(),
+ "length(col) should NOT be nullable when child is NOT nullable"
+ );
+
+ Ok(())
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]