This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 94d9c8e68c Simplify make_date & fix null handling (#19296)
94d9c8e68c is described below
commit 94d9c8e68c01fc4d7f12d63b61988d74e59bfd10
Author: Jeffrey Vo <[email protected]>
AuthorDate: Sun Dec 14 20:16:15 2025 +0900
Simplify make_date & fix null handling (#19296)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
N/A
## Rationale for this change
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
Refactor implementation of `make_date` to be more like `make_time`, see
#19183
## What changes are included in this PR?
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
Refactoring. Also fix null handling so that any null inputs output null
instead of an error.
## Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
Yes.
## Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
-->
Null inputs no longer raise an error, instead returning null.
<!--
If there are any breaking changes to public APIs, please add the `api
change` label.
-->
---
.../examples/builtin_functions/date_time.rs | 7 +-
datafusion/functions/src/datetime/make_date.rs | 333 +++++----------------
datafusion/functions/src/datetime/make_time.rs | 5 +-
datafusion/sqllogictest/test_files/timestamps.slt | 55 +++-
4 files changed, 114 insertions(+), 286 deletions(-)
diff --git a/datafusion-examples/examples/builtin_functions/date_time.rs
b/datafusion-examples/examples/builtin_functions/date_time.rs
index 7fb2c25f56..08d4bc6e29 100644
--- a/datafusion-examples/examples/builtin_functions/date_time.rs
+++ b/datafusion-examples/examples/builtin_functions/date_time.rs
@@ -181,12 +181,13 @@ async fn query_make_date() -> Result<()> {
// invalid column values will result in an error
let result = ctx
- .sql("select make_date(2024, null, 23)")
+ .sql("select make_date(2024, '', 23)")
.await?
.collect()
.await;
- let expected = "Execution error: Unable to parse date from null/empty
value";
+ let expected =
+ "Arrow error: Cast error: Cannot cast string '' to value of Int32
type";
assert_contains!(result.unwrap_err().to_string(), expected);
// invalid date values will also result in an error
@@ -196,7 +197,7 @@ async fn query_make_date() -> Result<()> {
.collect()
.await;
- let expected = "Execution error: Unable to parse date from 2024, 1, 32";
+ let expected = "Execution error: Day value '32' is out of range";
assert_contains!(result.unwrap_err().to_string(), expected);
Ok(())
diff --git a/datafusion/functions/src/datetime/make_date.rs
b/datafusion/functions/src/datetime/make_date.rs
index fbe28b8c23..8a87809c43 100644
--- a/datafusion/functions/src/datetime/make_date.rs
+++ b/datafusion/functions/src/datetime/make_date.rs
@@ -18,17 +18,19 @@
use std::any::Any;
use std::sync::Arc;
-use arrow::array::PrimitiveArray;
use arrow::array::builder::PrimitiveBuilder;
use arrow::array::cast::AsArray;
use arrow::array::types::{Date32Type, Int32Type};
+use arrow::array::{Array, PrimitiveArray};
use arrow::datatypes::DataType;
-use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8,
Utf8View};
+use arrow::datatypes::DataType::Date32;
use chrono::prelude::*;
+use datafusion_common::types::{NativeType, logical_int32, logical_string};
use datafusion_common::{Result, ScalarValue, exec_err,
utils::take_function_args};
use datafusion_expr::{
- ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
TypeSignatureClass,
+ Volatility,
};
use datafusion_macros::user_doc;
@@ -79,12 +81,16 @@ impl Default for MakeDateFunc {
impl MakeDateFunc {
pub fn new() -> Self {
+ let int = Coercion::new_implicit(
+ TypeSignatureClass::Native(logical_int32()),
+ vec![
+ TypeSignatureClass::Integer,
+ TypeSignatureClass::Native(logical_string()),
+ ],
+ NativeType::Int32,
+ );
Self {
- signature: Signature::uniform(
- 3,
- vec![Int32, Int64, UInt32, UInt64, Utf8, Utf8View],
- Volatility::Immutable,
- ),
+ signature: Signature::coercible(vec![int; 3],
Volatility::Immutable),
}
}
}
@@ -110,87 +116,59 @@ impl ScalarUDFImpl for MakeDateFunc {
&self,
args: datafusion_expr::ScalarFunctionArgs,
) -> Result<ColumnarValue> {
- // first, identify if any of the arguments is an Array. If yes, store
its `len`,
- // as any scalar will need to be converted to an array of len `len`.
- let args = args.args;
- let len = args
- .iter()
- .fold(Option::<usize>::None, |acc, arg| match arg {
- ColumnarValue::Scalar(_) => acc,
- ColumnarValue::Array(a) => Some(a.len()),
- });
+ let [years, months, days] = take_function_args(self.name(),
args.args)?;
- let [years, months, days] = take_function_args(self.name(), args)?;
-
- if matches!(years, ColumnarValue::Scalar(ScalarValue::Null))
- || matches!(months, ColumnarValue::Scalar(ScalarValue::Null))
- || matches!(days, ColumnarValue::Scalar(ScalarValue::Null))
- {
- return Ok(ColumnarValue::Scalar(ScalarValue::Null));
- }
-
- let years = years.cast_to(&Int32, None)?;
- let months = months.cast_to(&Int32, None)?;
- let days = days.cast_to(&Int32, None)?;
-
- let scalar_value_fn = |col: &ColumnarValue| -> Result<i32> {
- let ColumnarValue::Scalar(s) = col else {
- return exec_err!("Expected scalar value");
- };
- let ScalarValue::Int32(Some(i)) = s else {
- return exec_err!("Unable to parse date from null/empty value");
- };
- Ok(*i)
- };
-
- let value = if let Some(array_size) = len {
- let to_primitive_array_fn =
- |col: &ColumnarValue| -> PrimitiveArray<Int32Type> {
- match col {
- ColumnarValue::Array(a) => {
- a.as_primitive::<Int32Type>().to_owned()
- }
- _ => {
- let v = scalar_value_fn(col).unwrap();
- PrimitiveArray::<Int32Type>::from_value(v,
array_size)
- }
+ match (years, months, days) {
+ (ColumnarValue::Scalar(y), _, _) if y.is_null() => {
+ Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
+ }
+ (_, ColumnarValue::Scalar(m), _) if m.is_null() => {
+ Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
+ }
+ (_, _, ColumnarValue::Scalar(d)) if d.is_null() => {
+ Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
+ }
+ (
+ ColumnarValue::Scalar(ScalarValue::Int32(Some(years))),
+ ColumnarValue::Scalar(ScalarValue::Int32(Some(months))),
+ ColumnarValue::Scalar(ScalarValue::Int32(Some(days))),
+ ) => {
+ let mut value = 0;
+ make_date_inner(years, months, days, |days: i32| value =
days)?;
+ Ok(ColumnarValue::Scalar(ScalarValue::Date32(Some(value))))
+ }
+ (years, months, days) => {
+ let len = args.number_rows;
+ let years = years.into_array(len)?;
+ let months = months.into_array(len)?;
+ let days = days.into_array(len)?;
+
+ let years = years.as_primitive::<Int32Type>();
+ let months = months.as_primitive::<Int32Type>();
+ let days = days.as_primitive::<Int32Type>();
+
+ let mut builder: PrimitiveBuilder<Date32Type> =
+ PrimitiveArray::builder(len);
+
+ for i in 0..len {
+ // match postgresql behaviour which returns null for any
null input
+ if years.is_null(i) || months.is_null(i) ||
days.is_null(i) {
+ builder.append_null();
+ } else {
+ make_date_inner(
+ years.value(i),
+ months.value(i),
+ days.value(i),
+ |days: i32| builder.append_value(days),
+ )?;
}
- };
-
- let years = to_primitive_array_fn(&years);
- let months = to_primitive_array_fn(&months);
- let days = to_primitive_array_fn(&days);
+ }
- let mut builder: PrimitiveBuilder<Date32Type> =
- PrimitiveArray::builder(array_size);
- for i in 0..array_size {
- make_date_inner(
- years.value(i),
- months.value(i),
- days.value(i),
- |days: i32| builder.append_value(days),
- )?;
+ Ok(ColumnarValue::Array(Arc::new(builder.finish())))
}
-
- let arr = builder.finish();
-
- ColumnarValue::Array(Arc::new(arr))
- } else {
- // For scalar only columns the operation is faster without using
the PrimitiveArray.
- // Also, keep the output as scalar since all inputs are scalar.
- let mut value = 0;
- make_date_inner(
- scalar_value_fn(&years)?,
- scalar_value_fn(&months)?,
- scalar_value_fn(&days)?,
- |days: i32| value = days,
- )?;
-
- ColumnarValue::Scalar(ScalarValue::Date32(Some(value)))
- };
-
- Ok(value)
+ }
}
+
fn documentation(&self) -> Option<&Documentation> {
self.doc()
}
@@ -204,11 +182,13 @@ fn make_date_inner<F: FnMut(i32)>(
day: i32,
mut date_consumer_fn: F,
) -> Result<()> {
- let Ok(m) = u32::try_from(month) else {
- return exec_err!("Month value '{month:?}' is out of range");
+ let m = match month {
+ 1..=12 => month as u32,
+ _ => return exec_err!("Month value '{month:?}' is out of range"),
};
- let Ok(d) = u32::try_from(day) else {
- return exec_err!("Day value '{day:?}' is out of range");
+ let d = match day {
+ 1..=31 => day as u32,
+ _ => return exec_err!("Day value '{day:?}' is out of range"),
};
if let Some(date) = NaiveDate::from_ymd_opt(year, m, d) {
@@ -225,180 +205,3 @@ fn make_date_inner<F: FnMut(i32)>(
exec_err!("Unable to parse date from {year}, {month}, {day}")
}
}
-
-#[cfg(test)]
-mod tests {
- use crate::datetime::make_date::MakeDateFunc;
- use arrow::array::{Array, Date32Array, Int32Array, Int64Array,
UInt32Array};
- use arrow::datatypes::{DataType, Field};
- use datafusion_common::config::ConfigOptions;
- use datafusion_common::{DataFusionError, ScalarValue};
- use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
- use std::sync::Arc;
-
- fn invoke_make_date_with_args(
- args: Vec<ColumnarValue>,
- number_rows: usize,
- ) -> Result<ColumnarValue, DataFusionError> {
- let arg_fields = args
- .iter()
- .map(|arg| Field::new("a", arg.data_type(), true).into())
- .collect::<Vec<_>>();
- let args = datafusion_expr::ScalarFunctionArgs {
- args,
- arg_fields,
- number_rows,
- return_field: Field::new("f", DataType::Date32, true).into(),
- config_options: Arc::new(ConfigOptions::default()),
- };
- MakeDateFunc::new().invoke_with_args(args)
- }
-
- #[test]
- fn test_make_date() {
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Scalar(ScalarValue::Int32(Some(2024))),
- ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
- ColumnarValue::Scalar(ScalarValue::UInt32(Some(14))),
- ],
- 1,
- )
- .expect("that make_date parsed values without error");
-
- if let ColumnarValue::Scalar(ScalarValue::Date32(date)) = res {
- assert_eq!(19736, date.unwrap());
- } else {
- panic!("Expected a scalar value")
- }
-
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Scalar(ScalarValue::Int64(Some(2024))),
- ColumnarValue::Scalar(ScalarValue::UInt64(Some(1))),
- ColumnarValue::Scalar(ScalarValue::UInt32(Some(14))),
- ],
- 1,
- )
- .expect("that make_date parsed values without error");
-
- if let ColumnarValue::Scalar(ScalarValue::Date32(date)) = res {
- assert_eq!(19736, date.unwrap());
- } else {
- panic!("Expected a scalar value")
- }
-
- let res = invoke_make_date_with_args(
- vec![
-
ColumnarValue::Scalar(ScalarValue::Utf8(Some("2024".to_string()))),
-
ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("1".to_string()))),
-
ColumnarValue::Scalar(ScalarValue::Utf8(Some("14".to_string()))),
- ],
- 1,
- )
- .expect("that make_date parsed values without error");
-
- if let ColumnarValue::Scalar(ScalarValue::Date32(date)) = res {
- assert_eq!(19736, date.unwrap());
- } else {
- panic!("Expected a scalar value")
- }
-
- let years = Arc::new((2021..2025).map(Some).collect::<Int64Array>());
- let months = Arc::new((1..5).map(Some).collect::<Int32Array>());
- let days = Arc::new((11..15).map(Some).collect::<UInt32Array>());
- let batch_len = years.len();
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Array(years),
- ColumnarValue::Array(months),
- ColumnarValue::Array(days),
- ],
- batch_len,
- )
- .unwrap();
-
- if let ColumnarValue::Array(array) = res {
- assert_eq!(array.len(), 4);
- let mut builder = Date32Array::builder(4);
- builder.append_value(18_638);
- builder.append_value(19_035);
- builder.append_value(19_429);
- builder.append_value(19_827);
- assert_eq!(&builder.finish() as &dyn Array, array.as_ref());
- } else {
- panic!("Expected a columnar array")
- }
-
- //
- // Fallible test cases
- //
-
- // invalid number of arguments
- let res = invoke_make_date_with_args(
- vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))],
- 1,
- );
- assert_eq!(
- res.err().unwrap().strip_backtrace(),
- "Execution error: make_date function requires 3 arguments, got 1"
- );
-
- // invalid type
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Scalar(ScalarValue::IntervalYearMonth(Some(1))),
-
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)),
-
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)),
- ],
- 1,
- );
- assert_eq!(
- res.err().unwrap().strip_backtrace(),
- "Arrow error: Cast error: Casting from Interval(YearMonth) to
Int32 not supported"
- );
-
- // overflow of month
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))),
- ColumnarValue::Scalar(ScalarValue::UInt64(Some(u64::MAX))),
- ColumnarValue::Scalar(ScalarValue::Int32(Some(22))),
- ],
- 1,
- );
- assert_eq!(
- res.err().unwrap().strip_backtrace(),
- "Arrow error: Cast error: Can't cast value 18446744073709551615 to
type Int32"
- );
-
- // overflow of day
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))),
- ColumnarValue::Scalar(ScalarValue::Int32(Some(22))),
- ColumnarValue::Scalar(ScalarValue::UInt32(Some(u32::MAX))),
- ],
- 1,
- );
- assert_eq!(
- res.err().unwrap().strip_backtrace(),
- "Arrow error: Cast error: Can't cast value 4294967295 to type
Int32"
- );
- }
-
- #[test]
- fn test_make_date_null_param() {
- let res = invoke_make_date_with_args(
- vec![
- ColumnarValue::Scalar(ScalarValue::Null),
- ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
- ColumnarValue::Scalar(ScalarValue::UInt32(Some(14))),
- ],
- 1,
- )
- .expect("that make_date parsed values without error");
-
- assert!(matches!(res, ColumnarValue::Scalar(ScalarValue::Null)));
- }
-}
diff --git a/datafusion/functions/src/datetime/make_time.rs
b/datafusion/functions/src/datetime/make_time.rs
index 7083bf3185..5775ce851a 100644
--- a/datafusion/functions/src/datetime/make_time.rs
+++ b/datafusion/functions/src/datetime/make_time.rs
@@ -90,10 +90,7 @@ impl MakeTimeFunc {
NativeType::Int32,
);
Self {
- signature: Signature::coercible(
- vec![int.clone(), int.clone(), int.clone()],
- Volatility::Immutable,
- ),
+ signature: Signature::coercible(vec![int; 3],
Volatility::Immutable),
}
}
}
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index 6bed34e0a2..6ad6c799f2 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2847,8 +2847,12 @@ select make_date(t.year, t.month, '4') from table_nums t;
statement ok
insert into table_nums values (2024, null, 23);
-query error DataFusion error: Execution error: Unable to parse date from 2024,
0, 23
+query D
select make_date(t.year, t.month, t.day) from table_nums t;
+----
+2024-01-23
+2023-11-30
+NULL
statement ok
drop table table_nums;
@@ -2867,48 +2871,71 @@ select make_date(t.year, t.month, t.day) from
table_strings t;
statement ok
insert into table_strings values (2024, null, 23);
-query error DataFusion error: Execution error: Unable to parse date from 2024,
0, 23
+query D
select make_date(t.year, t.month, t.day) from table_strings t;
+----
+2024-01-23
+2023-11-30
+NULL
statement ok
drop table table_strings;
-query error DataFusion error: Execution error: Unable to parse date from 2024,
13, 23
+query error DataFusion error: Execution error: Month value '13' is out of range
select make_date(2024, 13, 23);
-query error DataFusion error: Execution error: Unable to parse date from 2024,
1, 32
-select make_date(2024, 01, 32);
+query error DataFusion error: Execution error: Day value '32' is out of range
+select make_date(2024, 1, 32);
-query error DataFusion error: Execution error: Unable to parse date from 2024,
0, 23
+query error DataFusion error: Execution error: Month value '0' is out of range
select make_date(2024, 0, 23);
query error DataFusion error: Execution error: Month value '\-1' is out of
range
select make_date(2024, -1, 23);
-query error DataFusion error: Execution error: Unable to parse date from 2024,
12, 0
+query error DataFusion error: Execution error: Day value '0' is out of range
select make_date(2024, 12, 0);
-query error DataFusion error: Execution error: Day value '\-1' is out of range
+query error DataFusion error: Execution error: Month value '13' is out of range
select make_date(2024, 13, -1);
-query error DataFusion error: Execution error: Unable to parse date from
null/empty value
+query D
select make_date(null, 1, 23);
+----
+NULL
-query error DataFusion error: Arrow error: Cast error: Cannot cast string ''
to value of Int32 type
+query error Cast error: Cannot cast string '' to value of Int32 type
select make_date('', 1, 23);
-query error DataFusion error: Execution error: Unable to parse date from
null/empty value
+query D
select make_date(2024, null, 23);
+----
+NULL
-query error DataFusion error: Arrow error: Cast error: Cannot cast string ''
to value of Int32 type
+query error Arrow error: Cast error: Cannot cast string '' to value of Int32
type
select make_date(2024, '', 27);
-query error DataFusion error: Execution error: Unable to parse date from
null/empty value
+query D
select make_date(2024, 1, null);
+----
+NULL
-query error DataFusion error: Arrow error: Cast error: Cannot cast string ''
to value of Int32 type
+query error Arrow error: Cast error: Cannot cast string '' to value of Int32
type
select make_date(2024, 1, '');
+query error DataFusion error: Execution error: Unable to parse date from 2024,
11, 31
+select make_date(2024, 11, 31);
+
+query D
+select make_date(null, 1::bigint, 14::bigint unsigned);
+----
+NULL
+
+query error DataFusion error: Error during planning: Function 'make_date'
expects 3 arguments but received 1
+select make_date(1);
+
+query error Expect TypeSignatureClass::Native\(LogicalType\(Native\(Int32\),
Int32\)\) but received NativeType::Interval\(MonthDayNano\), DataType:
Interval\(MonthDayNano\)
+select make_date(interval '1 day', '2001-05-21'::timestamp,
'2001-05-21'::timestamp);
##########
## make time tests
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]