This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5a01e68643 Update to_date udf function to support a consistent set of
argument types (#19134)
5a01e68643 is described below
commit 5a01e68643a198a1aaa7124524d7be5be7df24ec
Author: Bruce Ritchie <[email protected]>
AuthorDate: Fri Dec 12 18:37:38 2025 -0500
Update to_date udf function to support a consistent set of argument types
(#19134)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
- Closes #19120
## Rationale for this change
Improved type support for to_date function.
## What changes are included in this PR?
Code, slt, updated docs.
## Are these changes tested?
Yes
## Are there any user-facing changes?
More types supported.
---
datafusion/functions/src/datetime/to_date.rs | 39 +++++++++-
datafusion/sqllogictest/test_files/dates.slt | 102 +++++++++++++++++++++++++
docs/source/user-guide/sql/scalar_functions.md | 2 +-
3 files changed, 140 insertions(+), 3 deletions(-)
diff --git a/datafusion/functions/src/datetime/to_date.rs
b/datafusion/functions/src/datetime/to_date.rs
index ac95bbd0d0..471995089b 100644
--- a/datafusion/functions/src/datetime/to_date.rs
+++ b/datafusion/functions/src/datetime/to_date.rs
@@ -16,10 +16,12 @@
// under the License.
use crate::datetime::common::*;
+use arrow::compute::cast_with_options;
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::*;
use arrow::error::ArrowError::ParseError;
use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
+use datafusion_common::format::DEFAULT_CAST_OPTIONS;
use datafusion_common::{Result, arrow_err, exec_err, internal_datafusion_err};
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
@@ -30,7 +32,7 @@ use std::any::Any;
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r"Converts a value to a date (`YYYY-MM-DD`).
-Supports strings, integer and double types as input.
+Supports strings, numeric and timestamp types as input.
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono
format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are
provided.
Integers and doubles are interpreted as days since the unix epoch
(`1970-01-01T00:00:00Z`).
Returns the corresponding date.
@@ -144,9 +146,42 @@ impl ScalarUDFImpl for ToDateFunc {
}
match args[0].data_type() {
- Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
+ Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
args[0].cast_to(&Date32, None)
}
+ UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
+ // Arrow cast doesn't support direct casting of these types to
date32
+ // as it only supports Int32 and Int64. To work around that
limitation,
+ // use cast_with_options to cast to Int32 and then cast the
result of
+ // that to Date32.
+ match &args[0] {
+ ColumnarValue::Array(array) => {
+ Ok(ColumnarValue::Array(cast_with_options(
+ &cast_with_options(&array, &Int32,
&DEFAULT_CAST_OPTIONS)?,
+ &Date32,
+ &DEFAULT_CAST_OPTIONS,
+ )?))
+ }
+ ColumnarValue::Scalar(scalar) => {
+ let sv =
+ scalar.cast_to_with_options(&Int32,
&DEFAULT_CAST_OPTIONS)?;
+ Ok(ColumnarValue::Scalar(
+ sv.cast_to_with_options(&Date32,
&DEFAULT_CAST_OPTIONS)?,
+ ))
+ }
+ }
+ }
+ Float16
+ | Float32
+ | Float64
+ | Decimal32(_, _)
+ | Decimal64(_, _)
+ | Decimal128(_, _)
+ | Decimal256(_, _) => {
+ // The only way this makes sense is to get the Int64 value of
the float
+ // or decimal and then cast that to Date32.
+ args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
+ }
Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
other => {
exec_err!("Unsupported data type {} for function to_date",
other)
diff --git a/datafusion/sqllogictest/test_files/dates.slt
b/datafusion/sqllogictest/test_files/dates.slt
index abf64675e9..6ba34cfcac 100644
--- a/datafusion/sqllogictest/test_files/dates.slt
+++ b/datafusion/sqllogictest/test_files/dates.slt
@@ -164,12 +164,114 @@ SELECT to_date('21311111');
statement error DataFusion error: Arrow error:
SELECT to_date('213111111');
+# verify date cast with tinyint input
+query DDDDDD
+SELECT to_date(null::tinyint), to_date(0::tinyint), to_date(19::tinyint),
to_date(1::tinyint), to_date(-1::tinyint), to_date((0-1)::tinyint)
+----
+NULL 1970-01-01 1970-01-20 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with smallint input
+query DDDDDD
+SELECT to_date(null::smallint), to_date(0::smallint),
to_date(19234::smallint), to_date(1::smallint), to_date(-1::smallint),
to_date((0-1)::smallint)
+----
+NULL 1970-01-01 2022-08-30 1970-01-02 1969-12-31 1969-12-31
+
# verify date cast with integer input
query DDDDDD
SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1),
to_date(0-1)
----
NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31
+# verify date cast with bigint input
+query DDDDDD
+SELECT to_date(null::bigint), to_date(0::bigint), to_date(191234::bigint),
to_date(1::bigint), to_date(-1::bigint), to_date((0-1)::bigint)
+----
+NULL 1970-01-01 2493-07-31 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with unsigned tinyint input
+query DDDD
+SELECT to_date(null::tinyint unsigned), to_date(0::tinyint unsigned),
to_date(192::tinyint unsigned), to_date(1::tinyint unsigned)
+----
+NULL 1970-01-01 1970-07-12 1970-01-02
+
+# verify date cast with unsigned smallint input
+query DDDD
+SELECT to_date(null::smallint unsigned), to_date(0::smallint unsigned),
to_date(19260::smallint unsigned), to_date(1::smallint unsigned)
+----
+NULL 1970-01-01 2022-09-25 1970-01-02
+
+# verify date cast with unsigned int input
+query DDDD
+SELECT to_date(null::int unsigned), to_date(0::int unsigned),
to_date(19260::int unsigned), to_date(1::int unsigned)
+----
+NULL 1970-01-01 2022-09-25 1970-01-02
+
+# verify date cast with unsigned bigint input
+query DDDD
+SELECT to_date(null::bigint unsigned), to_date(0::bigint unsigned),
to_date(19260000::bigint unsigned), to_date(1::bigint unsigned)
+----
+NULL 1970-01-01 +54702-02-03 1970-01-02
+
+# verify date cast with real input (float32)
+query DDDDDD
+SELECT to_date(null::real), to_date(0.0::real), to_date(19260.1::real),
to_date(1.1::real), to_date(-1.1::real), to_date(0-1.1::real)
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with double input (float64)
+query DDDDDD
+SELECT to_date(null::double), to_date(0.0::double), to_date(19260.1::double),
to_date(1.1::double), to_date(-1.1::double), to_date(0-1.1::double)
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with decimal32 input (Decimal32)
+query DDDDDD
+SELECT to_date(arrow_cast(null, 'Decimal32(8,2)')), to_date(arrow_cast(0.0,
'Decimal32(8,2)')), to_date(arrow_cast(19260.1, 'Decimal32(8,2)')),
to_date(arrow_cast(1.1, 'Decimal32(8,2)')), to_date(arrow_cast(-1.1,
'Decimal32(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal32(8,2)'))
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with Decimal64 input
+query DDDDDD
+SELECT to_date(arrow_cast(null, 'Decimal64(8,2)')), to_date(arrow_cast(0.0,
'Decimal64(8,2)')), to_date(arrow_cast(19260.1, 'Decimal64(8,2)')),
to_date(arrow_cast(1.1, 'Decimal64(8,2)')), to_date(arrow_cast(-1.1,
'Decimal64(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal64(8,2)'))
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with Decimal128 input
+query DDDDDD
+SELECT to_date(arrow_cast(null, 'Decimal128(8,2)')), to_date(arrow_cast(0.0,
'Decimal128(8,2)')), to_date(arrow_cast(19260.1, 'Decimal128(8,2)')),
to_date(arrow_cast(1.1, 'Decimal128(8,2)')), to_date(arrow_cast(-1.1,
'Decimal128(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal128(8,2)'))
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with decimal input (Decimal128)
+query DDDDDD
+SELECT to_date(null::decimal(10,2)), to_date(0.0::decimal(10,2)),
to_date(19260.1::decimal(10,2)), to_date(1.1::decimal(10,2)),
to_date(-1.1::decimal(10,2)), to_date(0-1.1::decimal(10,2))
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with Decimal256 input
+query DDDDDD
+SELECT to_date(arrow_cast(null, 'Decimal256(8,2)')), to_date(arrow_cast(0.0,
'Decimal256(8,2)')), to_date(arrow_cast(19260.1, 'Decimal256(8,2)')),
to_date(arrow_cast(1.1, 'Decimal256(8,2)')), to_date(arrow_cast(-1.1,
'Decimal256(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal256(8,2)'))
+----
+NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date cast with date input
+query DDDD
+SELECT to_date('2024-12-1'::date), to_date('1920-01-12'::date),
to_date('1970-01-01'::date), to_date('-0200-07-22'::date)
+----
+2024-12-01 1920-01-12 1970-01-01 -0200-07-22
+
+# verify date cast with date64 input
+query DDDD
+SELECT to_date(arrow_cast('2024-12-1', 'Date64')),
to_date(arrow_cast('1920-01-12', 'Date64')), to_date(arrow_cast('1970-01-01',
'Date64')), to_date(arrow_cast(-863999913600000, 'Date64'))
+----
+2024-12-01 1920-01-12 1970-01-01 -25410-12-07
+
+# verify date cast with timestamp input
+query DD
+SELECT to_date('2024-12-01T00:32:45'::timestamp),
to_date('1677-12-01T00:32:45'::timestamp)
+----
+2024-12-01 1677-12-01
+
# verify date output types
query TTT
SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)),
arrow_typeof(to_date('2023-01-10 12:34:56.000'))
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index bcf7e6f645..c0e3cd6737 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -2709,7 +2709,7 @@ Additional examples can be found
[here](https://github.com/apache/datafusion/blo
### `to_date`
Converts a value to a date (`YYYY-MM-DD`).
-Supports strings, integer and double types as input.
+Supports strings, numeric and timestamp types as input.
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono
format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are
provided.
Integers and doubles are interpreted as days since the unix epoch
(`1970-01-01T00:00:00Z`).
Returns the corresponding date.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]