This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 4e32ab9c79 Fix discrepancy in Float64 to timestamp(9) casts for constants (#16639) 4e32ab9c79 is described below commit 4e32ab9c79af9e59d2ab6cf40180ca6a2cf64fbd Author: Piotr Findeisen <piotr.findei...@gmail.com> AuthorDate: Thu Jul 17 21:09:16 2025 +0200 Fix discrepancy in Float64 to timestamp(9) casts for constants (#16639) * Fix discrepancy in Float64 to timestamp(9) casts Before the change, when casting `Float64` value to `Timestamp(Nanosecond, None)`, the result would depend on whether the source value is constant-foldable scalar. This is because `ScalarValue.cast_to` had a special treatment for that source & destination type pair, producing a different result from the canonical one. * Test Float32 cast to timestamp ntz too * restore to_timestamp(double) behavior the function was not meant to be changed --- datafusion/common/src/scalar/mod.rs | 5 - datafusion/functions/src/datetime/to_timestamp.rs | 17 +++- datafusion/sqllogictest/test_files/timestamps.slt | 117 +++++++++++++++++++++- 3 files changed, 129 insertions(+), 10 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 75a341bcb1..36a5e517f9 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3076,11 +3076,6 @@ impl ScalarValue { cast_options: &CastOptions<'static>, ) -> Result<Self> { let scalar_array = match (self, target_type) { - ( - ScalarValue::Float64(Some(float_ts)), - DataType::Timestamp(TimeUnit::Nanosecond, None), - ) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64)) - .to_array()?, ( ScalarValue::Decimal128(Some(decimal_value), _, scale), DataType::Timestamp(time_unit, None), diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 8b26a1c259..9e2c46187e 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -19,12 +19,14 @@ use std::any::Any; use std::sync::Arc; use crate::datetime::common::*; +use arrow::array::Float64Array; use arrow::datatypes::DataType::*; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{ ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; +use datafusion_common::format::DEFAULT_CAST_OPTIONS; use datafusion_common::{exec_err, Result, ScalarType, ScalarValue}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, @@ -319,9 +321,22 @@ impl ScalarUDFImpl for ToTimestampFunc { Int32 | Int64 => args[0] .cast_to(&Timestamp(Second, None), None)? .cast_to(&Timestamp(Nanosecond, None), None), - Null | Float64 | Timestamp(_, None) => { + Null | Timestamp(_, None) => { args[0].cast_to(&Timestamp(Nanosecond, None), None) } + Float64 => { + let rescaled = arrow::compute::kernels::numeric::mul( + &args[0].to_array(1)?, + &arrow::array::Scalar::new(Float64Array::from(vec![ + 1_000_000_000f64, + ])), + )?; + Ok(ColumnarValue::Array(arrow::compute::cast_with_options( + &rescaled, + &Timestamp(Nanosecond, None), + &DEFAULT_CAST_OPTIONS, + )?)) + } Timestamp(_, Some(tz)) => { args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None) } diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 44d0f1f97d..e7911b38cb 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -176,6 +176,115 @@ SELECT TIMESTAMPTZ '2000-01-01T01:01:01' 2000-01-01T01:01:01Z +########## +## cast tests +########## + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(0))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(0))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(3))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(3))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(6))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(6))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1 AS float) AS timestamp(9))) AS t1, + (SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t2, + (SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t3, + (SELECT CAST(CAST(1 AS double) AS timestamp(9))) AS t4, + (SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t5, + (SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t6 +) +---- +true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(0))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(0))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(3))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(3))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(6))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(6))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 + +query BPPPPPP +SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, * +FROM (SELECT + (SELECT CAST(CAST(1.125 AS float) AS timestamp(9))) AS t1, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2, + (SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3, + (SELECT CAST(CAST(1.125 AS double) AS timestamp(9))) AS t4, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5, + (SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6 +) +---- +true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 + + ########## ## to_timestamp tests ########## @@ -394,12 +503,12 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08 12 query PPP SELECT to_timestamp(1.1) as c1, cast(1.1 as timestamp) as c2, 1.1::timestamp as c3; ---- -1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 +1970-01-01T00:00:01.100 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(-1.1) as c1, cast(-1.1 as timestamp) as c2, (-1.1)::timestamp as c3; ---- -1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 +1969-12-31T23:59:58.900 1969-12-31T23:59:59.999999999 1969-12-31T23:59:59.999999999 query PPP SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as c3; @@ -409,12 +518,12 @@ SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as query PPP SELECT to_timestamp(1.23456789) as c1, cast(1.23456789 as timestamp) as c2, 1.23456789::timestamp as c3; ---- -1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 +1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as timestamp) as c2, 123456789.123456789::timestamp as c3; ---- -1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 +1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789 # to_timestamp Decimal128 inputs --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org