Omega359 commented on code in PR #9019: URL: https://github.com/apache/arrow-datafusion/pull/9019#discussion_r1485601150
########## datafusion/sqllogictest/test_files/dates.slt: ########## @@ -107,3 +107,154 @@ query ? SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 730 days 0 hours 0 mins 0.000000000 secs + +# to_date_test +statement ok +create table to_date_t1(ts bigint) as VALUES + (1235865600000), + (1235865660000), + (1238544000000); + +# query_cast_timestamp_millis +query D +SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3 +---- +2003-11-02 +2003-11-02 +2003-11-29 + +query D +SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); +---- +2023-01-13 + +statement error DataFusion error: Execution error: to_date function unsupported data type at index 1: List +SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+')); + +# query date with arrow_cast +query D +select to_date(arrow_cast(123, 'Int64')) +---- +1970-05-04 + +statement error DataFusion error: Arrow error: +SELECT to_date('21311111'); + +# verify date cast with integer input +query DDDDDD +SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1), to_date(0-1) +---- +NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31 + +# verify date output types +query TTT +SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)), arrow_typeof(to_date('2023-01-10 12:34:56.000')) +---- +Date32 Date32 Date32 + +# verify date data with formatting options +query DDDDDD +SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', '%s') +---- +NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31 + +# verify date data with formatting options +query DDDDDD +SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', '%s') +---- +NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31 + +# verify date output types with formatting options +query TTT +SELECT arrow_typeof(to_date(1, '%c', '%s')), arrow_typeof(to_date(null, '%+', '%s')), arrow_typeof(to_date('2023-01-10 12:34:56.000', '%Y-%m-%d %H:%M:%S%.f')) +---- +Date32 Date32 Date32 + +# to_date with invalid formatting +query error input contains invalid characters +SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') Review Comment: This test is repeated - did you mean to change the following tests? ########## datafusion/sqllogictest/test_files/dates.slt: ########## @@ -107,3 +107,27 @@ query ? SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 730 days 0 hours 0 mins 0.000000000 secs + +# to_date_test +statement ok +create table to_date_t1(ts bigint) as VALUES + (1235865600000), + (1235865660000), + (1238544000000); + + +# query_cast_timestamp_millis +query D +SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3 +---- +2003-11-02 +2003-11-02 +2003-11-29 + +query D +SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); +---- +2023-01-13 + +statement error DataFusion error: Internal error: to_date function unsupported data type at index 1: List +SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+')); Review Comment: I think there shjould be more tests of just dates without time or tz component. Need test to verify null, invalid formats, etc. ########## datafusion/sqllogictest/test_files/dates.slt: ########## @@ -107,3 +107,154 @@ query ? SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 730 days 0 hours 0 mins 0.000000000 secs + +# to_date_test +statement ok +create table to_date_t1(ts bigint) as VALUES + (1235865600000), + (1235865660000), + (1238544000000); + +# query_cast_timestamp_millis +query D +SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3 +---- +2003-11-02 +2003-11-02 +2003-11-29 + +query D +SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); +---- +2023-01-13 + +statement error DataFusion error: Execution error: to_date function unsupported data type at index 1: List +SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+')); + +# query date with arrow_cast +query D +select to_date(arrow_cast(123, 'Int64')) +---- +1970-05-04 + +statement error DataFusion error: Arrow error: +SELECT to_date('21311111'); + +# verify date cast with integer input +query DDDDDD +SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1), to_date(0-1) +---- +NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31 + +# verify date output types +query TTT +SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)), arrow_typeof(to_date('2023-01-10 12:34:56.000')) +---- +Date32 Date32 Date32 + +# verify date data with formatting options +query DDDDDD +SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', '%s') +---- +NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31 + +# verify date data with formatting options +query DDDDDD +SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', '%s') +---- +NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31 + +# verify date output types with formatting options +query TTT +SELECT arrow_typeof(to_date(1, '%c', '%s')), arrow_typeof(to_date(null, '%+', '%s')), arrow_typeof(to_date('2023-01-10 12:34:56.000', '%Y-%m-%d %H:%M:%S%.f')) +---- +Date32 Date32 Date32 + +# to_date with invalid formatting +query error input contains invalid characters +SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_date with invalid formatting +query error input contains invalid characters +SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_date with invalid formatting +query error input contains invalid characters +SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_date with invalid formatting +query error input contains invalid characters +SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_date with invalid formatting +query error input contains invalid characters +SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+') + +# to_date with broken formatting +query error bad or unsupported format string +SELECT to_date('2020-09-08 12/00/00+00:00', '%q') Review Comment: This test is repeated as well. ########## datafusion/physical-expr/src/datetime_expressions.rs: ########## @@ -396,6 +397,39 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> { string_to_timestamp_nanos(s).map_err(|e| e.into()) } +fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> { + match args.len() { + 1 => handle::<Date32Type, _, Date32Type>( + args, + |s| { + string_to_timestamp_nanos_shim(s) Review Comment: I still think for one-arg version of this impl we should just use the arrow function to cast from string -> date. Untested but something like this: `1 => cast_column(&args[0], &DataType::Date32, None),` ########## datafusion/physical-expr/src/datetime_expressions.rs: ########## @@ -423,6 +457,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>( } } +/// to_date SQL function +pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> { + to_date_impl(args, "to_date") Review Comment: Since there is only a single use of the impl method just inline it here ########## datafusion/physical-expr/src/datetime_expressions.rs: ########## @@ -386,11 +386,33 @@ where } } +// fn to_date_impl<T: ArrowTimestampType + ScalarType<i64>>( +// args: &[ColumnarValue], +// ) -> Result<ColumnarValue> { +// } + /// Calls string_to_timestamp_nanos and converts the error type fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> { string_to_timestamp_nanos(s).map_err(|e| e.into()) } +fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> { + match args.len() { + 1 => handle::<Date64Type, _, Date64Type>( + args, + |s| string_to_timestamp_nanos_shim(s).map(|n| n / 1_000_000), Review Comment: Hmm. While this may work I'm not 100% sure it's the best approach as it'll attempt to do a lot more things than I think is required for a simple date. Just casting the string to a date32 I am pretty sure will handle the parsing of the basic date in an optimized manner (see [parse.rs](https://github.com/apache/arrow-rs/blob/master/arrow-cast/src/parse.rs#L547) in the arrow-cast crate) ########## datafusion/physical-expr/src/datetime_expressions.rs: ########## @@ -1337,6 +1376,36 @@ fn validate_to_timestamp_data_types( None } +/// to_date SQL function implementation +pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> { + if args.is_empty() { + return exec_err!( + "to_date function requires 1 or more arguments, got {}", + args.len() + ); + } + + // validate that any args after the first one are Utf8 + if args.len() > 1 { + if let Some(value) = validate_to_timestamp_data_types(args, "to_date") { Review Comment: the validate method should likely be renamed as it won't be used just for timestamps now ########## datafusion/physical-expr/src/datetime_expressions.rs: ########## @@ -423,6 +457,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>( } } +/// to_date SQL function +pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> { Review Comment: Can we add some rustdoc with a dataframe based example to this? See [this as an example](https://github.com/apache/arrow-datafusion/blob/main/datafusion/physical-expr/src/regex_expressions.rs#L99) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org