Omega359 commented on code in PR #9019:
URL: https://github.com/apache/arrow-datafusion/pull/9019#discussion_r1485601150


##########
datafusion/sqllogictest/test_files/dates.slt:
##########
@@ -107,3 +107,154 @@ query ?
 SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
 ----
 730 days 0 hours 0 mins 0.000000000 secs
+
+# to_date_test
+statement ok
+create table to_date_t1(ts bigint) as VALUES
+   (1235865600000),
+   (1235865660000),
+   (1238544000000);
+
+# query_cast_timestamp_millis
+query D
+SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3
+----
+2003-11-02
+2003-11-02
+2003-11-29
+
+query D
+SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', 
'%m-%d-%Y %H:%M:%S%#z');
+----
+2023-01-13
+
+statement error DataFusion error: Execution error: to_date function 
unsupported data type at index 1: List
+SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y 
%H:%M:%S%#z', '%+'));
+
+# query date with arrow_cast
+query D
+select to_date(arrow_cast(123, 'Int64'))
+----
+1970-05-04
+
+statement error DataFusion error: Arrow error:
+SELECT to_date('21311111');
+
+# verify date cast with integer input
+query DDDDDD
+SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1), 
to_date(0-1)
+----
+NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date output types
+query TTT
+SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)), 
arrow_typeof(to_date('2023-01-10 12:34:56.000'))
+----
+Date32 Date32 Date32
+
+# verify date data with formatting options
+query DDDDDD
+SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), 
to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', 
'%s')
+----
+NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31
+
+# verify date data with formatting options
+query DDDDDD
+SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), 
to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', 
'%s')
+----
+NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31
+
+# verify date output types with formatting options
+query TTT
+SELECT arrow_typeof(to_date(1, '%c', '%s')), arrow_typeof(to_date(null, '%+', 
'%s')), arrow_typeof(to_date('2023-01-10 12:34:56.000', '%Y-%m-%d %H:%M:%S%.f'))
+----
+Date32 Date32 Date32
+
+# to_date with invalid formatting
+query error input contains invalid characters
+SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+')

Review Comment:
   This test is repeated - did you mean to change the following tests?



##########
datafusion/sqllogictest/test_files/dates.slt:
##########
@@ -107,3 +107,27 @@ query ?
 SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
 ----
 730 days 0 hours 0 mins 0.000000000 secs
+
+# to_date_test
+statement ok
+create table to_date_t1(ts bigint) as VALUES
+   (1235865600000),
+   (1235865660000),
+   (1238544000000);
+
+
+# query_cast_timestamp_millis
+query D
+SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3
+----
+2003-11-02
+2003-11-02
+2003-11-29
+
+query D
+SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', 
'%m-%d-%Y %H:%M:%S%#z');
+----
+2023-01-13
+
+statement error DataFusion error: Internal error: to_date function unsupported 
data type at index 1: List
+SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y 
%H:%M:%S%#z', '%+'));

Review Comment:
   I think there shjould be more tests of just dates without time or tz 
component. Need test to verify null, invalid formats, etc.



##########
datafusion/sqllogictest/test_files/dates.slt:
##########
@@ -107,3 +107,154 @@ query ?
 SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
 ----
 730 days 0 hours 0 mins 0.000000000 secs
+
+# to_date_test
+statement ok
+create table to_date_t1(ts bigint) as VALUES
+   (1235865600000),
+   (1235865660000),
+   (1238544000000);
+
+# query_cast_timestamp_millis
+query D
+SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3
+----
+2003-11-02
+2003-11-02
+2003-11-29
+
+query D
+SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', 
'%m-%d-%Y %H:%M:%S%#z');
+----
+2023-01-13
+
+statement error DataFusion error: Execution error: to_date function 
unsupported data type at index 1: List
+SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y 
%H:%M:%S%#z', '%+'));
+
+# query date with arrow_cast
+query D
+select to_date(arrow_cast(123, 'Int64'))
+----
+1970-05-04
+
+statement error DataFusion error: Arrow error:
+SELECT to_date('21311111');
+
+# verify date cast with integer input
+query DDDDDD
+SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1), 
to_date(0-1)
+----
+NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31
+
+# verify date output types
+query TTT
+SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)), 
arrow_typeof(to_date('2023-01-10 12:34:56.000'))
+----
+Date32 Date32 Date32
+
+# verify date data with formatting options
+query DDDDDD
+SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), 
to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', 
'%s')
+----
+NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31
+
+# verify date data with formatting options
+query DDDDDD
+SELECT to_date(null, '%+'), to_date(0, '%s'), to_date(192663, '%s'), 
to_date(1, '%+', '%s'), to_date(-1, '%c', '%+', '%s'), to_date(0-1, '%c', '%+', 
'%s')
+----
+NULL 1970-01-01 2497-06-29 1970-01-02 1969-12-31 1969-12-31
+
+# verify date output types with formatting options
+query TTT
+SELECT arrow_typeof(to_date(1, '%c', '%s')), arrow_typeof(to_date(null, '%+', 
'%s')), arrow_typeof(to_date('2023-01-10 12:34:56.000', '%Y-%m-%d %H:%M:%S%.f'))
+----
+Date32 Date32 Date32
+
+# to_date with invalid formatting
+query error input contains invalid characters
+SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+')
+
+# to_date with invalid formatting
+query error input contains invalid characters
+SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+')
+
+# to_date with invalid formatting
+query error input contains invalid characters
+SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+')
+
+# to_date with invalid formatting
+query error input contains invalid characters
+SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+')
+
+# to_date with invalid formatting
+query error input contains invalid characters
+SELECT to_date('2020-09-08 12/00/00+00:00', '%c', '%+')
+
+# to_date with broken formatting
+query error bad or unsupported format string
+SELECT to_date('2020-09-08 12/00/00+00:00', '%q')

Review Comment:
   This test is repeated as well.



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -396,6 +397,39 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
     string_to_timestamp_nanos(s).map_err(|e| e.into())
 }
 
+fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
+    match args.len() {
+        1 => handle::<Date32Type, _, Date32Type>(
+            args,
+            |s| {
+                string_to_timestamp_nanos_shim(s)

Review Comment:
   I still think for one-arg version of this impl we should just use the arrow 
function to cast from string -> date. Untested but something like this:
   `1 => cast_column(&args[0], &DataType::Date32, None),`



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -423,6 +457,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + 
ScalarType<i64>>(
     }
 }
 
+/// to_date SQL function
+pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    to_date_impl(args, "to_date")

Review Comment:
   Since there is only a single use of the impl method just inline it here



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -386,11 +386,33 @@ where
     }
 }
 
+// fn to_date_impl<T: ArrowTimestampType + ScalarType<i64>>(
+//     args: &[ColumnarValue],
+// ) -> Result<ColumnarValue> {
+// }
+
 /// Calls string_to_timestamp_nanos and converts the error type
 fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
     string_to_timestamp_nanos(s).map_err(|e| e.into())
 }
 
+fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
+    match args.len() {
+        1 => handle::<Date64Type, _, Date64Type>(
+            args,
+            |s| string_to_timestamp_nanos_shim(s).map(|n| n / 1_000_000),

Review Comment:
   Hmm. While this may work I'm not 100% sure it's the best approach as it'll 
attempt to do a lot more things than I think is required for a simple date. 
Just casting the string to a date32 I am pretty sure will handle the parsing of 
the basic date in an optimized manner (see 
[parse.rs](https://github.com/apache/arrow-rs/blob/master/arrow-cast/src/parse.rs#L547)
 in the arrow-cast crate)



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -1337,6 +1376,36 @@ fn validate_to_timestamp_data_types(
     None
 }
 
+/// to_date SQL function implementation
+pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.is_empty() {
+        return exec_err!(
+            "to_date function requires 1 or more arguments, got {}",
+            args.len()
+        );
+    }
+
+    // validate that any args after the first one are Utf8
+    if args.len() > 1 {
+        if let Some(value) = validate_to_timestamp_data_types(args, "to_date") 
{

Review Comment:
   the validate method should likely be renamed as it won't be used just for 
timestamps now



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -423,6 +457,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + 
ScalarType<i64>>(
     }
 }
 
+/// to_date SQL function
+pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {

Review Comment:
   Can we add some rustdoc with a dataframe based example to this? See [this as 
an 
example](https://github.com/apache/arrow-datafusion/blob/main/datafusion/physical-expr/src/regex_expressions.rs#L99)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to