alamb commented on code in PR #12227: URL: https://github.com/apache/datafusion/pull/12227#discussion_r1747559413
########## datafusion/functions/src/datetime/to_date.rs: ########## @@ -118,3 +118,212 @@ impl ScalarUDFImpl for ToDateFunc { } } } + +#[cfg(test)] +mod tests { + use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type}; + use datafusion_common::ScalarValue; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use super::ToDateFunc; + + #[test] + fn test_to_date_without_format() { + struct TestCase { + name: &'static str, + date_str: &'static str, + } + + let test_cases = vec![ + TestCase { + name: "Largest four-digit year (9999)", + date_str: "9999-12-31", + }, + TestCase { + name: "Year 1 (0001)", + date_str: "0001-12-31", + }, + TestCase { + name: "Year before epoch (1969)", + date_str: "1969-01-01", + }, + TestCase { + name: "Switch Julian/Gregorian calendar (1582-10-10)", + date_str: "1582-10-10", + }, + ]; + + for tc in &test_cases { + let date_scalar = ScalarValue::Utf8(Some(tc.date_str.to_string())); + let to_date_result = + ToDateFunc::new().invoke(&[ColumnarValue::Scalar(date_scalar)]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { + let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d"); + assert_eq!( + date_val, expected, + "{}: to_date created wrong value", + tc.name + ); + } + _ => panic!("Could not convert '{}' to Date", tc.date_str), + } + } + } + + #[test] + fn test_to_date_with_format() { + struct TestCase { + name: &'static str, + date_str: &'static str, + format_str: &'static str, + formatted_date: &'static str, + } + + let test_cases = vec![ + TestCase { + name: "Largest four-digit year (9999)", + date_str: "9999-12-31", + format_str: "%Y%m%d", + formatted_date: "99991231", + }, + TestCase { + name: "Smallest four-digit year (-9999)", + date_str: "-9999-12-31", + format_str: "%Y/%m/%d", + formatted_date: "-9999/12/31", + }, + TestCase { + name: "Year 1 (0001)", + date_str: "0001-12-31", + format_str: "%Y%m%d", + formatted_date: "00011231", + }, + TestCase { + name: "Year before epoch (1969)", + date_str: "1969-01-01", + format_str: "%Y%m%d", + formatted_date: "19690101", + }, + TestCase { + name: "Switch Julian/Gregorian calendar (1582-10-10)", + date_str: "1582-10-10", + format_str: "%Y%m%d", + formatted_date: "15821010", + }, + TestCase { + name: "Negative Year, BC (-42-01-01)", + date_str: "-42-01-01", + format_str: "%Y/%m/%d", + formatted_date: "-42/01/01", + }, + ]; + + for tc in &test_cases { + let formatted_date_scalar = + ScalarValue::Utf8(Some(tc.formatted_date.to_string())); + let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string())); + + let to_date_result = ToDateFunc::new().invoke(&[ + ColumnarValue::Scalar(formatted_date_scalar), + ColumnarValue::Scalar(format_scalar), + ]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { + let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d"); + assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str); + } + _ => panic!( + "Could not convert '{}' with format string '{}'to Date", + tc.date_str, tc.format_str + ), + } + } + } + + #[test] + fn test_to_date_multiple_format_strings() { + let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into())); + let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into())); + let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into())); + + let to_date_result = ToDateFunc::new().invoke(&[ + ColumnarValue::Scalar(formatted_date_scalar), + ColumnarValue::Scalar(format1_scalar), + ColumnarValue::Scalar(format2_scalar), + ]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { + let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d"); + assert_eq!( + date_val, expected, + "to_date created wrong value for date with 2 format strings" + ); + } + _ => panic!("Conversion failed",), + } + } + + #[test] + fn test_to_date_from_timestamp() { + let test_cases = vec![ + "2020-09-08T13:42:29Z", + "2020-09-08T13:42:29.190855-05:00", + "2020-09-08 12:13:29", + ]; + for date_str in test_cases { + let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into())); Review Comment: BTW I think you can write the same thing more concisely like ```suggestion let formatted_date_scalar = ScalarValue::from(date_str); ``` ########## datafusion/functions/src/datetime/to_date.rs: ########## @@ -118,3 +118,212 @@ impl ScalarUDFImpl for ToDateFunc { } } } + +#[cfg(test)] +mod tests { + use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type}; + use datafusion_common::ScalarValue; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use super::ToDateFunc; + + #[test] + fn test_to_date_without_format() { + struct TestCase { + name: &'static str, + date_str: &'static str, + } + + let test_cases = vec![ + TestCase { + name: "Largest four-digit year (9999)", + date_str: "9999-12-31", + }, + TestCase { + name: "Year 1 (0001)", + date_str: "0001-12-31", + }, + TestCase { + name: "Year before epoch (1969)", + date_str: "1969-01-01", + }, + TestCase { + name: "Switch Julian/Gregorian calendar (1582-10-10)", + date_str: "1582-10-10", + }, + ]; + + for tc in &test_cases { + let date_scalar = ScalarValue::Utf8(Some(tc.date_str.to_string())); + let to_date_result = + ToDateFunc::new().invoke(&[ColumnarValue::Scalar(date_scalar)]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { + let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d"); + assert_eq!( + date_val, expected, + "{}: to_date created wrong value", + tc.name + ); + } + _ => panic!("Could not convert '{}' to Date", tc.date_str), + } + } + } + + #[test] + fn test_to_date_with_format() { + struct TestCase { + name: &'static str, + date_str: &'static str, + format_str: &'static str, + formatted_date: &'static str, + } + + let test_cases = vec![ + TestCase { + name: "Largest four-digit year (9999)", + date_str: "9999-12-31", + format_str: "%Y%m%d", + formatted_date: "99991231", + }, + TestCase { + name: "Smallest four-digit year (-9999)", + date_str: "-9999-12-31", + format_str: "%Y/%m/%d", + formatted_date: "-9999/12/31", + }, + TestCase { + name: "Year 1 (0001)", + date_str: "0001-12-31", + format_str: "%Y%m%d", + formatted_date: "00011231", + }, + TestCase { + name: "Year before epoch (1969)", + date_str: "1969-01-01", + format_str: "%Y%m%d", + formatted_date: "19690101", + }, + TestCase { + name: "Switch Julian/Gregorian calendar (1582-10-10)", + date_str: "1582-10-10", + format_str: "%Y%m%d", + formatted_date: "15821010", + }, + TestCase { + name: "Negative Year, BC (-42-01-01)", + date_str: "-42-01-01", + format_str: "%Y/%m/%d", + formatted_date: "-42/01/01", + }, + ]; + + for tc in &test_cases { + let formatted_date_scalar = + ScalarValue::Utf8(Some(tc.formatted_date.to_string())); + let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string())); + + let to_date_result = ToDateFunc::new().invoke(&[ + ColumnarValue::Scalar(formatted_date_scalar), + ColumnarValue::Scalar(format_scalar), + ]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { + let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d"); + assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str); + } + _ => panic!( + "Could not convert '{}' with format string '{}'to Date", + tc.date_str, tc.format_str + ), + } + } + } + + #[test] + fn test_to_date_multiple_format_strings() { + let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into())); + let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into())); + let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into())); + + let to_date_result = ToDateFunc::new().invoke(&[ + ColumnarValue::Scalar(formatted_date_scalar), + ColumnarValue::Scalar(format1_scalar), + ColumnarValue::Scalar(format2_scalar), + ]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { + let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d"); + assert_eq!( + date_val, expected, + "to_date created wrong value for date with 2 format strings" + ); + } + _ => panic!("Conversion failed",), + } + } + + #[test] + fn test_to_date_from_timestamp() { + let test_cases = vec![ + "2020-09-08T13:42:29Z", + "2020-09-08T13:42:29.190855-05:00", + "2020-09-08 12:13:29", + ]; + for date_str in test_cases { + let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into())); + + let to_date_result = + ToDateFunc::new().invoke(&[ColumnarValue::Scalar(formatted_date_scalar)]); + + match to_date_result { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => { Review Comment: The machinery to call the function is also a but clunky, I wonder if it would make the tests easier to read (as there would be less conversion machinery) if we added a function like ```rust /// Invokes ToDate on the input string, and returns the resulting Date32 value fn run_to_date(input: &str) -> Result<i32> { .. } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org