andygrove commented on code in PR #335: URL: https://github.com/apache/datafusion-comet/pull/335#discussion_r1581837448
########## core/src/execution/datafusion/expressions/cast.rs: ########## @@ -222,3 +263,139 @@ impl PhysicalExpr for Cast { self.hash(&mut s); } } + +fn parse_timestamp(value: &str, eval_mode: EvalMode) -> CometResult<Option<i64>> { + let value = value.trim(); + if value.is_empty() { + return Ok(None); + } + + // Define regex patterns and corresponding parsing functions + let patterns = &[ + (Regex::new(r"^\d{4}$").unwrap(), parse_str_to_year_timestamp as fn(&str) -> CometResult<Option<i64>>), + (Regex::new(r"^\d{4}-\d{2}$").unwrap(), parse_str_to_month_timestamp), + (Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(), parse_str_to_day_timestamp), + (Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{1,2}$").unwrap(), parse_str_to_hour_timestamp), + (Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap(), parse_str_to_minute_timestamp), + (Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap(), parse_str_to_second_timestamp), + (Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap(), parse_str_to_nanosecond_timestamp), + (Regex::new(r"^T\d{1,2}$").unwrap(), parse_str_to_time_only_timestamp), + ]; + + let mut timestamp = None; + + // Iterate through patterns and try matching + for (pattern, parse_func) in patterns { + if pattern.is_match(value) { + timestamp = parse_func(value)?; + break; + } + } + + if eval_mode == EvalMode::Ansi && timestamp.is_none() { + return Err(CometError::CastInvalidValue { + value: value.to_string(), + from_type: "STRING".to_string(), + to_type: "TIMESTAMP".to_string(), + }); + } + + Ok(Some(timestamp.unwrap())) +} + +fn parse_ymd_timestamp(year: i32, month: u32, day: u32) -> CometResult<Option<i64>> { + let datetime = chrono::NaiveDate::from_ymd_opt(year, month, day); + let timestamp = datetime.unwrap().and_hms_milli_opt(0, 0, 0, 0); + Ok(Some(timestamp.unwrap().and_utc().timestamp_millis())) +} + +fn parse_hms_timestamp(year: i32, month: u32, day: u32, hour: u32, minute: u32, second: u32, millisecond: u32) -> CometResult<Option<i64>> { + let datetime = chrono::NaiveDate::from_ymd_opt(year, month, day); + let timestamp = datetime.unwrap().and_hms_nano_opt(hour, minute, second, millisecond); + Ok(Some(timestamp.unwrap().and_utc().timestamp_millis())) +} + +fn get_timestamp_values(value: &str, timestamp_type: &str) -> CometResult<Option<i64>> { + let values: Vec<_> = value.split(|c| c == 'T' || c == '-' || c == ':' || c == '.' ).collect(); + let year = values[0].parse::<i32>().unwrap_or_default(); + let month = values.get(1).map_or(1, |m| m.parse::<u32>().unwrap_or(1)); + let day = values.get(2).map_or(1, |d| d.parse::<u32>().unwrap_or(1)); + let hour = values.get(3).map_or(0, |h| h.parse::<u32>().unwrap_or(0)); + let minute = values.get(4).map_or(0, |m| m.parse::<u32>().unwrap_or(0)); + let second = values.get(5).map_or(0, |s| s.parse::<u32>().unwrap_or(0)); + let millisecond = values.get(6).map_or(0, |ms| ms.parse::<u32>().unwrap_or(0)); + + match timestamp_type { + "year" => parse_ymd_timestamp(year, 1, 1), + "month" => parse_ymd_timestamp(year, month, 1), + "day" => parse_ymd_timestamp(year, month, day), + "hour" => parse_hms_timestamp(year, month, day, hour, 0, 0, 0), + "minute" => parse_hms_timestamp(year, month, day, hour, minute, 0, 0), + "second" => parse_hms_timestamp(year, month, day, hour, minute, second, 0), + "millisecond" => parse_hms_timestamp(year, month, day, hour, minute, second, millisecond), + _ => Err(CometError::CastInvalidValue { + value: value.to_string(), + from_type: "STRING".to_string(), + to_type: "TIMESTAMP".to_string(), + }), + } +} + +fn parse_str_to_year_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "year") +} + +fn parse_str_to_month_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "month") +} + +fn parse_str_to_day_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "day") +} + +fn parse_str_to_hour_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "hour") +} + +fn parse_str_to_minute_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "minute") +} + +fn parse_str_to_second_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "second") +} + +fn parse_str_to_nanosecond_timestamp(value: &str) -> CometResult<Option<i64>> { + get_timestamp_values(value, "millisecond") +} + +fn parse_str_to_time_only_timestamp(value: &str) -> CometResult<Option<i64>> { + let values: Vec<_> = value.split("T").collect(); + let time_values: Vec<_> = values[1].split(":").collect(); + let hour = time_values[0].parse::<u32>().unwrap(); + let minute = time_values.get(1).map_or(0, |m| m.parse::<u32>().unwrap_or(0)); + let second = time_values.get(2).map_or(0, |s| s.parse::<u32>().unwrap_or(0)); + let millisecond = time_values.get(3).map_or(0, |ms| ms.parse::<u32>().unwrap_or(0)); + let datetime = chrono::Local::now().to_utc().date_naive(); + let timestamp = datetime.and_hms_milli_opt(hour, minute, second, millisecond); + Ok(Some(timestamp.unwrap().and_utc().timestamp_millis())) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_timestamp_test() { Review Comment: Also you will need to fix my copy and paste error in this test and change `DoubleType` to `TimestampType` 🤦 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org