Dandandan commented on a change in pull request #9359: URL: https://github.com/apache/arrow/pull/9359#discussion_r579655053
########## File path: rust/datafusion/src/physical_plan/datetime_expressions.rs ########## @@ -344,6 +347,98 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> { }) } +macro_rules! extract_date_part { + ($ARRAY: expr, $FN:expr) => { + match $ARRAY.data_type() { + DataType::Date32 => { + let array = $ARRAY.as_any().downcast_ref::<Date32Array>().unwrap(); + Ok($FN(array)?) + } + DataType::Date64 => { + let array = $ARRAY.as_any().downcast_ref::<Date64Array>().unwrap(); + Ok($FN(array)?) + } + DataType::Timestamp(time_unit, None) => match time_unit { + TimeUnit::Second => { + let array = $ARRAY + .as_any() + .downcast_ref::<TimestampSecondArray>() + .unwrap(); + Ok($FN(array)?) + } + TimeUnit::Millisecond => { + let array = $ARRAY + .as_any() + .downcast_ref::<TimestampMillisecondArray>() + .unwrap(); + Ok($FN(array)?) + } + TimeUnit::Microsecond => { + let array = $ARRAY + .as_any() + .downcast_ref::<TimestampMicrosecondArray>() + .unwrap(); + Ok($FN(array)?) + } + TimeUnit::Nanosecond => { + let array = $ARRAY + .as_any() + .downcast_ref::<TimestampNanosecondArray>() + .unwrap(); + Ok($FN(array)?) + } + }, + datatype => Err(DataFusionError::Internal(format!( + "Extract does not support datatype {:?}", + datatype + ))), + } + }; +} + +/// DATE_PART SQL function +pub fn date_part(args: &[ColumnarValue]) -> Result<ColumnarValue> { + if args.len() != 2 { + return Err(DataFusionError::Execution( + "Expected two arguments in DATE_PART".to_string(), + )); + } + let (date_part, array) = (&args[0], &args[1]); + + let date_part = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = date_part { + v + } else { + return Err(DataFusionError::Execution( + "First argument of `DATE_PART` must be non-null scalar Utf8".to_string(), + )); + }; + + let is_scalar = matches!(array, ColumnarValue::Scalar(_)); + + let array = match array { Review comment: Yes, indeed. For now we can use this approach to avoid reimplementing hours/years etc, with a bit of overhead. 
Maybe longer term it would be nice to have something like `Datum` in Arrow in order to both gain some performance and avoid reimplementing things for the scalar case. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org