This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 89ca58326 feat: interval add timestamp (#5491)
89ca58326 is described below
commit 89ca58326f5cc3901c33b522ff3848af47ebf4b2
Author: Alex Huang <[email protected]>
AuthorDate: Wed Mar 8 21:11:58 2023 +0100
feat: interval add timestamp (#5491)
* feat: interval add timestamp
* add end-to-end tests
* update interval test and coercion rule
---
datafusion/common/src/scalar.rs | 38 ++++++++++++++++
.../tests/sqllogictests/test_files/timestamps.slt | 41 +++++++++++++++++
datafusion/expr/src/type_coercion.rs | 5 +++
datafusion/expr/src/type_coercion/binary.rs | 51 +++++++++++++++-------
datafusion/physical-expr/src/planner.rs | 10 +++++
5 files changed, 129 insertions(+), 16 deletions(-)
diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 3f2f4bb39..5dc042586 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -516,18 +516,34 @@ macro_rules! impl_op {
let value = seconds_add(*ts_s, $RHS, get_sign!($OPERATION))?;
Ok(ScalarValue::TimestampSecond(Some(value), zone.clone()))
}
+ (_, ScalarValue::TimestampSecond(Some(ts_s), zone)) => {
+ let value = seconds_add(*ts_s, $LHS, get_sign!($OPERATION))?;
+ Ok(ScalarValue::TimestampSecond(Some(value), zone.clone()))
+ }
(ScalarValue::TimestampMillisecond(Some(ts_ms), zone), _) => {
let value = milliseconds_add(*ts_ms, $RHS,
get_sign!($OPERATION))?;
Ok(ScalarValue::TimestampMillisecond(Some(value),
zone.clone()))
}
+ (_, ScalarValue::TimestampMillisecond(Some(ts_ms), zone)) => {
+ let value = milliseconds_add(*ts_ms, $LHS,
get_sign!($OPERATION))?;
+ Ok(ScalarValue::TimestampMillisecond(Some(value),
zone.clone()))
+ }
(ScalarValue::TimestampMicrosecond(Some(ts_us), zone), _) => {
let value = microseconds_add(*ts_us, $RHS,
get_sign!($OPERATION))?;
Ok(ScalarValue::TimestampMicrosecond(Some(value),
zone.clone()))
}
+ (_, ScalarValue::TimestampMicrosecond(Some(ts_us), zone)) => {
+ let value = microseconds_add(*ts_us, $LHS,
get_sign!($OPERATION))?;
+ Ok(ScalarValue::TimestampMicrosecond(Some(value),
zone.clone()))
+ }
(ScalarValue::TimestampNanosecond(Some(ts_ns), zone), _) => {
let value = nanoseconds_add(*ts_ns, $RHS,
get_sign!($OPERATION))?;
Ok(ScalarValue::TimestampNanosecond(Some(value), zone.clone()))
}
+ (_, ScalarValue::TimestampNanosecond(Some(ts_ns), zone)) => {
+ let value = nanoseconds_add(*ts_ns, $LHS,
get_sign!($OPERATION))?;
+ Ok(ScalarValue::TimestampNanosecond(Some(value), zone.clone()))
+ }
_ => Err(DataFusionError::Internal(format!(
"Operator {} is not implemented for types {:?} and {:?}",
stringify!($OPERATION),
@@ -2941,6 +2957,28 @@ mod tests {
Ok(())
}
+ #[test]
+ fn test_interval_add_timestamp() -> Result<()> {
+ let interval = ScalarValue::IntervalMonthDayNano(Some(123));
+ let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
+ let result = interval.add(×tamp)?;
+ let expect = timestamp.add(&interval)?;
+ assert_eq!(result, expect);
+
+ let interval = ScalarValue::IntervalYearMonth(Some(123));
+ let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
+ let result = interval.add(×tamp)?;
+ let expect = timestamp.add(&interval)?;
+ assert_eq!(result, expect);
+
+ let interval = ScalarValue::IntervalDayTime(Some(123));
+ let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
+ let result = interval.add(×tamp)?;
+ let expect = timestamp.add(&interval)?;
+ assert_eq!(result, expect);
+ Ok(())
+ }
+
#[test]
fn scalar_decimal_test() -> Result<()> {
let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index dce6213f7..1958adea1 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -191,3 +191,44 @@ query P
SELECT DATE_TRUNC('second', '2022-08-03 14:38:50Z');
----
2022-08-03T14:38:50
+
+# Test that interval can add a timestamp
+query P
+SELECT timestamp '2013-07-01 12:00:00' + INTERVAL '8' DAY;
+----
+2013-07-09T12:00:00
+
+query P
+SELECT '2000-01-01T00:00:00'::timestamp + INTERVAL '8' DAY;
+----
+2000-01-09T00:00:00
+
+query P
+SELECT '2000-01-01T00:00:00'::timestamp + INTERVAL '8' YEAR;
+----
+2008-01-01T00:00:00
+
+query P
+SELECT '2000-01-01T00:00:00'::timestamp + INTERVAL '8' MONTH;
+----
+2000-09-01T00:00:00
+
+query P
+SELECT INTERVAL '8' DAY + timestamp '2013-07-01 12:00:00';
+----
+2013-07-09T12:00:00
+
+query P
+SELECT INTERVAL '8' DAY + '2000-01-01T00:00:00'::timestamp;
+----
+2000-01-09T00:00:00
+
+query P
+SELECT INTERVAL '8' YEAR + '2000-01-01T00:00:00'::timestamp;
+----
+2008-01-01T00:00:00
+
+query P
+SELECT INTERVAL '8' MONTH + '2000-01-01T00:00:00'::timestamp;
+----
+2000-09-01T00:00:00
\ No newline at end of file
diff --git a/datafusion/expr/src/type_coercion.rs
b/datafusion/expr/src/type_coercion.rs
index 373f4a7a2..740b10e28 100644
--- a/datafusion/expr/src/type_coercion.rs
+++ b/datafusion/expr/src/type_coercion.rs
@@ -67,6 +67,11 @@ pub fn is_timestamp(dt: &DataType) -> bool {
matches!(dt, DataType::Timestamp(_, _))
}
+/// Determine whether the given data type 'dt' is a `Interval`.
+pub fn is_interval(dt: &DataType) -> bool {
+ matches!(dt, DataType::Interval(_))
+}
+
/// Determine whether the given data type `dt` is a `Date`.
pub fn is_date(dt: &DataType) -> bool {
matches!(dt, DataType::Date32 | DataType::Date64)
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index d2e081af1..5ee66837e 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -17,7 +17,7 @@
//! Coercion rules for matching argument types for binary operators
-use crate::type_coercion::{is_date, is_numeric, is_timestamp};
+use crate::type_coercion::{is_date, is_interval, is_numeric, is_timestamp};
use crate::Operator;
use arrow::compute::can_cast_types;
use arrow::datatypes::{
@@ -114,22 +114,12 @@ pub fn coerce_types(
| Operator::GtEq
| Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
Operator::Plus | Operator::Minus
- if is_date(lhs_type) || is_timestamp(lhs_type) =>
+ if is_date(lhs_type)
+ || is_date(rhs_type)
+ || is_timestamp(lhs_type)
+ || is_timestamp(rhs_type) =>
{
- match rhs_type {
- // timestamp/date +/- interval returns timestamp/date
- DataType::Interval(_) => Some(lhs_type.clone()),
- // providing more helpful error message
- DataType::Date32 | DataType::Date64 | DataType::Timestamp(_,
_) => {
- return Err(DataFusionError::Plan(
- format!(
- "'{lhs_type:?} {op} {rhs_type:?}' is an
unsupported operation. \
- addition/subtraction on dates/timestamps only
supported with interval types"
- ),
- ));
- }
- _ => None,
- }
+ temporal_add_sub_coercion(lhs_type, rhs_type, op)?
}
// for math expressions, the final value of the coercion is also the
return type
// because coercion favours higher information types
@@ -214,6 +204,35 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<D
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
}
+/// Return the output type from performing addition or subtraction operations
on temporal data types
+pub fn temporal_add_sub_coercion(
+ lhs_type: &DataType,
+ rhs_type: &DataType,
+ op: &Operator,
+) -> Result<Option<DataType>> {
+ // interval + date or timestamp
+ if is_interval(lhs_type) && (is_date(rhs_type) || is_timestamp(rhs_type)) {
+ return Ok(Some(rhs_type.clone()));
+ }
+
+ // date or timestamp + interval
+ if is_interval(rhs_type) && (is_date(lhs_type) || is_timestamp(lhs_type)) {
+ return Ok(Some(lhs_type.clone()));
+ }
+
+ // date or timestamp + date or timestamp
+ if (is_date(lhs_type) || is_timestamp(lhs_type))
+ && (is_date(rhs_type) || is_timestamp(rhs_type))
+ {
+ return Err(DataFusionError::Plan(
+ format!(
+ "'{lhs_type:?} {op} {rhs_type:?}' is an
unsupported operation. \
+ addition/subtraction on dates/timestamps only
supported with interval types"
+ ),));
+ }
+ Ok(None)
+}
+
/// Returns the output type of applying numeric operations such as `=`
/// to arguments `lhs_type` and `rhs_type` if one is numeric and one
/// is `Utf8`/`LargeUtf8`.
diff --git a/datafusion/physical-expr/src/planner.rs
b/datafusion/physical-expr/src/planner.rs
index 105de17c2..1fbd73b3b 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -197,6 +197,16 @@ pub fn create_physical_expr(
rhs,
input_schema,
)?)),
+ (
+ DataType::Interval(_),
+ Operator::Plus | Operator::Minus,
+ DataType::Date32 | DataType::Date64 |
DataType::Timestamp(_, _),
+ ) => Ok(Arc::new(DateTimeIntervalExpr::try_new(
+ rhs,
+ *op,
+ lhs,
+ input_schema,
+ )?)),
_ => {
// Note that the logical planner is responsible
// for type coercion on the arguments (e.g. if one