This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 21ec332842 support EXTRACT on intervals and durations (#12514)
21ec332842 is described below
commit 21ec3328428d4f27aa84abf7bd0c58332edcaed5
Author: Nick Cameron <[email protected]>
AuthorDate: Sat Sep 21 08:29:13 2024 +1200
support EXTRACT on intervals and durations (#12514)
Signed-off-by: Nick Cameron <[email protected]>
---
datafusion/functions/src/datetime/date_part.rs | 47 ++++++-
datafusion/sqllogictest/test_files/expr.slt | 183 +++++++++++++++++++++++++
2 files changed, 224 insertions(+), 6 deletions(-)
diff --git a/datafusion/functions/src/datetime/date_part.rs
b/datafusion/functions/src/datetime/date_part.rs
index 8ee82d8726..b6a9a1c7e9 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -23,8 +23,10 @@ use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
use arrow::compute::{binary, cast, date_part, DatePart};
use arrow::datatypes::DataType::{
- Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View,
+ Date32, Date64, Duration, Float64, Interval, Time32, Time64, Timestamp,
Utf8,
+ Utf8View,
};
+use arrow::datatypes::IntervalUnit::{DayTime, MonthDayNano, YearMonth};
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{DataType, TimeUnit};
@@ -109,6 +111,20 @@ impl DatePartFunc {
Exact(vec![Utf8View, Time64(Microsecond)]),
Exact(vec![Utf8, Time64(Nanosecond)]),
Exact(vec![Utf8View, Time64(Nanosecond)]),
+ Exact(vec![Utf8, Interval(YearMonth)]),
+ Exact(vec![Utf8View, Interval(YearMonth)]),
+ Exact(vec![Utf8, Interval(DayTime)]),
+ Exact(vec![Utf8View, Interval(DayTime)]),
+ Exact(vec![Utf8, Interval(MonthDayNano)]),
+ Exact(vec![Utf8View, Interval(MonthDayNano)]),
+ Exact(vec![Utf8, Duration(Second)]),
+ Exact(vec![Utf8View, Duration(Second)]),
+ Exact(vec![Utf8, Duration(Millisecond)]),
+ Exact(vec![Utf8View, Duration(Millisecond)]),
+ Exact(vec![Utf8, Duration(Microsecond)]),
+ Exact(vec![Utf8View, Duration(Microsecond)]),
+ Exact(vec![Utf8, Duration(Nanosecond)]),
+ Exact(vec![Utf8View, Duration(Nanosecond)]),
],
Volatility::Immutable,
),
@@ -224,10 +240,28 @@ fn seconds(array: &dyn Array, unit: TimeUnit) ->
Result<ArrayRef> {
let subsecs = date_part(array, DatePart::Nanosecond)?;
let subsecs = as_int32_array(subsecs.as_ref())?;
- let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
- (secs as f64 + (subsecs as f64 / 1_000_000_000_f64)) * sf
- })?;
- Ok(Arc::new(r))
+ // Special case where there are no nulls.
+ if subsecs.null_count() == 0 {
+ let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
+ (secs as f64 + ((subsecs % 1_000_000_000) as f64 /
1_000_000_000_f64)) * sf
+ })?;
+ Ok(Arc::new(r))
+ } else {
+ // Nulls in secs are preserved, nulls in subsecs are treated as zero
to account for the case
+ // where the number of nanoseconds overflows.
+ let r: Float64Array = secs
+ .iter()
+ .zip(subsecs)
+ .map(|(secs, subsecs)| {
+ secs.map(|secs| {
+ let subsecs = subsecs.unwrap_or(0);
+ (secs as f64 + ((subsecs % 1_000_000_000) as f64 /
1_000_000_000_f64))
+ * sf
+ })
+ })
+ .collect();
+ Ok(Arc::new(r))
+ }
}
fn epoch(array: &dyn Array) -> Result<ArrayRef> {
@@ -256,7 +290,8 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
Time64(Nanosecond) => {
as_time64_nanosecond_array(array)?.unary(|x| x as f64 /
1_000_000_000_f64)
}
- d => return exec_err!("Can not convert {d:?} to epoch"),
+ Interval(_) | Duration(_) => return seconds(array, Second),
+ d => return exec_err!("Cannot convert {d:?} to epoch"),
};
Ok(Arc::new(f))
}
diff --git a/datafusion/sqllogictest/test_files/expr.slt
b/datafusion/sqllogictest/test_files/expr.slt
index e8d8329d34..182afff7a6 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -1472,6 +1472,189 @@ SELECT extract(epoch from arrow_cast('1969-12-31',
'Date64'))
----
-86400
+# test_extract_interval
+
+query R
+SELECT extract(year from arrow_cast('10 years', 'Interval(YearMonth)'))
+----
+10
+
+query R
+SELECT extract(month from arrow_cast('10 years', 'Interval(YearMonth)'))
+----
+0
+
+query R
+SELECT extract(year from arrow_cast('10 months', 'Interval(YearMonth)'))
+----
+0
+
+query R
+SELECT extract(month from arrow_cast('10 months', 'Interval(YearMonth)'))
+----
+10
+
+query R
+SELECT extract(year from arrow_cast('20 months', 'Interval(YearMonth)'))
+----
+1
+
+query R
+SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)'))
+----
+8
+
+query error DataFusion error: Arrow error: Compute error: Year does not
support: Interval\(DayTime\)
+SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)'))
+
+query error DataFusion error: Arrow error: Compute error: Month does not
support: Interval\(DayTime\)
+SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)'))
+
+query R
+SELECT extract(day from arrow_cast('10 days', 'Interval(DayTime)'))
+----
+10
+
+query R
+SELECT extract(day from arrow_cast('14400 minutes', 'Interval(DayTime)'))
+----
+0
+
+query R
+SELECT extract(minute from arrow_cast('14400 minutes', 'Interval(DayTime)'))
+----
+14400
+
+query R
+SELECT extract(second from arrow_cast('5.1 seconds', 'Interval(DayTime)'))
+----
+5
+
+query R
+SELECT extract(second from arrow_cast('14400 minutes', 'Interval(DayTime)'))
+----
+864000
+
+query R
+SELECT extract(second from arrow_cast('2 months', 'Interval(MonthDayNano)'))
+----
+0
+
+query R
+SELECT extract(second from arrow_cast('2 days', 'Interval(MonthDayNano)'))
+----
+0
+
+query R
+SELECT extract(second from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
+----
+2
+
+query R
+SELECT extract(seconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
+----
+2
+
+query R
+SELECT extract(epoch from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
+----
+2
+
+query R
+SELECT extract(milliseconds from arrow_cast('2 seconds',
'Interval(MonthDayNano)'))
+----
+2000
+
+query R
+SELECT extract(second from arrow_cast('2030 milliseconds',
'Interval(MonthDayNano)'))
+----
+2.03
+
+query R
+SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)'))
+----
+NULL
+
+statement ok
+create table t (id int, i interval) as values
+ (0, interval '5 months 1 day 10 nanoseconds'),
+ (1, interval '1 year 3 months'),
+ (2, interval '3 days 2 milliseconds'),
+ (3, interval '2 seconds'),
+ (4, interval '8 months'),
+ (5, NULL);
+
+query IRR rowsort
+select
+ id,
+ extract(second from i),
+ extract(month from i)
+from t
+order by id;
+----
+0 0.00000001 5
+1 0 15
+2 0.002 0
+3 2 0
+4 0 8
+5 NULL NULL
+
+statement ok
+drop table t;
+
+# test_extract_duration
+
+query R
+SELECT extract(second from arrow_cast(2, 'Duration(Second)'))
+----
+2
+
+query R
+SELECT extract(seconds from arrow_cast(2, 'Duration(Second)'))
+----
+2
+
+query R
+SELECT extract(epoch from arrow_cast(2, 'Duration(Second)'))
+----
+2
+
+query R
+SELECT extract(millisecond from arrow_cast(2, 'Duration(Second)'))
+----
+2000
+
+query R
+SELECT extract(second from arrow_cast(2, 'Duration(Millisecond)'))
+----
+0.002
+
+query R
+SELECT extract(second from arrow_cast(2002, 'Duration(Millisecond)'))
+----
+2.002
+
+query R
+SELECT extract(millisecond from arrow_cast(2002, 'Duration(Millisecond)'))
+----
+2002
+
+query R
+SELECT extract(day from arrow_cast(864000, 'Duration(Second)'))
+----
+10
+
+query error DataFusion error: Arrow error: Compute error: Month does not
support: Duration\(Second\)
+SELECT extract(month from arrow_cast(864000, 'Duration(Second)'))
+
+query error DataFusion error: Arrow error: Compute error: Year does not
support: Duration\(Second\)
+SELECT extract(year from arrow_cast(864000, 'Duration(Second)'))
+
+query R
+SELECT extract(day from arrow_cast(NULL, 'Duration(Second)'))
+----
+NULL
+
# test_extract_date_part_func
query B
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]