This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 21ec332842 support EXTRACT on intervals and durations (#12514)
21ec332842 is described below

commit 21ec3328428d4f27aa84abf7bd0c58332edcaed5
Author: Nick Cameron <[email protected]>
AuthorDate: Sat Sep 21 08:29:13 2024 +1200

    support EXTRACT on intervals and durations (#12514)
    
    Signed-off-by: Nick Cameron <[email protected]>
---
 datafusion/functions/src/datetime/date_part.rs |  47 ++++++-
 datafusion/sqllogictest/test_files/expr.slt    | 183 +++++++++++++++++++++++++
 2 files changed, 224 insertions(+), 6 deletions(-)

diff --git a/datafusion/functions/src/datetime/date_part.rs 
b/datafusion/functions/src/datetime/date_part.rs
index 8ee82d8726..b6a9a1c7e9 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -23,8 +23,10 @@ use arrow::array::{Array, ArrayRef, Float64Array};
 use arrow::compute::kernels::cast_utils::IntervalUnit;
 use arrow::compute::{binary, cast, date_part, DatePart};
 use arrow::datatypes::DataType::{
-    Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View,
+    Date32, Date64, Duration, Float64, Interval, Time32, Time64, Timestamp, 
Utf8,
+    Utf8View,
 };
+use arrow::datatypes::IntervalUnit::{DayTime, MonthDayNano, YearMonth};
 use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
 use arrow::datatypes::{DataType, TimeUnit};
 
@@ -109,6 +111,20 @@ impl DatePartFunc {
                     Exact(vec![Utf8View, Time64(Microsecond)]),
                     Exact(vec![Utf8, Time64(Nanosecond)]),
                     Exact(vec![Utf8View, Time64(Nanosecond)]),
+                    Exact(vec![Utf8, Interval(YearMonth)]),
+                    Exact(vec![Utf8View, Interval(YearMonth)]),
+                    Exact(vec![Utf8, Interval(DayTime)]),
+                    Exact(vec![Utf8View, Interval(DayTime)]),
+                    Exact(vec![Utf8, Interval(MonthDayNano)]),
+                    Exact(vec![Utf8View, Interval(MonthDayNano)]),
+                    Exact(vec![Utf8, Duration(Second)]),
+                    Exact(vec![Utf8View, Duration(Second)]),
+                    Exact(vec![Utf8, Duration(Millisecond)]),
+                    Exact(vec![Utf8View, Duration(Millisecond)]),
+                    Exact(vec![Utf8, Duration(Microsecond)]),
+                    Exact(vec![Utf8View, Duration(Microsecond)]),
+                    Exact(vec![Utf8, Duration(Nanosecond)]),
+                    Exact(vec![Utf8View, Duration(Nanosecond)]),
                 ],
                 Volatility::Immutable,
             ),
@@ -224,10 +240,28 @@ fn seconds(array: &dyn Array, unit: TimeUnit) -> 
Result<ArrayRef> {
     let subsecs = date_part(array, DatePart::Nanosecond)?;
     let subsecs = as_int32_array(subsecs.as_ref())?;
 
-    let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
-        (secs as f64 + (subsecs as f64 / 1_000_000_000_f64)) * sf
-    })?;
-    Ok(Arc::new(r))
+    // Special case where there are no nulls.
+    if subsecs.null_count() == 0 {
+        let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
+            (secs as f64 + ((subsecs % 1_000_000_000) as f64 / 
1_000_000_000_f64)) * sf
+        })?;
+        Ok(Arc::new(r))
+    } else {
+        // Nulls in secs are preserved, nulls in subsecs are treated as zero 
to account for the case
+        // where the number of nanoseconds overflows.
+        let r: Float64Array = secs
+            .iter()
+            .zip(subsecs)
+            .map(|(secs, subsecs)| {
+                secs.map(|secs| {
+                    let subsecs = subsecs.unwrap_or(0);
+                    (secs as f64 + ((subsecs % 1_000_000_000) as f64 / 
1_000_000_000_f64))
+                        * sf
+                })
+            })
+            .collect();
+        Ok(Arc::new(r))
+    }
 }
 
 fn epoch(array: &dyn Array) -> Result<ArrayRef> {
@@ -256,7 +290,8 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
         Time64(Nanosecond) => {
             as_time64_nanosecond_array(array)?.unary(|x| x as f64 / 
1_000_000_000_f64)
         }
-        d => return exec_err!("Can not convert {d:?} to epoch"),
+        Interval(_) | Duration(_) => return seconds(array, Second),
+        d => return exec_err!("Cannot convert {d:?} to epoch"),
     };
     Ok(Arc::new(f))
 }
diff --git a/datafusion/sqllogictest/test_files/expr.slt 
b/datafusion/sqllogictest/test_files/expr.slt
index e8d8329d34..182afff7a6 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -1472,6 +1472,189 @@ SELECT extract(epoch from arrow_cast('1969-12-31', 
'Date64'))
 ----
 -86400
 
+# test_extract_interval
+
+query R
+SELECT extract(year from arrow_cast('10 years', 'Interval(YearMonth)'))
+----
+10
+
+query R
+SELECT extract(month from arrow_cast('10 years', 'Interval(YearMonth)'))
+----
+0
+
+query R
+SELECT extract(year from arrow_cast('10 months', 'Interval(YearMonth)'))
+----
+0
+
+query R
+SELECT extract(month from arrow_cast('10 months', 'Interval(YearMonth)'))
+----
+10
+
+query R
+SELECT extract(year from arrow_cast('20 months', 'Interval(YearMonth)'))
+----
+1
+
+query R
+SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)'))
+----
+8
+
+query error DataFusion error: Arrow error: Compute error: Year does not 
support: Interval\(DayTime\)
+SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)'))
+
+query error DataFusion error: Arrow error: Compute error: Month does not 
support: Interval\(DayTime\)
+SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)'))
+
+query R
+SELECT extract(day from arrow_cast('10 days', 'Interval(DayTime)'))
+----
+10
+
+query R
+SELECT extract(day from arrow_cast('14400 minutes', 'Interval(DayTime)'))
+----
+0
+
+query R
+SELECT extract(minute from arrow_cast('14400 minutes', 'Interval(DayTime)'))
+----
+14400
+
+query R
+SELECT extract(second from arrow_cast('5.1 seconds', 'Interval(DayTime)'))
+----
+5
+
+query R
+SELECT extract(second from arrow_cast('14400 minutes', 'Interval(DayTime)'))
+----
+864000
+
+query R
+SELECT extract(second from arrow_cast('2 months', 'Interval(MonthDayNano)'))
+----
+0
+
+query R
+SELECT extract(second from arrow_cast('2 days', 'Interval(MonthDayNano)'))
+----
+0
+
+query R
+SELECT extract(second from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
+----
+2
+
+query R
+SELECT extract(seconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
+----
+2
+
+query R
+SELECT extract(epoch from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
+----
+2
+
+query R
+SELECT extract(milliseconds from arrow_cast('2 seconds', 
'Interval(MonthDayNano)'))
+----
+2000
+
+query R
+SELECT extract(second from arrow_cast('2030 milliseconds', 
'Interval(MonthDayNano)'))
+----
+2.03
+
+query R
+SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)'))
+----
+NULL
+
+statement ok
+create table t (id int, i interval) as values
+  (0, interval '5 months 1 day 10 nanoseconds'),
+  (1, interval '1 year 3 months'),
+  (2, interval '3 days 2 milliseconds'),
+  (3, interval '2 seconds'),
+  (4, interval '8 months'),
+  (5, NULL);
+
+query IRR rowsort
+select
+  id,
+  extract(second from i),
+  extract(month from i)
+from t
+order by id;
+----
+0 0.00000001 5
+1 0 15
+2 0.002 0
+3 2 0
+4 0 8
+5 NULL NULL
+
+statement ok
+drop table t;
+
+# test_extract_duration
+
+query R
+SELECT extract(second from arrow_cast(2, 'Duration(Second)'))
+----
+2
+
+query R
+SELECT extract(seconds from arrow_cast(2, 'Duration(Second)'))
+----
+2
+
+query R
+SELECT extract(epoch from arrow_cast(2, 'Duration(Second)'))
+----
+2
+
+query R
+SELECT extract(millisecond from arrow_cast(2, 'Duration(Second)'))
+----
+2000
+
+query R
+SELECT extract(second from arrow_cast(2, 'Duration(Millisecond)'))
+----
+0.002
+
+query R
+SELECT extract(second from arrow_cast(2002, 'Duration(Millisecond)'))
+----
+2.002
+
+query R
+SELECT extract(millisecond from arrow_cast(2002, 'Duration(Millisecond)'))
+----
+2002
+
+query R
+SELECT extract(day from arrow_cast(864000, 'Duration(Second)'))
+----
+10
+
+query error DataFusion error: Arrow error: Compute error: Month does not 
support: Duration\(Second\)
+SELECT extract(month from arrow_cast(864000, 'Duration(Second)'))
+
+query error DataFusion error: Arrow error: Compute error: Year does not 
support: Duration\(Second\)
+SELECT extract(year from arrow_cast(864000, 'Duration(Second)'))
+
+query R
+SELECT extract(day from arrow_cast(NULL, 'Duration(Second)'))
+----
+NULL
+
 # test_extract_date_part_func
 
 query B


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to