This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7bd77477b3 fix interval units parsing (#12448)
7bd77477b3 is described below

commit 7bd77477b3aa65d23fc096db140a08617ba2bd55
Author: Samuel Colvin <[email protected]>
AuthorDate: Sun Sep 15 15:01:11 2024 +0100

    fix interval units parsing (#12448)
    
    * fix interval units properly
    
    * fix remaining tests :fingers-crossed:
    
    * fix logical conflict
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/sql/src/expr/value.rs                | 59 +++++--------------------
 datafusion/sql/src/statement.rs                 |  4 ++
 datafusion/sqllogictest/test_files/expr.slt     |  6 ---
 datafusion/sqllogictest/test_files/interval.slt | 41 +++++++++++++++++
 4 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs
index 64575645bc..be0909b584 100644
--- a/datafusion/sql/src/expr/value.rs
+++ b/datafusion/sql/src/expr/value.rs
@@ -16,7 +16,9 @@
 // under the License.
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
+use arrow::compute::kernels::cast_utils::{
+    parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit,
+};
 use arrow::datatypes::DECIMAL128_MAX_PRECISION;
 use arrow_schema::DataType;
 use datafusion_common::{
@@ -232,27 +234,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
         let value = interval_literal(*interval.value, negative)?;
 
-        let value = if has_units(&value) {
-            // If the interval already contains a unit
-            // `INTERVAL '5 month' rather than `INTERVAL '5' month`
-            // skip the other unit
-            value
-        } else {
-            // leading_field really means the unit if specified
-            // for example, "month" in  `INTERVAL '5' month`
-            match interval.leading_field.as_ref() {
-                Some(leading_field) => {
-                    format!("{value} {leading_field}")
-                }
-                None => {
-                    // default to seconds for the units
-                    // `INTERVAL '5' is parsed as '5 seconds'
-                    format!("{value} seconds")
-                }
-            }
+        // leading_field really means the unit if specified
+        // for example, "month" in  `INTERVAL '5' month`
+        let value = match interval.leading_field.as_ref() {
+            Some(leading_field) => format!("{value} {leading_field}"),
+            None => value,
         };
 
-        let val = parse_interval_month_day_nano(&value)?;
+        let config = IntervalParseConfig::new(IntervalUnit::Second);
+        let val = parse_interval_month_day_nano_config(&value, config)?;
         Ok(lit(ScalarValue::IntervalMonthDayNano(Some(val))))
     }
 }
@@ -292,35 +282,6 @@ fn interval_literal(interval_value: SQLExpr, negative: 
bool) -> Result<String> {
     }
 }
 
-// TODO make interval parsing better in arrow-rs / expose `IntervalType`
-fn has_units(val: &str) -> bool {
-    let val = val.to_lowercase();
-    val.ends_with("century")
-        || val.ends_with("centuries")
-        || val.ends_with("decade")
-        || val.ends_with("decades")
-        || val.ends_with("year")
-        || val.ends_with("years")
-        || val.ends_with("month")
-        || val.ends_with("months")
-        || val.ends_with("week")
-        || val.ends_with("weeks")
-        || val.ends_with("day")
-        || val.ends_with("days")
-        || val.ends_with("hour")
-        || val.ends_with("hours")
-        || val.ends_with("minute")
-        || val.ends_with("minutes")
-        || val.ends_with("second")
-        || val.ends_with("seconds")
-        || val.ends_with("millisecond")
-        || val.ends_with("milliseconds")
-        || val.ends_with("microsecond")
-        || val.ends_with("microseconds")
-        || val.ends_with("nanosecond")
-        || val.ends_with("nanoseconds")
-}
-
 /// Try to decode bytes from hex literal string.
 ///
 /// None will be returned if the input literal is hex-invalid.
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 34214f711a..d9719e0805 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -253,6 +253,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 order_by,
                 partition_by,
                 cluster_by,
+                clustered_by,
                 options,
                 strict,
                 copy_grants,
@@ -346,6 +347,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 if cluster_by.is_some() {
                     return not_impl_err!("Cluster by not supported")?;
                 }
+                if clustered_by.is_some() {
+                    return not_impl_err!("Clustered by not supported")?;
+                }
                 if options.is_some() {
                     return not_impl_err!("Options not supported")?;
                 }
diff --git a/datafusion/sqllogictest/test_files/expr.slt 
b/datafusion/sqllogictest/test_files/expr.slt
index a478e36172..e8d8329d34 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -229,12 +229,6 @@ SELECT interval '5 day'
 ----
 5 days
 
-# Hour is ignored, this matches PostgreSQL
-query ?
-SELECT interval '5 day' hour
-----
-5 days
-
 query ?
 SELECT interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds'
 ----
diff --git a/datafusion/sqllogictest/test_files/interval.slt 
b/datafusion/sqllogictest/test_files/interval.slt
index c73e340f91..db453adf12 100644
--- a/datafusion/sqllogictest/test_files/interval.slt
+++ b/datafusion/sqllogictest/test_files/interval.slt
@@ -486,6 +486,31 @@ select i - d from t;
 query error DataFusion error: Error during planning: Cannot coerce arithmetic 
expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid 
types
 select i - ts from t;
 
+# interval unit abreiviation and plurals
+query ?
+select interval '1s'
+----
+1.000000000 secs
+
+query ?
+select '1s'::interval
+----
+1.000000000 secs
+
+query ?
+select interval'1sec'
+----
+1.000000000 secs
+
+query ?
+select interval '1ms'
+----
+0.001000000 secs
+
+query ?
+select interval '1 y' + interval '1 year'
+----
+24 mons
 
 # interval (scalar) + date / timestamp (array)
 query D
@@ -502,6 +527,22 @@ select '1 month'::interval + ts from t;
 2000-02-01T12:11:10
 2000-03-01T00:00:00
 
+# trailing extra unit, this matches PostgreSQL
+query ?
+select interval '5 day 1' hour
+----
+5 days 1 hours
+
+# trailing extra unit, this matches PostgreSQL
+query ?
+select interval '5 day 0' hour
+----
+5 days
+
+# This is interpreted as "0 hours" with PostgreSQL, should be fixed with
+query error DataFusion error: Arrow error: Parser error: Invalid input syntax 
for type interval: "5 day HOUR"
+SELECT interval '5 day' hour
+
 # expected error interval (scalar) - date / timestamp (array)
 query error DataFusion error: Error during planning: Cannot coerce arithmetic 
expression Interval\(MonthDayNano\) \- Date32 to valid types
 select '1 month'::interval - d from t;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to