This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new dd5683745e Minor: return NULL for range and generate_series (#10275)
dd5683745e is described below

commit dd5683745e7d527b01b804c8f4f1a0a53aa225e8
Author: Lordworms <[email protected]>
AuthorDate: Mon Apr 29 18:14:06 2024 -0500

    Minor: return NULL for range and generate_series (#10275)
    
    * return NULL for range and generate_series
    
    * Update datafusion/sqllogictest/test_files/array.slt
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * Update datafusion/sqllogictest/test_files/array.slt
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions-array/src/range.rs      |  45 ++++++---
 datafusion/functions-array/src/udf.rs        | 140 ---------------------------
 datafusion/sqllogictest/test_files/array.slt |  37 +++++--
 3 files changed, 59 insertions(+), 163 deletions(-)

diff --git a/datafusion/functions-array/src/range.rs b/datafusion/functions-array/src/range.rs
index 1c9e0c878e..150fe59602 100644
--- a/datafusion/functions-array/src/range.rs
+++ b/datafusion/functions-array/src/range.rs
@@ -17,14 +17,12 @@
 
 //! [`ScalarUDFImpl`] definitions for range and gen_series functions.
 
+use crate::utils::make_scalar_function;
 use arrow::array::{Array, ArrayRef, Int64Array, ListArray};
 use arrow::datatypes::{DataType, Field};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
-use std::any::Any;
-
-use crate::utils::make_scalar_function;
 use arrow_array::types::{Date32Type, IntervalMonthDayNanoType};
-use arrow_array::Date32Array;
+use arrow_array::{Date32Array, NullArray};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
 use arrow_schema::DataType::{Date32, Int64, Interval, List};
 use arrow_schema::IntervalUnit::MonthDayNano;
 use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array};
@@ -34,6 +32,7 @@ use datafusion_expr::Expr;
 use datafusion_expr::{
     ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility,
 };
+use std::any::Any;
 use std::sync::Arc;
 
 make_udf_function!(
@@ -57,6 +56,7 @@ impl Range {
                     TypeSignature::Exact(vec![Int64, Int64]),
                     TypeSignature::Exact(vec![Int64, Int64, Int64]),
                     TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
+                    TypeSignature::Any(3),
                 ],
                 Volatility::Immutable,
             ),
@@ -77,14 +77,21 @@ impl ScalarUDFImpl for Range {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        Ok(List(Arc::new(Field::new(
-            "item",
-            arg_types[0].clone(),
-            true,
-        ))))
+        if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
+            Ok(DataType::Null)
+        } else {
+            Ok(List(Arc::new(Field::new(
+                "item",
+                arg_types[0].clone(),
+                true,
+            ))))
+        }
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        if args.iter().any(|arg| arg.data_type() == DataType::Null) {
+            return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
+        }
         match args[0].data_type() {
             Int64 => make_scalar_function(|args| gen_range_inner(args, false))(args),
             Date32 => make_scalar_function(|args| gen_range_date(args, false))(args),
@@ -120,6 +127,7 @@ impl GenSeries {
                     TypeSignature::Exact(vec![Int64, Int64]),
                     TypeSignature::Exact(vec![Int64, Int64, Int64]),
                     TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
+                    TypeSignature::Any(3),
                 ],
                 Volatility::Immutable,
             ),
@@ -140,14 +148,21 @@ impl ScalarUDFImpl for GenSeries {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        Ok(List(Arc::new(Field::new(
-            "item",
-            arg_types[0].clone(),
-            true,
-        ))))
+        if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
+            Ok(DataType::Null)
+        } else {
+            Ok(List(Arc::new(Field::new(
+                "item",
+                arg_types[0].clone(),
+                true,
+            ))))
+        }
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        if args.iter().any(|arg| arg.data_type() == DataType::Null) {
+            return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
+        }
         match args[0].data_type() {
             Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args),
             Date32 => make_scalar_function(|args| gen_range_date(args, true))(args),
diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs
index 1462b3efad..c723fbb42c 100644
--- a/datafusion/functions-array/src/udf.rs
+++ b/datafusion/functions-array/src/udf.rs
@@ -166,146 +166,6 @@ impl ScalarUDFImpl for StringToArray {
     }
 }
 
-make_udf_function!(
-    Range,
-    range,
-    start stop step,
-    "create a list of values in the range between start and stop",
-    range_udf
-);
-#[derive(Debug)]
-pub struct Range {
-    signature: Signature,
-    aliases: Vec<String>,
-}
-impl Range {
-    pub fn new() -> Self {
-        use DataType::*;
-        Self {
-            signature: Signature::one_of(
-                vec![
-                    TypeSignature::Exact(vec![Int64]),
-                    TypeSignature::Exact(vec![Int64, Int64]),
-                    TypeSignature::Exact(vec![Int64, Int64, Int64]),
-                    TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
-                ],
-                Volatility::Immutable,
-            ),
-            aliases: vec![String::from("range")],
-        }
-    }
-}
-impl ScalarUDFImpl for Range {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-    fn name(&self) -> &str {
-        "range"
-    }
-
-    fn signature(&self) -> &Signature {
-        &self.signature
-    }
-
-    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        use DataType::*;
-        Ok(List(Arc::new(Field::new(
-            "item",
-            arg_types[0].clone(),
-            true,
-        ))))
-    }
-
-    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
-        let args = ColumnarValue::values_to_arrays(args)?;
-        match args[0].data_type() {
-            arrow::datatypes::DataType::Int64 => {
-                crate::kernels::gen_range(&args, false).map(ColumnarValue::Array)
-            }
-            arrow::datatypes::DataType::Date32 => {
-                crate::kernels::gen_range_date(&args, false).map(ColumnarValue::Array)
-            }
-            _ => {
-                exec_err!("unsupported type for range")
-            }
-        }
-    }
-
-    fn aliases(&self) -> &[String] {
-        &self.aliases
-    }
-}
-
-make_udf_function!(
-    GenSeries,
-    gen_series,
-    start stop step,
-    "create a list of values in the range between start and stop, include upper bound",
-    gen_series_udf
-);
-#[derive(Debug)]
-pub struct GenSeries {
-    signature: Signature,
-    aliases: Vec<String>,
-}
-impl GenSeries {
-    pub fn new() -> Self {
-        use DataType::*;
-        Self {
-            signature: Signature::one_of(
-                vec![
-                    TypeSignature::Exact(vec![Int64]),
-                    TypeSignature::Exact(vec![Int64, Int64]),
-                    TypeSignature::Exact(vec![Int64, Int64, Int64]),
-                    TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
-                ],
-                Volatility::Immutable,
-            ),
-            aliases: vec![String::from("generate_series")],
-        }
-    }
-}
-impl ScalarUDFImpl for GenSeries {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-    fn name(&self) -> &str {
-        "generate_series"
-    }
-
-    fn signature(&self) -> &Signature {
-        &self.signature
-    }
-
-    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        use DataType::*;
-        Ok(List(Arc::new(Field::new(
-            "item",
-            arg_types[0].clone(),
-            true,
-        ))))
-    }
-
-    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
-        let args = ColumnarValue::values_to_arrays(args)?;
-        match args[0].data_type() {
-            arrow::datatypes::DataType::Int64 => {
-                crate::kernels::gen_range(&args, true).map(ColumnarValue::Array)
-            }
-            arrow::datatypes::DataType::Date32 => {
-                crate::kernels::gen_range_date(&args, true).map(ColumnarValue::Array)
-            }
-            _ => {
-                exec_err!("unsupported type for range")
-            }
-        }
-    }
-
-    fn aliases(&self) -> &[String] {
-        &self.aliases
-    }
-}
-
 make_udf_function!(
     ArrayDims,
     array_dims,
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index b33419ecd4..3b90187f07 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5634,15 +5634,26 @@ select range(NULL)
 ----
 NULL
 
-## should throw error
-query error
+## should return NULL
+query ?
 select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
+----
+NULL
 
-query error
+query ?
 select range(DATE '1992-09-01', DATE '1993-03-01', NULL);
+----
+NULL
 
-query error
+query ?
 select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR);
+----
+NULL
+
+query ?
+select range(NULL, NULL, NULL);
+----
+NULL
 
 query ?
 select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)
@@ -5668,16 +5679,26 @@ select generate_series(5),
 ----
 [0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 
7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 
1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 
1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 
1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 
1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 
1993-01-09, 1993-01-08, 1993-01-07, 1993-01-0 [...]
 
-## should throw error
-query error
+## should return NULL
+query ?
 select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
+----
+NULL
 
-query error
+query ?
 select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL);
+----
+NULL
 
-query error
+query ?
 select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR);
+----
+NULL
 
+query ?
+select generate_series(NULL, NULL, NULL);
+----
+NULL
 
 query ?
 select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to