This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0809f45c7e Add documentation about type signatures, and export 
`TIMEZONE_WILDCARD` (#7726)
0809f45c7e is described below

commit 0809f45c7e7c28138704490fbf168598f99fe742
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Oct 7 06:22:47 2023 -0400

    Add documentation about type signatures, and export `TIMEZONE_WILDCARD` 
(#7726)
    
    * Add documentation and export `TIMEZONE_WILDCARD`
    
    * improve example
    
    * Apply suggestions from code review
    
    Co-authored-by: Yongting You <[email protected]>
    
    ---------
    
    Co-authored-by: Yongting You <[email protected]>
---
 datafusion/expr/src/built_in_function.rs       |  40 ++++------
 datafusion/expr/src/lib.rs                     |   2 +-
 datafusion/expr/src/signature.rs               | 102 ++++++++++++++++++-------
 datafusion/expr/src/type_coercion/functions.rs |  13 +---
 4 files changed, 95 insertions(+), 62 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 70514f52d5..3ce573eaf1 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -18,7 +18,8 @@
 //! Built-in functions module contains all the built-in functions definitions.
 
 use crate::nullif::SUPPORTED_NULLIF_TYPES;
-use crate::type_coercion::functions::{data_types, TIMEZONE_PLACEHOLDER};
+use crate::signature::TIMEZONE_WILDCARD;
+use crate::type_coercion::functions::data_types;
 use crate::{
     conditional_expressions, struct_expressions, utils, Signature, 
TypeSignature,
     Volatility,
@@ -1029,22 +1030,22 @@ impl BuiltinScalarFunction {
                     Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Nanosecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                     Exact(vec![Utf8, Timestamp(Microsecond, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Microsecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                     Exact(vec![Utf8, Timestamp(Millisecond, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Millisecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                     Exact(vec![Utf8, Timestamp(Second, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Second, Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                 ],
                 self.volatility(),
@@ -1059,11 +1060,8 @@ impl BuiltinScalarFunction {
                         ]),
                         Exact(vec![
                             Interval(MonthDayNano),
-                            Timestamp(
-                                array_type.clone(),
-                                Some(TIMEZONE_PLACEHOLDER.into()),
-                            ),
-                            Timestamp(Nanosecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                            Timestamp(array_type.clone(), 
Some(TIMEZONE_WILDCARD.into())),
+                            Timestamp(Nanosecond, 
Some(TIMEZONE_WILDCARD.into())),
                         ]),
                         Exact(vec![
                             Interval(DayTime),
@@ -1072,11 +1070,8 @@ impl BuiltinScalarFunction {
                         ]),
                         Exact(vec![
                             Interval(DayTime),
-                            Timestamp(
-                                array_type.clone(),
-                                Some(TIMEZONE_PLACEHOLDER.into()),
-                            ),
-                            Timestamp(Nanosecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                            Timestamp(array_type.clone(), 
Some(TIMEZONE_WILDCARD.into())),
+                            Timestamp(Nanosecond, 
Some(TIMEZONE_WILDCARD.into())),
                         ]),
                         Exact(vec![
                             Interval(MonthDayNano),
@@ -1084,10 +1079,7 @@ impl BuiltinScalarFunction {
                         ]),
                         Exact(vec![
                             Interval(MonthDayNano),
-                            Timestamp(
-                                array_type.clone(),
-                                Some(TIMEZONE_PLACEHOLDER.into()),
-                            ),
+                            Timestamp(array_type.clone(), 
Some(TIMEZONE_WILDCARD.into())),
                         ]),
                         Exact(vec![
                             Interval(DayTime),
@@ -1095,7 +1087,7 @@ impl BuiltinScalarFunction {
                         ]),
                         Exact(vec![
                             Interval(DayTime),
-                            Timestamp(array_type, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                            Timestamp(array_type, 
Some(TIMEZONE_WILDCARD.into())),
                         ]),
                     ]
                 };
@@ -1115,22 +1107,22 @@ impl BuiltinScalarFunction {
                     Exact(vec![Utf8, Timestamp(Second, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Second, Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                     Exact(vec![Utf8, Timestamp(Microsecond, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Microsecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                     Exact(vec![Utf8, Timestamp(Millisecond, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Millisecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                     Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
                     Exact(vec![
                         Utf8,
-                        Timestamp(Nanosecond, 
Some(TIMEZONE_PLACEHOLDER.into())),
+                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
                     ]),
                 ],
                 self.volatility(),
diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index d35233bc39..fee0bdf5c1 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -74,7 +74,7 @@ pub use logical_plan::*;
 pub use nullif::SUPPORTED_NULLIF_TYPES;
 pub use operator::Operator;
 pub use partition_evaluator::PartitionEvaluator;
-pub use signature::{Signature, TypeSignature, Volatility};
+pub use signature::{Signature, TypeSignature, Volatility, TIMEZONE_WILDCARD};
 pub use table_source::{TableProviderFilterPushDown, TableSource, TableType};
 pub use udaf::AggregateUDF;
 pub use udf::ScalarUDF;
diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs
index 988fe7c91d..0d732ee6e5 100644
--- a/datafusion/expr/src/signature.rs
+++ b/datafusion/expr/src/signature.rs
@@ -20,35 +20,82 @@
 
 use arrow::datatypes::DataType;
 
+/// Constant that is used as a placeholder for any valid timezone.
+/// This is used where a function can accept a timestamp type with any
+/// valid timezone, it exists to avoid the need to enumerate all possible
+/// timezones. See [`TypeSignature`] for more details.
+///
+/// Type coercion always ensures that functions will be executed using
+/// timestamp arrays that have a valid time zone. Functions must never
+/// return results with this timezone.
+pub const TIMEZONE_WILDCARD: &str = "+TZ";
+
 ///A function's volatility, which defines the functions eligibility for 
certain optimizations
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub enum Volatility {
-    /// Immutable - An immutable function will always return the same output 
when given the same
-    /// input. An example of this is [super::BuiltinScalarFunction::Cos].
+    /// An immutable function will always return the same output when given 
the same
+    /// input. An example of this is [super::BuiltinScalarFunction::Cos]. 
DataFusion
+    /// will attempt to inline immutable functions during planning.
     Immutable,
-    /// Stable - A stable function may return different values given the same 
input across different
+    /// A stable function may return different values given the same input 
across different
     /// queries but must return the same value for a given input within a 
query. An example of
-    /// this is [super::BuiltinScalarFunction::Now].
+    /// this is [super::BuiltinScalarFunction::Now]. DataFusion
+    /// will attempt to inline `Stable` functions during planning, when 
possible.
+    /// For the query `select col1, now() from t1`, execution might take a while, 
but
+    /// the `now()` column will be the same for each output row because it is
+    /// evaluated once, during planning.
     Stable,
-    /// Volatile - A volatile function may change the return value from 
evaluation to evaluation.
+    /// A volatile function may change the return value from evaluation to 
evaluation.
     /// Multiple invocations of a volatile function may return different 
results when used in the
-    /// same query. An example of this is 
[super::BuiltinScalarFunction::Random].
+    /// same query. An example of this is 
[super::BuiltinScalarFunction::Random]. DataFusion
+    /// cannot evaluate such functions during planning.
+    /// In the query `select col1, random() from t1`, `random()` function will 
be evaluated
+    /// for each output row, resulting in a unique random value for each row.
     Volatile,
 }
 
-/// A function's type signature, which defines the function's supported 
argument types.
+/// A function's type signature defines the types of arguments the function 
supports.
+///
+/// Functions typically support only a few different types of arguments 
compared to the
+/// different datatypes in Arrow. To make functions easy to use, when possible 
DataFusion
+/// automatically coerces (add casts to) function arguments so they match the 
type signature.
+///
+/// For example, a function like `cos` may only be implemented for `Float64` 
arguments. To support a query
+/// that calls `cos` with a different argument type, such as 
`cos(int_column)`, type coercion automatically
+/// adds a cast such as `cos(CAST(int_column AS DOUBLE))` during planning.
+///
+/// # Data Types
+/// Types to match are represented using Arrow's [`DataType`].  
[`DataType::Timestamp`] has an optional variable
+/// timezone specification. To specify a function can handle a timestamp with 
*ANY* timezone, use
+/// the [`TIMEZONE_WILDCARD`]. For example:
+///
+/// ```
+/// # use arrow::datatypes::{DataType, TimeUnit};
+/// # use datafusion_expr::{TIMEZONE_WILDCARD, TypeSignature};
+/// let type_signature = TypeSignature::Exact(vec![
+///   // A nanosecond precision timestamp with ANY timezone
+///   // matches  Timestamp(Nanosecond, Some("+00:00"))
+///   // matches  Timestamp(Nanosecond, Some("+05:00"))
+///   // does not match  Timestamp(Nanosecond, None)
+///   DataType::Timestamp(TimeUnit::Nanosecond, 
Some(TIMEZONE_WILDCARD.into())),
+/// ]);
+/// ```
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum TypeSignature {
-    /// arbitrary number of arguments of an common type out of a list of valid 
types
-    // A function such as `concat` is `Variadic(vec![DataType::Utf8, 
DataType::LargeUtf8])`
+    /// arbitrary number of arguments of a common type out of a list of valid 
types.
+    ///
+    /// # Examples
+    /// A function such as `concat` is `Variadic(vec![DataType::Utf8, 
DataType::LargeUtf8])`
     Variadic(Vec<DataType>),
-    /// arbitrary number of arguments of an arbitrary but equal type
-    // A function such as `array` is `VariadicEqual`
-    // The first argument decides the type used for coercion
+    /// arbitrary number of arguments of an arbitrary but equal type.
+    /// DataFusion attempts to coerce all argument types to match the first 
argument's type
+    ///
+    /// # Examples
+    /// A function such as `array` is `VariadicEqual`
     VariadicEqual,
     /// arbitrary number of arguments with arbitrary types
     VariadicAny,
-    /// fixed number of arguments of an arbitrary but equal type out of a list 
of valid types
+    /// fixed number of arguments of an arbitrary but equal type out of a list 
of valid types.
     ///
     /// # Examples
     /// 1. A function of one argument of f64 is `Uniform(1, 
vec![DataType::Float64])`
@@ -58,7 +105,8 @@ pub enum TypeSignature {
     Exact(Vec<DataType>),
     /// fixed number of arguments of arbitrary types
     Any(usize),
-    /// One of a list of signatures
+    /// Matches exactly one of a list of [`TypeSignature`]s. Coercion is 
attempted to match
+    /// the signatures in order, and stops after the first success, if any.
     OneOf(Vec<TypeSignature>),
 }
 
@@ -104,46 +152,48 @@ impl TypeSignature {
     }
 }
 
-/// The signature of a function defines the supported argument types
-/// and its volatility.
+/// Defines the supported argument types ([`TypeSignature`]) and 
[`Volatility`] for a function.
+///
+/// DataFusion will automatically coerce (cast) argument types to one of the 
supported
+/// function signatures, if possible.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct Signature {
-    /// type_signature - The types that the function accepts. See 
[TypeSignature] for more information.
+    /// The data types that the function accepts. See [TypeSignature] for more 
information.
     pub type_signature: TypeSignature,
-    /// volatility - The volatility of the function. See [Volatility] for more 
information.
+    /// The volatility of the function. See [Volatility] for more information.
     pub volatility: Volatility,
 }
 
 impl Signature {
-    /// new - Creates a new Signature from any type signature and the 
volatility.
+    /// Creates a new Signature from a given type signature and volatility.
     pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
         Signature {
             type_signature,
             volatility,
         }
     }
-    /// variadic - Creates a variadic signature that represents an arbitrary 
number of arguments all from a type in common_types.
+    /// An arbitrary number of arguments with the same type, from those listed 
in `common_types`.
     pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> 
Self {
         Self {
             type_signature: TypeSignature::Variadic(common_types),
             volatility,
         }
     }
-    /// variadic_equal - Creates a variadic signature that represents an 
arbitrary number of arguments of the same type.
+    /// An arbitrary number of arguments of the same type.
     pub fn variadic_equal(volatility: Volatility) -> Self {
         Self {
             type_signature: TypeSignature::VariadicEqual,
             volatility,
         }
     }
-    /// variadic_any - Creates a variadic signature that represents an 
arbitrary number of arguments of any type.
+    /// An arbitrary number of arguments of any type.
     pub fn variadic_any(volatility: Volatility) -> Self {
         Self {
             type_signature: TypeSignature::VariadicAny,
             volatility,
         }
     }
-    /// uniform - Creates a function with a fixed number of arguments of the 
same type, which must be from valid_types.
+    /// A fixed number of arguments of the same type, from those listed in 
`valid_types`.
     pub fn uniform(
         arg_count: usize,
         valid_types: Vec<DataType>,
@@ -154,21 +204,21 @@ impl Signature {
             volatility,
         }
     }
-    /// exact - Creates a signature which must match the types in exact_types 
in order.
+    /// Exactly matches the types in `exact_types`, in order.
     pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::Exact(exact_types),
             volatility,
         }
     }
-    /// any - Creates a signature which can a be made of any type but of a 
specified number
+    /// A specified number of arguments of any type
     pub fn any(arg_count: usize, volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::Any(arg_count),
             volatility,
         }
     }
-    /// one_of Creates a signature which can match any of the [TypeSignature]s 
which are passed in.
+    /// Any one of a list of [TypeSignature]s.
     pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) 
-> Self {
         Signature {
             type_signature: TypeSignature::OneOf(type_signatures),
diff --git a/datafusion/expr/src/type_coercion/functions.rs 
b/datafusion/expr/src/type_coercion/functions.rs
index 5452c8a5c8..17ca40236d 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::signature::TIMEZONE_WILDCARD;
 use crate::{Signature, TypeSignature};
 use arrow::{
     compute::can_cast_types,
@@ -22,16 +23,6 @@ use arrow::{
 };
 use datafusion_common::{plan_err, DataFusionError, Result};
 
-/// Constant that is used as a placeholder for any valid timezone.
-/// This is used where a function can accept a timestamp type with any
-/// valid timezone, it exists to avoid the need to enumerate all possible
-/// timezones.
-///
-/// Type coercion always ensures that functions will be executed using
-/// timestamp arrays that have a valid time zone. Functions must never
-/// return results with this timezone.
-pub(crate) const TIMEZONE_PLACEHOLDER: &str = "+TZ";
-
 /// Performs type coercion for function arguments.
 ///
 /// Returns the data types to which each argument must be coerced to
@@ -232,7 +223,7 @@ fn coerced_from<'a>(
         Utf8 | LargeUtf8 => Some(type_into.clone()),
         Null if can_cast_types(type_from, type_into) => 
Some(type_into.clone()),
 
-        Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_PLACEHOLDER => {
+        Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => {
             match type_from {
                 Timestamp(_, Some(from_tz)) => {
                     Some(Timestamp(unit.clone(), Some(from_tz.clone())))

Reply via email to