alamb commented on code in PR #7614:
URL: https://github.com/apache/arrow-datafusion/pull/7614#discussion_r1333352337
##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -136,62 +137,100 @@ fn maybe_data_types(
///
/// See the module level documentation for more detail on coercion.
pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
- use self::DataType::*;
-
if type_into == type_from {
return true;
}
- // Null can convert to most of types
+ if let Some(coerced) = coerced_from(type_into, type_from) {
+ return coerced == type_into;
+ }
+ false
+}
+
+fn coerced_from<'a>(
+ type_into: &'a DataType,
+ type_from: &'a DataType,
+) -> Option<&'a DataType> {
+ use self::DataType::*;
+
match type_into {
- Int8 => matches!(type_from, Null | Int8),
- Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
- Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16),
- Int64 => matches!(
- type_from,
- Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
- ),
- UInt8 => matches!(type_from, Null | UInt8),
- UInt16 => matches!(type_from, Null | UInt8 | UInt16),
- UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
- UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
- Float32 => matches!(
- type_from,
- Null | Int8
- | Int16
- | Int32
- | Int64
- | UInt8
- | UInt16
- | UInt32
- | UInt64
- | Float32
- ),
- Float64 => matches!(
- type_from,
- Null | Int8
- | Int16
- | Int32
- | Int64
- | UInt8
- | UInt16
- | UInt32
- | UInt64
- | Float32
- | Float64
- | Decimal128(_, _)
- ),
- Timestamp(TimeUnit::Nanosecond, _) => {
- matches!(
+ // coerced into type_into
+ Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+ Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) =>
Some(type_into),
+ Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16) => {
+ Some(type_into)
+ }
+ Int64
+ if matches!(
+ type_from,
+ Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+ ) =>
+ {
+ Some(type_into)
+ }
+ UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+ UInt16 if matches!(type_from, Null | UInt8 | UInt16) =>
Some(type_into),
+ UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) =>
Some(type_into),
+ UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64)
=> {
+ Some(type_into)
+ }
+ Float32
+ if matches!(
type_from,
- Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
- )
+ Null | Int8
+ | Int16
+ | Int32
+ | Int64
+ | UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Float32
+ ) =>
+ {
+ Some(type_into)
}
- Interval(_) => {
- matches!(type_from, Utf8 | LargeUtf8)
+ Float64
+ if matches!(
+ type_from,
+ Null | Int8
+ | Int16
+ | Int32
+ | Int64
+ | UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Float32
+ | Float64
+ | Decimal128(_, _)
+ ) =>
+ {
+ Some(type_into)
+ }
+ Timestamp(TimeUnit::Nanosecond, None)
+ if matches!(
+ type_from,
+ Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+ ) =>
+ {
+ Some(type_into)
}
- Utf8 | LargeUtf8 => true,
- Null => can_cast_types(type_from, type_into),
- _ => false,
+ Interval(_) if matches!(type_from, Utf8 | LargeUtf8) =>
Some(type_into),
+ Utf8 | LargeUtf8 => Some(type_into),
+ Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+ // timestamp coercions, with timezone, accept the type_from timezone
if valid
Review Comment:
```suggestion
// Coerce to consistent timezones, if the `type_from` timezone is valid
```
##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -100,6 +100,40 @@ select * from foo where ts != '2000-02-01T00:00:00';
statement ok
drop table foo;
+
+##########
Review Comment:
Could you also possibly add an invalid timezone test like this:
```sql
❯ select arrow_cast('2021-01-02T03:04:00', 'Timestamp(Nanosecond, Some("Foo"))');
Optimizer rule 'simplify_expressions' failed
caused by
Arrow error: Parser error: Invalid timezone "Foo": 'Foo' is not a valid timezone
```
##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -136,62 +137,100 @@ fn maybe_data_types(
///
/// See the module level documentation for more detail on coercion.
pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
- use self::DataType::*;
-
if type_into == type_from {
return true;
}
- // Null can convert to most of types
+ if let Some(coerced) = coerced_from(type_into, type_from) {
+ return coerced == type_into;
+ }
+ false
+}
+
+fn coerced_from<'a>(
+ type_into: &'a DataType,
+ type_from: &'a DataType,
+) -> Option<&'a DataType> {
+ use self::DataType::*;
+
match type_into {
- Int8 => matches!(type_from, Null | Int8),
- Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
- Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16),
- Int64 => matches!(
- type_from,
- Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
- ),
- UInt8 => matches!(type_from, Null | UInt8),
- UInt16 => matches!(type_from, Null | UInt8 | UInt16),
- UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
- UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
- Float32 => matches!(
- type_from,
- Null | Int8
- | Int16
- | Int32
- | Int64
- | UInt8
- | UInt16
- | UInt32
- | UInt64
- | Float32
- ),
- Float64 => matches!(
- type_from,
- Null | Int8
- | Int16
- | Int32
- | Int64
- | UInt8
- | UInt16
- | UInt32
- | UInt64
- | Float32
- | Float64
- | Decimal128(_, _)
- ),
- Timestamp(TimeUnit::Nanosecond, _) => {
- matches!(
+ // coerced into type_into
+ Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+ Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) =>
Some(type_into),
+ Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16) => {
+ Some(type_into)
+ }
+ Int64
+ if matches!(
+ type_from,
+ Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+ ) =>
+ {
+ Some(type_into)
+ }
+ UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+ UInt16 if matches!(type_from, Null | UInt8 | UInt16) =>
Some(type_into),
+ UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) =>
Some(type_into),
+ UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64)
=> {
+ Some(type_into)
+ }
+ Float32
+ if matches!(
type_from,
- Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
- )
+ Null | Int8
+ | Int16
+ | Int32
+ | Int64
+ | UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Float32
+ ) =>
+ {
+ Some(type_into)
}
- Interval(_) => {
- matches!(type_from, Utf8 | LargeUtf8)
+ Float64
+ if matches!(
+ type_from,
+ Null | Int8
+ | Int16
+ | Int32
+ | Int64
+ | UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Float32
+ | Float64
+ | Decimal128(_, _)
+ ) =>
+ {
+ Some(type_into)
+ }
+ Timestamp(TimeUnit::Nanosecond, None)
+ if matches!(
+ type_from,
+ Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+ ) =>
+ {
+ Some(type_into)
}
- Utf8 | LargeUtf8 => true,
- Null => can_cast_types(type_from, type_into),
- _ => false,
+ Interval(_) if matches!(type_from, Utf8 | LargeUtf8) =>
Some(type_into),
+ Utf8 | LargeUtf8 => Some(type_into),
+ Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+ // timestamp coercions, with timezone, accept the type_from timezone
if valid
+ Timestamp(TimeUnit::Nanosecond, Some(_))
+ if matches!(
+ type_from,
+ Timestamp(TimeUnit::Nanosecond, Some(from_tz)) if
arrow_array::timezone::Tz::from_str(from_tz).is_ok()
Review Comment:
Now that I think about this, I wonder if there is any reason to check for
valid timezones here at all -- if there is an invalid timezone, any actual
calculation will fail subsequently.
Perhaps we can sidestep the whole "don't ignore error" thing entirely if we
just skip the check 🤔
##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -136,62 +137,100 @@ fn maybe_data_types(
///
/// See the module level documentation for more detail on coercion.
pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
- use self::DataType::*;
-
if type_into == type_from {
return true;
}
- // Null can convert to most of types
+ if let Some(coerced) = coerced_from(type_into, type_from) {
+ return coerced == type_into;
+ }
+ false
+}
+
+fn coerced_from<'a>(
+ type_into: &'a DataType,
+ type_from: &'a DataType,
+) -> Option<&'a DataType> {
+ use self::DataType::*;
+
match type_into {
- Int8 => matches!(type_from, Null | Int8),
- Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
- Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16),
- Int64 => matches!(
- type_from,
- Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
- ),
- UInt8 => matches!(type_from, Null | UInt8),
- UInt16 => matches!(type_from, Null | UInt8 | UInt16),
- UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
- UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
- Float32 => matches!(
- type_from,
- Null | Int8
- | Int16
- | Int32
- | Int64
- | UInt8
- | UInt16
- | UInt32
- | UInt64
- | Float32
- ),
- Float64 => matches!(
- type_from,
- Null | Int8
- | Int16
- | Int32
- | Int64
- | UInt8
- | UInt16
- | UInt32
- | UInt64
- | Float32
- | Float64
- | Decimal128(_, _)
- ),
- Timestamp(TimeUnit::Nanosecond, _) => {
- matches!(
+ // coerced into type_into
+ Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+ Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) =>
Some(type_into),
+ Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 |
UInt16) => {
+ Some(type_into)
+ }
+ Int64
+ if matches!(
+ type_from,
+ Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+ ) =>
+ {
+ Some(type_into)
+ }
+ UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+ UInt16 if matches!(type_from, Null | UInt8 | UInt16) =>
Some(type_into),
+ UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) =>
Some(type_into),
+ UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64)
=> {
+ Some(type_into)
+ }
+ Float32
+ if matches!(
type_from,
- Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
- )
+ Null | Int8
+ | Int16
+ | Int32
+ | Int64
+ | UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Float32
+ ) =>
+ {
+ Some(type_into)
}
- Interval(_) => {
- matches!(type_from, Utf8 | LargeUtf8)
+ Float64
+ if matches!(
+ type_from,
+ Null | Int8
+ | Int16
+ | Int32
+ | Int64
+ | UInt8
+ | UInt16
+ | UInt32
+ | UInt64
+ | Float32
+ | Float64
+ | Decimal128(_, _)
+ ) =>
+ {
+ Some(type_into)
+ }
+ Timestamp(TimeUnit::Nanosecond, None)
+ if matches!(
+ type_from,
+ Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+ ) =>
+ {
+ Some(type_into)
}
- Utf8 | LargeUtf8 => true,
- Null => can_cast_types(type_from, type_into),
- _ => false,
+ Interval(_) if matches!(type_from, Utf8 | LargeUtf8) =>
Some(type_into),
+ Utf8 | LargeUtf8 => Some(type_into),
+ Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+ // timestamp coercions, with timezone, accept the type_from timezone
if valid
+ Timestamp(TimeUnit::Nanosecond, Some(_))
+ if matches!(
+ type_from,
+ Timestamp(TimeUnit::Nanosecond, Some(from_tz)) if
arrow_array::timezone::Tz::from_str(from_tz).is_ok()
Review Comment:
Now that I think about this, I wonder if there is any reason to check for
valid timezones here at all -- if there is an invalid timezone, any actual
calculation will fail subsequently.
Perhaps we can sidestep the whole "don't ignore error" thing entirely if we
just skip the check 🤔
##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -100,6 +100,40 @@ select * from foo where ts != '2000-02-01T00:00:00';
statement ok
drop table foo;
+
+##########
Review Comment:
Could you also possibly add an invalid timezone test like this:
```sql
❯ select arrow_cast('2021-01-02T03:04:00', 'Timestamp(Nanosecond, Some("Foo"))');
Optimizer rule 'simplify_expressions' failed
caused by
Arrow error: Parser error: Invalid timezone "Foo": 'Foo' is not a valid timezone
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]