This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 722b0a1 ARROW-5039: [Rust] [DataFusion] Re-implement CAST support
722b0a1 is described below
commit 722b0a122787bc6e1aef30659d3cbab046a03711
Author: Andy Grove <[email protected]>
AuthorDate: Sun Mar 31 17:08:45 2019 -0600
ARROW-5039: [Rust] [DataFusion] Re-implement CAST support
This PR achieves a few things:
- Removes some PoC code and replaces it with generic code for evaluating
CAST expressions
- Adds support for casting from Int64 in arrow compute module (looks like
this was just missed)
- Improves error messages and rustdocs
- Adds a bounds check to avoid a panic if a column index is incorrect
Author: Andy Grove <[email protected]>
Closes #4054 from andygrove/ARROW-5039 and squashes the following commits:
fbc606c <Andy Grove> tests for casting from all floating point types, plus
fix bug in mapping from f32 to u16
ae4338c <Andy Grove> tests for casting from all unsigned int types
46a016e <Andy Grove> tests for casting from all signed int types
9ddd055 <Andy Grove> check for null after cast
ddc80dd <Andy Grove> add unit test for casting from int64
e2a9e33 <Andy Grove> revert last commit
84951ad <Andy Grove> Revert adding Int64 cast support
588459a <Andy Grove> Code cleanup
28d63c6 <Andy Grove> Improve error messages and rustdocs
542ad26 <Andy Grove> Re-implement CAST support in DataFusion
c01763c <Andy Grove> Add support for casting from UInt64
---
rust/arrow/src/compute/kernels/cast.rs | 959 +++++++++++++++++++++++++++-
rust/datafusion/src/execution/context.rs | 2 +-
rust/datafusion/src/execution/expression.rs | 93 ++-
rust/datafusion/tests/sql.rs | 10 +
4 files changed, 1014 insertions(+), 50 deletions(-)
diff --git a/rust/arrow/src/compute/kernels/cast.rs
b/rust/arrow/src/compute/kernels/cast.rs
index d19097f..b0f19cd 100644
--- a/rust/arrow/src/compute/kernels/cast.rs
+++ b/rust/arrow/src/compute/kernels/cast.rs
@@ -283,6 +283,16 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) ->
Result<ArrayRef> {
(Int32, Float32) => cast_numeric_arrays::<Int32Type,
Float32Type>(array),
(Int32, Float64) => cast_numeric_arrays::<Int32Type,
Float64Type>(array),
+ (Int64, UInt8) => cast_numeric_arrays::<Int64Type, UInt8Type>(array),
+ (Int64, UInt16) => cast_numeric_arrays::<Int64Type, UInt16Type>(array),
+ (Int64, UInt32) => cast_numeric_arrays::<Int64Type, UInt32Type>(array),
+ (Int64, UInt64) => cast_numeric_arrays::<Int64Type, UInt64Type>(array),
+ (Int64, Int8) => cast_numeric_arrays::<Int64Type, Int8Type>(array),
+ (Int64, Int16) => cast_numeric_arrays::<Int64Type, Int16Type>(array),
+ (Int64, Int32) => cast_numeric_arrays::<Int64Type, Int32Type>(array),
+ (Int64, Float32) => cast_numeric_arrays::<Int64Type,
Float32Type>(array),
+ (Int64, Float64) => cast_numeric_arrays::<Int64Type,
Float64Type>(array),
+
(Float32, UInt8) => cast_numeric_arrays::<Float32Type,
UInt8Type>(array),
(Float32, UInt16) => cast_numeric_arrays::<Float32Type,
UInt16Type>(array),
(Float32, UInt32) => cast_numeric_arrays::<Float32Type,
UInt32Type>(array),
@@ -294,7 +304,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) ->
Result<ArrayRef> {
(Float32, Float64) => cast_numeric_arrays::<Float32Type,
Float64Type>(array),
(Float64, UInt8) => cast_numeric_arrays::<Float64Type,
UInt8Type>(array),
- (Float64, UInt16) => cast_numeric_arrays::<UInt16Type,
Float32Type>(array),
+ (Float64, UInt16) => cast_numeric_arrays::<Float64Type,
UInt16Type>(array),
(Float64, UInt32) => cast_numeric_arrays::<Float64Type,
UInt32Type>(array),
(Float64, UInt64) => cast_numeric_arrays::<Float64Type,
UInt64Type>(array),
(Float64, Int8) => cast_numeric_arrays::<Float64Type, Int8Type>(array),
@@ -771,4 +781,951 @@ mod tests {
)
.unwrap();
}
+
+ #[test]
+ fn test_cast_from_f64() {
+ let f64_values: Vec<f64> = vec![
+ std::i64::MIN as f64,
+ std::i32::MIN as f64,
+ std::i16::MIN as f64,
+ std::i8::MIN as f64,
+ 0_f64,
+ std::u8::MAX as f64,
+ std::u16::MAX as f64,
+ std::u32::MAX as f64,
+ std::u64::MAX as f64,
+ ];
+ let f64_array: ArrayRef =
Arc::new(Float64Array::from(f64_values.clone()));
+
+ let f64_expected = vec![
+ "-9223372036854776000.0",
+ "-2147483648.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "255.0",
+ "65535.0",
+ "4294967295.0",
+ "18446744073709552000.0",
+ ];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&f64_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec![
+ "-9223372000000000000.0",
+ "-2147483600.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "255.0",
+ "65535.0",
+ "4294967300.0",
+ "18446744000000000000.0",
+ ];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&f64_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec![
+ "-9223372036854775808",
+ "-2147483648",
+ "-32768",
+ "-128",
+ "0",
+ "255",
+ "65535",
+ "4294967295",
+ "null",
+ ];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&f64_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec![
+ "null",
+ "-2147483648",
+ "-32768",
+ "-128",
+ "0",
+ "255",
+ "65535",
+ "null",
+ "null",
+ ];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&f64_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec![
+ "null", "null", "-32768", "-128", "0", "255", "null", "null",
"null",
+ ];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&f64_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec![
+ "null", "null", "null", "-128", "0", "null", "null", "null",
"null",
+ ];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&f64_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec![
+ "null",
+ "null",
+ "null",
+ "null",
+ "0",
+ "255",
+ "65535",
+ "4294967295",
+ "null",
+ ];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&f64_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec![
+ "null",
+ "null",
+ "null",
+ "null",
+ "0",
+ "255",
+ "65535",
+ "4294967295",
+ "null",
+ ];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&f64_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec![
+ "null", "null", "null", "null", "0", "255", "65535", "null",
"null",
+ ];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&f64_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec![
+ "null", "null", "null", "null", "0", "255", "null", "null", "null",
+ ];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&f64_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_f32() {
+ let f32_values: Vec<f32> = vec![
+ std::i32::MIN as f32,
+ std::i32::MIN as f32,
+ std::i16::MIN as f32,
+ std::i8::MIN as f32,
+ 0_f32,
+ std::u8::MAX as f32,
+ std::u16::MAX as f32,
+ std::u32::MAX as f32,
+ std::u32::MAX as f32,
+ ];
+ let f32_array: ArrayRef =
Arc::new(Float32Array::from(f32_values.clone()));
+
+ let f64_expected = vec![
+ "-2147483648.0",
+ "-2147483648.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "255.0",
+ "65535.0",
+ "4294967296.0",
+ "4294967296.0",
+ ];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&f32_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec![
+ "-2147483600.0",
+ "-2147483600.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "255.0",
+ "65535.0",
+ "4294967300.0",
+ "4294967300.0",
+ ];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&f32_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec![
+ "-2147483648",
+ "-2147483648",
+ "-32768",
+ "-128",
+ "0",
+ "255",
+ "65535",
+ "4294967296",
+ "4294967296",
+ ];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&f32_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec![
+ "-2147483648",
+ "-2147483648",
+ "-32768",
+ "-128",
+ "0",
+ "255",
+ "65535",
+ "null",
+ "null",
+ ];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&f32_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec![
+ "null", "null", "-32768", "-128", "0", "255", "null", "null",
"null",
+ ];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&f32_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec![
+ "null", "null", "null", "-128", "0", "null", "null", "null",
"null",
+ ];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&f32_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec![
+ "null",
+ "null",
+ "null",
+ "null",
+ "0",
+ "255",
+ "65535",
+ "4294967296",
+ "4294967296",
+ ];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&f32_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec![
+ "null", "null", "null", "null", "0", "255", "65535", "null",
"null",
+ ];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&f32_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec![
+ "null", "null", "null", "null", "0", "255", "65535", "null",
"null",
+ ];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&f32_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec![
+ "null", "null", "null", "null", "0", "255", "null", "null", "null",
+ ];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&f32_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_uint64() {
+ let u64_values: Vec<u64> = vec![
+ 0,
+ std::u8::MAX as u64,
+ std::u16::MAX as u64,
+ std::u32::MAX as u64,
+ std::u64::MAX,
+ ];
+ let u64_array: ArrayRef =
Arc::new(UInt64Array::from(u64_values.clone()));
+
+ let f64_expected = vec![
+ "0.0",
+ "255.0",
+ "65535.0",
+ "4294967295.0",
+ "18446744073709552000.0",
+ ];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&u64_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec![
+ "0.0",
+ "255.0",
+ "65535.0",
+ "4294967300.0",
+ "18446744000000000000.0",
+ ];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&u64_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec!["0", "255", "65535", "4294967295", "null"];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&u64_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec!["0", "255", "65535", "null", "null"];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&u64_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec!["0", "255", "null", "null", "null"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&u64_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["0", "null", "null", "null", "null"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&u64_array, &DataType::Int8)
+ );
+
+ let u64_expected =
+ vec!["0", "255", "65535", "4294967295", "18446744073709551615"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&u64_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec!["0", "255", "65535", "4294967295", "null"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&u64_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["0", "255", "65535", "null", "null"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&u64_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["0", "255", "null", "null", "null"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&u64_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_uint32() {
+ let u32_values: Vec<u32> = vec![
+ 0,
+ std::u8::MAX as u32,
+ std::u16::MAX as u32,
+ std::u32::MAX as u32,
+ ];
+ let u32_array: ArrayRef =
Arc::new(UInt32Array::from(u32_values.clone()));
+
+ let f64_expected = vec!["0.0", "255.0", "65535.0", "4294967295.0"];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&u32_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec!["0.0", "255.0", "65535.0", "4294967300.0"];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&u32_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec!["0", "255", "65535", "4294967295"];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&u32_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec!["0", "255", "65535", "null"];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&u32_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec!["0", "255", "null", "null"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&u32_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["0", "null", "null", "null"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&u32_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec!["0", "255", "65535", "4294967295"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&u32_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec!["0", "255", "65535", "4294967295"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&u32_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["0", "255", "65535", "null"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&u32_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["0", "255", "null", "null"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&u32_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_uint16() {
+ let u16_values: Vec<u16> = vec![0, std::u8::MAX as u16, std::u16::MAX
as u16];
+ let u16_array: ArrayRef =
Arc::new(UInt16Array::from(u16_values.clone()));
+
+ let f64_expected = vec!["0.0", "255.0", "65535.0"];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&u16_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec!["0.0", "255.0", "65535.0"];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&u16_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec!["0", "255", "65535"];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&u16_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec!["0", "255", "65535"];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&u16_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec!["0", "255", "null"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&u16_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["0", "null", "null"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&u16_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec!["0", "255", "65535"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&u16_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec!["0", "255", "65535"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&u16_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["0", "255", "65535"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&u16_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["0", "255", "null"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&u16_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_uint8() {
+ let u8_values: Vec<u8> = vec![0, std::u8::MAX];
+ let u8_array: ArrayRef = Arc::new(UInt8Array::from(u8_values.clone()));
+
+ let f64_expected = vec!["0.0", "255.0"];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&u8_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec!["0.0", "255.0"];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&u8_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec!["0", "255"];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&u8_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec!["0", "255"];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&u8_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec!["0", "255"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&u8_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["0", "null"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&u8_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec!["0", "255"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&u8_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec!["0", "255"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&u8_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["0", "255"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&u8_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["0", "255"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&u8_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_int64() {
+ let i64_values: Vec<i64> = vec![
+ std::i64::MIN,
+ std::i32::MIN as i64,
+ std::i16::MIN as i64,
+ std::i8::MIN as i64,
+ 0,
+ std::i8::MAX as i64,
+ std::i16::MAX as i64,
+ std::i32::MAX as i64,
+ std::i64::MAX,
+ ];
+ let i64_array: ArrayRef =
Arc::new(Int64Array::from(i64_values.clone()));
+
+ let f64_expected = vec![
+ "-9223372036854776000.0",
+ "-2147483648.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "127.0",
+ "32767.0",
+ "2147483647.0",
+ "9223372036854776000.0",
+ ];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&i64_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec![
+ "-9223372000000000000.0",
+ "-2147483600.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "127.0",
+ "32767.0",
+ "2147483600.0",
+ "9223372000000000000.0",
+ ];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&i64_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec![
+ "-9223372036854775808",
+ "-2147483648",
+ "-32768",
+ "-128",
+ "0",
+ "127",
+ "32767",
+ "2147483647",
+ "9223372036854775807",
+ ];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&i64_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec![
+ "null",
+ "-2147483648",
+ "-32768",
+ "-128",
+ "0",
+ "127",
+ "32767",
+ "2147483647",
+ "null",
+ ];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&i64_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec![
+ "null", "null", "-32768", "-128", "0", "127", "32767", "null",
"null",
+ ];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&i64_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec![
+ "null", "null", "null", "-128", "0", "127", "null", "null", "null",
+ ];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&i64_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec![
+ "null",
+ "null",
+ "null",
+ "null",
+ "0",
+ "127",
+ "32767",
+ "2147483647",
+ "9223372036854775807",
+ ];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&i64_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec![
+ "null",
+ "null",
+ "null",
+ "null",
+ "0",
+ "127",
+ "32767",
+ "2147483647",
+ "null",
+ ];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&i64_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec![
+ "null", "null", "null", "null", "0", "127", "32767", "null",
"null",
+ ];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&i64_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec![
+ "null", "null", "null", "null", "0", "127", "null", "null", "null",
+ ];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&i64_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_int32() {
+ let i32_values: Vec<i32> = vec![
+ std::i32::MIN as i32,
+ std::i16::MIN as i32,
+ std::i8::MIN as i32,
+ 0,
+ std::i8::MAX as i32,
+ std::i16::MAX as i32,
+ std::i32::MAX as i32,
+ ];
+ let i32_array: ArrayRef =
Arc::new(Int32Array::from(i32_values.clone()));
+
+ let f64_expected = vec![
+ "-2147483648.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "127.0",
+ "32767.0",
+ "2147483647.0",
+ ];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&i32_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec![
+ "-2147483600.0",
+ "-32768.0",
+ "-128.0",
+ "0.0",
+ "127.0",
+ "32767.0",
+ "2147483600.0",
+ ];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&i32_array, &DataType::Float32)
+ );
+
+ let i16_expected = vec!["null", "-32768", "-128", "0", "127", "32767",
"null"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&i32_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["null", "null", "-128", "0", "127", "null",
"null"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&i32_array, &DataType::Int8)
+ );
+
+ let u64_expected =
+ vec!["null", "null", "null", "0", "127", "32767", "2147483647"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&i32_array, &DataType::UInt64)
+ );
+
+ let u32_expected =
+ vec!["null", "null", "null", "0", "127", "32767", "2147483647"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&i32_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["null", "null", "null", "0", "127", "32767",
"null"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&i32_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["null", "null", "null", "0", "127", "null",
"null"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&i32_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_int16() {
+ let i16_values: Vec<i16> = vec![
+ std::i16::MIN,
+ std::i8::MIN as i16,
+ 0,
+ std::i8::MAX as i16,
+ std::i16::MAX,
+ ];
+ let i16_array: ArrayRef =
Arc::new(Int16Array::from(i16_values.clone()));
+
+ let f64_expected = vec!["-32768.0", "-128.0", "0.0", "127.0",
"32767.0"];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&i16_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec!["-32768.0", "-128.0", "0.0", "127.0",
"32767.0"];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&i16_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec!["-32768", "-128", "0", "127", "32767"];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&i16_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec!["-32768", "-128", "0", "127", "32767"];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&i16_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec!["-32768", "-128", "0", "127", "32767"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&i16_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["null", "-128", "0", "127", "null"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&i16_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec!["null", "null", "0", "127", "32767"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&i16_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec!["null", "null", "0", "127", "32767"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&i16_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["null", "null", "0", "127", "32767"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&i16_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["null", "null", "0", "127", "null"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&i16_array, &DataType::UInt8)
+ );
+ }
+
+ #[test]
+ fn test_cast_from_int8() {
+ let i8_values: Vec<i8> = vec![std::i8::MIN, 0, std::i8::MAX];
+ let i8_array: ArrayRef = Arc::new(Int8Array::from(i8_values.clone()));
+
+ let f64_expected = vec!["-128.0", "0.0", "127.0"];
+ assert_eq!(
+ f64_expected,
+ get_cast_values::<Float64Type>(&i8_array, &DataType::Float64)
+ );
+
+ let f32_expected = vec!["-128.0", "0.0", "127.0"];
+ assert_eq!(
+ f32_expected,
+ get_cast_values::<Float32Type>(&i8_array, &DataType::Float32)
+ );
+
+ let i64_expected = vec!["-128", "0", "127"];
+ assert_eq!(
+ i64_expected,
+ get_cast_values::<Int64Type>(&i8_array, &DataType::Int64)
+ );
+
+ let i32_expected = vec!["-128", "0", "127"];
+ assert_eq!(
+ i32_expected,
+ get_cast_values::<Int32Type>(&i8_array, &DataType::Int32)
+ );
+
+ let i16_expected = vec!["-128", "0", "127"];
+ assert_eq!(
+ i16_expected,
+ get_cast_values::<Int16Type>(&i8_array, &DataType::Int16)
+ );
+
+ let i8_expected = vec!["-128", "0", "127"];
+ assert_eq!(
+ i8_expected,
+ get_cast_values::<Int8Type>(&i8_array, &DataType::Int8)
+ );
+
+ let u64_expected = vec!["null", "0", "127"];
+ assert_eq!(
+ u64_expected,
+ get_cast_values::<UInt64Type>(&i8_array, &DataType::UInt64)
+ );
+
+ let u32_expected = vec!["null", "0", "127"];
+ assert_eq!(
+ u32_expected,
+ get_cast_values::<UInt32Type>(&i8_array, &DataType::UInt32)
+ );
+
+ let u16_expected = vec!["null", "0", "127"];
+ assert_eq!(
+ u16_expected,
+ get_cast_values::<UInt16Type>(&i8_array, &DataType::UInt16)
+ );
+
+ let u8_expected = vec!["null", "0", "127"];
+ assert_eq!(
+ u8_expected,
+ get_cast_values::<UInt8Type>(&i8_array, &DataType::UInt8)
+ );
+ }
+
+ fn get_cast_values<T>(array: &ArrayRef, dt: &DataType) -> Vec<String>
+ where
+ T: ArrowNumericType,
+ {
+ let c = cast(&array, dt).unwrap();
+ let a = c.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+ let mut v: Vec<String> = vec![];
+ for i in 0..array.len() {
+ if a.is_null(i) {
+ v.push("null".to_string())
+ } else {
+ v.push(format!("{:?}", a.value(i)));
+ }
+ }
+ v
+ }
}
diff --git a/rust/datafusion/src/execution/context.rs
b/rust/datafusion/src/execution/context.rs
index a18d769..1618e6a 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -127,7 +127,7 @@ impl ExecutionContext {
}
/// Optimize the logical plan by applying optimizer rules
- fn optimize(&self, plan: &LogicalPlan) -> Result<Arc<LogicalPlan>> {
+ pub fn optimize(&self, plan: &LogicalPlan) -> Result<Arc<LogicalPlan>> {
let rules: Vec<Box<OptimizerRule>> = vec![
Box::new(ProjectionPushDown::new()),
Box::new(TypeCoercionRule::new()),
diff --git a/rust/datafusion/src/execution/expression.rs
b/rust/datafusion/src/execution/expression.rs
index f152a14..45adaac 100644
--- a/rust/datafusion/src/execution/expression.rs
+++ b/rust/datafusion/src/execution/expression.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! Runtime expression support
+//! Evaluation of expressions against RecordBatch instances.
use std::rc::Rc;
use std::sync::Arc;
@@ -46,7 +46,7 @@ pub enum AggregateType {
Avg,
}
-/// Compiled expression
+/// Compiled expression that can be invoked against a RecordBatch to produce
an Array
pub(super) struct CompiledExpr {
name: String,
f: ArrayFunction,
@@ -180,7 +180,10 @@ macro_rules! math_ops {
(DataType::Float64, DataType::Float64) => {
binary_op!(left_values, right_values, $OP, Float64Array)
}
- _ => Err(ExecutionError::ExecutionError(format!("math_ops"))),
+ (l, r) => Err(ExecutionError::ExecutionError(format!(
+ "Cannot perform math operation on {:?} and {:?}",
+ l, r
+ ))),
}
}};
}
@@ -220,8 +223,10 @@ macro_rules! comparison_ops {
(DataType::Float64, DataType::Float64) => {
binary_op!(left_values, right_values, $OP, Float64Array)
}
- //TODO other types
- _ =>
Err(ExecutionError::ExecutionError(format!("comparison_ops"))),
+ (l, r) => Err(ExecutionError::ExecutionError(format!(
+ "Cannot compare {:?} with {:?}",
+ l, r
+ ))),
}
}};
}
@@ -285,15 +290,26 @@ pub(super) fn compile_expr(
ScalarValue::Float32(n) => literal_array!(n, Float32Array,
Float32),
ScalarValue::Float64(n) => literal_array!(n, Float64Array,
Float64),
other => Err(ExecutionError::ExecutionError(format!(
- "No support for literal type {:?}",
+ "Unsupported literal type {:?}",
other
))),
},
- &Expr::Column(index) => Ok(CompiledExpr {
- name: input_schema.field(index).name().clone(),
- f: Rc::new(move |batch: &RecordBatch|
Ok((*batch.column(index)).clone())),
- t: input_schema.field(index).data_type().clone(),
- }),
+ &Expr::Column(index) => {
+ if index < input_schema.fields().len() {
+ Ok(CompiledExpr {
+ name: input_schema.field(index).name().clone(),
+ f: Rc::new(move |batch: &RecordBatch| {
+ Ok((*batch.column(index)).clone())
+ }),
+ t: input_schema.field(index).data_type().clone(),
+ })
+ } else {
+ Err(ExecutionError::InvalidColumn(format!(
+ "Column index {} out of bounds",
+ index
+ )))
+ }
+ }
&Expr::Cast {
ref expr,
ref data_type,
@@ -310,40 +326,21 @@ pub(super) fn compile_expr(
}),
})
}
- &Expr::Literal(ref value) => {
- //NOTE this is all very inefficient and needs to be optimized
- tracking
- // issue is https://github.com/andygrove/datafusion/issues/191
- match value {
- ScalarValue::Int64(n) => {
- let nn = *n;
- match data_type {
- DataType::Float64 => Ok(CompiledExpr {
- name: "lit".to_string(),
- f: Rc::new(move |batch: &RecordBatch| {
- let mut b =
Float64Array::builder(batch.num_rows());
- for _ in 0..batch.num_rows() {
- b.append_value(nn as f64)?;
- }
- Ok(Arc::new(b.finish()) as ArrayRef)
- }),
- t: data_type.clone(),
- }),
- other =>
Err(ExecutionError::NotImplemented(format!(
- "CAST from Int64 to {:?}",
- other
- ))),
- }
- }
- other => Err(ExecutionError::NotImplemented(format!(
- "CAST from {:?} to {:?}",
- other, data_type
- ))),
- }
+ other => {
+ let compiled_expr = compile_expr(ctx, other, input_schema)?;
+ let dt = data_type.clone();
+ Ok(CompiledExpr {
+ name: "CAST".to_string(),
+ t: data_type.clone(),
+ f: Rc::new(move |batch: &RecordBatch| {
+ // evaluate the expression
+ let array = compiled_expr.invoke(batch)?;
+ // cast the result
+ compute::cast(&array, &dt)
+ .map_err(|e| ExecutionError::ArrowError(e))
+ }),
+ })
}
- other => Err(ExecutionError::General(format!(
- "CAST not implemented for expression {:?}",
- other
- ))),
},
&Expr::BinaryExpr {
ref left,
@@ -439,14 +436,14 @@ pub(super) fn compile_expr(
}),
t: op_type,
}),
- other => Err(ExecutionError::ExecutionError(format!(
- "operator: {:?}",
+ other => Err(ExecutionError::NotImplemented(format!(
+ "Unsupported operator: {:?}",
other
))),
}
}
- other => Err(ExecutionError::ExecutionError(format!(
- "expression {:?}",
+ other => Err(ExecutionError::NotImplemented(format!(
+ "Unsupported expression {:?}",
other
))),
}
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
index 9c64a44..6a61eb5 100644
--- a/rust/datafusion/tests/sql.rs
+++ b/rust/datafusion/tests/sql.rs
@@ -90,6 +90,16 @@ fn csv_query_cast() {
}
#[test]
+fn csv_query_cast_literal() {
+ let mut ctx = ExecutionContext::new();
+ register_aggregate_csv(&mut ctx);
+ let sql = "SELECT c12, CAST(1 AS float) FROM aggregate_test_100 WHERE c12
> CAST(0 AS float) LIMIT 2";
+ let actual = execute(&mut ctx, sql);
+ let expected =
"0.9294097332465232\t1.0\n0.3114712539863804\t1.0\n".to_string();
+ assert_eq!(expected, actual);
+}
+
+#[test]
fn csv_query_limit() {
let mut ctx = ExecutionContext::new();
register_aggregate_csv(&mut ctx);