This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6354df683f Port `arrow_typeof` to datafusion-function (#9524)
6354df683f is described below
commit 6354df683fc9dce4b47863e81f45d47a58e7d9dc
Author: Junhao Liu <[email protected]>
AuthorDate: Mon Mar 11 15:02:45 2024 -0600
Port `arrow_typeof` to datafusion-function (#9524)
* Port arrowtypeof
* fmt
* fix test case
* revert test change
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/expr/src/built_in_function.rs | 9 ----
datafusion/expr/src/expr_fn.rs | 2 -
datafusion/functions/src/core/arrowtypeof.rs | 66 +++++++++++++++++++++++++
datafusion/functions/src/core/mod.rs | 3 ++
datafusion/functions/src/core/nullif.rs | 2 -
datafusion/physical-expr/src/functions.rs | 13 -----
datafusion/proto/proto/datafusion.proto | 2 +-
datafusion/proto/src/generated/pbjson.rs | 3 --
datafusion/proto/src/generated/prost.rs | 4 +-
datafusion/proto/src/logical_plan/from_proto.rs | 12 ++---
datafusion/proto/src/logical_plan/to_proto.rs | 1 -
11 files changed, 75 insertions(+), 42 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index e04bd585d4..d211728fb7 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -232,8 +232,6 @@ pub enum BuiltinScalarFunction {
Upper,
/// uuid
Uuid,
- /// arrow_typeof
- ArrowTypeof,
/// overlay
OverLay,
/// levenshtein
@@ -387,7 +385,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Trim => Volatility::Immutable,
BuiltinScalarFunction::Upper => Volatility::Immutable,
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
- BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
BuiltinScalarFunction::OverLay => Volatility::Immutable,
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -612,8 +609,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Iszero => Ok(Boolean),
- BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),
-
BuiltinScalarFunction::OverLay => {
utf8_to_str_type(&input_expr_types[0], "overlay")
}
@@ -898,7 +893,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => {
Signature::uniform(2, vec![Int64], self.volatility())
}
- BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()),
BuiltinScalarFunction::OverLay => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8, Int64, Int64]),
@@ -1087,9 +1081,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::SHA384 => &["sha384"],
BuiltinScalarFunction::SHA512 => &["sha512"],
- // other functions
- BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],
-
BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"],
BuiltinScalarFunction::ArrayDistinct => &["array_distinct",
"list_distinct"],
BuiltinScalarFunction::ArrayElement => &[
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 38e8e36f1a..95b14c20ce 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -822,7 +822,6 @@ scalar_expr!(
"returns true if a given number is +0.0 or -0.0 otherwise returns false"
);
-scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");
scalar_expr!(Levenshtein, levenshtein, string1 string2, "Returns the
Levenshtein distance between the two given strings");
scalar_expr!(SubstrIndex, substr_index, string delimiter count, "Returns the
substring from str before count occurrences of the delimiter");
scalar_expr!(FindInSet, find_in_set, str strlist, "Returns a value in the
range of 1 to N if the string str is in the string list strlist consisting of N
substrings");
@@ -1292,7 +1291,6 @@ mod test {
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to,
max);
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
- test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position,
len);
test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs
new file mode 100644
index 0000000000..89702d3267
--- /dev/null
+++ b/datafusion/functions/src/core/arrowtypeof.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::DataType;
+use datafusion_common::{exec_err, Result, ScalarValue};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+#[derive(Debug)]
+pub(super) struct ArrowTypeOfFunc {
+ signature: Signature,
+}
+
+impl ArrowTypeOfFunc {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::any(1, Volatility::Immutable),
+ }
+ }
+}
+
+impl ScalarUDFImpl for ArrowTypeOfFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "arrow_typeof"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+ Ok(DataType::Utf8)
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 1 {
+ return exec_err!(
+ "arrow_typeof function requires 1 arguments, got {}",
+ args.len()
+ );
+ }
+
+ let input_data_type = args[0].data_type();
+ Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
+ "{input_data_type}"
+ ))))
+ }
+}
diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs
index 0eef084c61..3f13067a4a 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -17,6 +17,7 @@
//! "core" DataFusion functions
+mod arrowtypeof;
mod nullif;
mod nvl;
mod nvl2;
@@ -26,6 +27,7 @@ pub mod r#struct;
make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
make_udf_function!(nvl::NVLFunc, NVL, nvl);
make_udf_function!(nvl2::NVL2Func, NVL2, nvl2);
+make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof);
make_udf_function!(r#struct::StructFunc, STRUCT, r#struct);
// Export the functions out of this package, both as expr_fn as well as a list
of functions
@@ -33,5 +35,6 @@ export_functions!(
(nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it
returns value1. This can be used to perform the inverse operation of the
COALESCE expression."),
(nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns
value1"),
(nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL;
otherwise, it returns value3."),
+ (arrow_typeof, arg_1, "Returns the Arrow type of the input expression."),
(r#struct, args, "Returns a struct with the given arguments")
);
diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs
index 3ff8dbd942..1e903d7a88 100644
--- a/datafusion/functions/src/core/nullif.rs
+++ b/datafusion/functions/src/core/nullif.rs
@@ -15,8 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-//! Encoding expressions
-
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
use datafusion_expr::ColumnarValue;
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 8e0eebcd26..27af763ccf 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -691,19 +691,6 @@ pub fn create_physical_fun(
}),
BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper),
BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid),
- BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| {
- if args.len() != 1 {
- return exec_err!(
- "arrow_typeof function requires 1 arguments, got {}",
- args.len()
- );
- }
-
- let input_data_type = args[0].data_type();
- Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
- "{input_data_type}"
- ))))
- }),
BuiltinScalarFunction::OverLay => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
make_scalar_function_inner(string_expressions::overlay::<i32>)(args)
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 474783c4e1..24ba8b0102 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -618,7 +618,7 @@ enum ScalarFunction {
FromUnixtime = 66;
Atan2 = 67;
// 68 was DateBin
- ArrowTypeof = 69;
+ // 69 was ArrowTypeof
CurrentDate = 70;
CurrentTime = 71;
Uuid = 72;
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index d90c2fe994..bb8d40c63b 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22152,7 +22152,6 @@ impl serde::Serialize for ScalarFunction {
Self::Power => "Power",
Self::FromUnixtime => "FromUnixtime",
Self::Atan2 => "Atan2",
- Self::ArrowTypeof => "ArrowTypeof",
Self::CurrentDate => "CurrentDate",
Self::CurrentTime => "CurrentTime",
Self::Uuid => "Uuid",
@@ -22265,7 +22264,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Power",
"FromUnixtime",
"Atan2",
- "ArrowTypeof",
"CurrentDate",
"CurrentTime",
"Uuid",
@@ -22407,7 +22405,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Power" => Ok(ScalarFunction::Power),
"FromUnixtime" => Ok(ScalarFunction::FromUnixtime),
"Atan2" => Ok(ScalarFunction::Atan2),
- "ArrowTypeof" => Ok(ScalarFunction::ArrowTypeof),
"CurrentDate" => Ok(ScalarFunction::CurrentDate),
"CurrentTime" => Ok(ScalarFunction::CurrentTime),
"Uuid" => Ok(ScalarFunction::Uuid),
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index d858b35cd7..9742c55474 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2690,7 +2690,7 @@ pub enum ScalarFunction {
FromUnixtime = 66,
Atan2 = 67,
/// 68 was DateBin
- ArrowTypeof = 69,
+ /// 69 was ArrowTypeof
CurrentDate = 70,
CurrentTime = 71,
Uuid = 72,
@@ -2822,7 +2822,6 @@ impl ScalarFunction {
ScalarFunction::Power => "Power",
ScalarFunction::FromUnixtime => "FromUnixtime",
ScalarFunction::Atan2 => "Atan2",
- ScalarFunction::ArrowTypeof => "ArrowTypeof",
ScalarFunction::CurrentDate => "CurrentDate",
ScalarFunction::CurrentTime => "CurrentTime",
ScalarFunction::Uuid => "Uuid",
@@ -2929,7 +2928,6 @@ impl ScalarFunction {
"Power" => Some(Self::Power),
"FromUnixtime" => Some(Self::FromUnixtime),
"Atan2" => Some(Self::Atan2),
- "ArrowTypeof" => Some(Self::ArrowTypeof),
"CurrentDate" => Some(Self::CurrentDate),
"CurrentTime" => Some(Self::CurrentTime),
"Uuid" => Some(Self::Uuid),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index e75c6c3532..7260135516 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -50,10 +50,10 @@ use datafusion_expr::{
acosh, array_distinct, array_element, array_except, array_intersect,
array_pop_back,
array_pop_front, array_position, array_positions, array_remove,
array_remove_all,
array_remove_n, array_repeat, array_replace, array_replace_all,
array_replace_n,
- array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
- atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce,
- concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees,
- digest, ends_with, exp,
+ array_resize, array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh,
+ bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr,
+ concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees, digest,
+ ends_with, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, floor, from_unixtime, gcd, initcap, iszero, lcm,
left,
levenshtein, ln, log, log10, log2,
@@ -538,7 +538,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::Atan2 => Self::Atan2,
ScalarFunction::Nanvl => Self::Nanvl,
ScalarFunction::Iszero => Self::Iszero,
- ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
ScalarFunction::OverLay => Self::OverLay,
ScalarFunction::Levenshtein => Self::Levenshtein,
ScalarFunction::SubstrIndex => Self::SubstrIndex,
@@ -1736,9 +1735,6 @@ pub fn parse_expr(
ScalarFunction::Iszero => {
Ok(iszero(parse_expr(&args[0], registry, codec)?))
}
- ScalarFunction::ArrowTypeof => {
- Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?))
- }
ScalarFunction::OverLay => Ok(overlay(
args.to_owned()
.iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index 92e3b0d6c4..9484b9e0ad 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1518,7 +1518,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::Atan2 => Self::Atan2,
BuiltinScalarFunction::Nanvl => Self::Nanvl,
BuiltinScalarFunction::Iszero => Self::Iszero,
- BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
BuiltinScalarFunction::OverLay => Self::OverLay,
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,