This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6354df683f Port `arrow_typeof` to datafusion-function (#9524)
6354df683f is described below

commit 6354df683fc9dce4b47863e81f45d47a58e7d9dc
Author: Junhao Liu <[email protected]>
AuthorDate: Mon Mar 11 15:02:45 2024 -0600

    Port `arrow_typeof` to datafusion-function (#9524)
    
    * Port arrowtypeof
    
    * fmt
    
    * fix test case
    
    * revert test change
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/expr/src/built_in_function.rs        |  9 ----
 datafusion/expr/src/expr_fn.rs                  |  2 -
 datafusion/functions/src/core/arrowtypeof.rs    | 66 +++++++++++++++++++++++++
 datafusion/functions/src/core/mod.rs            |  3 ++
 datafusion/functions/src/core/nullif.rs         |  2 -
 datafusion/physical-expr/src/functions.rs       | 13 -----
 datafusion/proto/proto/datafusion.proto         |  2 +-
 datafusion/proto/src/generated/pbjson.rs        |  3 --
 datafusion/proto/src/generated/prost.rs         |  4 +-
 datafusion/proto/src/logical_plan/from_proto.rs | 12 ++---
 datafusion/proto/src/logical_plan/to_proto.rs   |  1 -
 11 files changed, 75 insertions(+), 42 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index e04bd585d4..d211728fb7 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -232,8 +232,6 @@ pub enum BuiltinScalarFunction {
     Upper,
     /// uuid
     Uuid,
-    /// arrow_typeof
-    ArrowTypeof,
     /// overlay
     OverLay,
     /// levenshtein
@@ -387,7 +385,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Trim => Volatility::Immutable,
             BuiltinScalarFunction::Upper => Volatility::Immutable,
             BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
-            BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
             BuiltinScalarFunction::OverLay => Volatility::Immutable,
             BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
             BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -612,8 +609,6 @@ impl BuiltinScalarFunction {
 
             BuiltinScalarFunction::Iszero => Ok(Boolean),
 
-            BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),
-
             BuiltinScalarFunction::OverLay => {
                 utf8_to_str_type(&input_expr_types[0], "overlay")
             }
@@ -898,7 +893,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => {
                 Signature::uniform(2, vec![Int64], self.volatility())
             }
-            BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()),
             BuiltinScalarFunction::OverLay => Signature::one_of(
                 vec![
                     Exact(vec![Utf8, Utf8, Int64, Int64]),
@@ -1087,9 +1081,6 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::SHA384 => &["sha384"],
             BuiltinScalarFunction::SHA512 => &["sha512"],
 
-            // other functions
-            BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],
-
             BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"],
             BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"],
             BuiltinScalarFunction::ArrayElement => &[
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 38e8e36f1a..95b14c20ce 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -822,7 +822,6 @@ scalar_expr!(
     "returns true if a given number is +0.0 or -0.0 otherwise returns false"
 );
 
-scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");
 scalar_expr!(Levenshtein, levenshtein, string1 string2, "Returns the Levenshtein distance between the two given strings");
 scalar_expr!(SubstrIndex, substr_index, string delimiter count, "Returns the substring from str before count occurrences of the delimiter");
 scalar_expr!(FindInSet, find_in_set, str strlist, "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings");
@@ -1292,7 +1291,6 @@ mod test {
         test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max);
         test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
 
-        test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
         test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len);
         test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
         test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs
new file mode 100644
index 0000000000..89702d3267
--- /dev/null
+++ b/datafusion/functions/src/core/arrowtypeof.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::DataType;
+use datafusion_common::{exec_err, Result, ScalarValue};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+#[derive(Debug)]
+pub(super) struct ArrowTypeOfFunc {
+    signature: Signature,
+}
+
+impl ArrowTypeOfFunc {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::any(1, Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for ArrowTypeOfFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "arrow_typeof"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Utf8)
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        if args.len() != 1 {
+            return exec_err!(
+                "arrow_typeof function requires 1 arguments, got {}",
+                args.len()
+            );
+        }
+
+        let input_data_type = args[0].data_type();
+        Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
+            "{input_data_type}"
+        ))))
+    }
+}
diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs
index 0eef084c61..3f13067a4a 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -17,6 +17,7 @@
 
 //! "core" DataFusion functions
 
+mod arrowtypeof;
 mod nullif;
 mod nvl;
 mod nvl2;
@@ -26,6 +27,7 @@ pub mod r#struct;
 make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
 make_udf_function!(nvl::NVLFunc, NVL, nvl);
 make_udf_function!(nvl2::NVL2Func, NVL2, nvl2);
+make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof);
 make_udf_function!(r#struct::StructFunc, STRUCT, r#struct);
 
 // Export the functions out of this package, both as expr_fn as well as a list of functions
@@ -33,5 +35,6 @@ export_functions!(
     (nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."),
     (nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns value1"),
     (nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3."),
+    (arrow_typeof, arg_1, "Returns the Arrow type of the input expression."),
     (r#struct, args, "Returns a struct with the given arguments")
 );
diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs
index 3ff8dbd942..1e903d7a88 100644
--- a/datafusion/functions/src/core/nullif.rs
+++ b/datafusion/functions/src/core/nullif.rs
@@ -15,8 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Encoding expressions
-
 use arrow::datatypes::DataType;
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::ColumnarValue;
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 8e0eebcd26..27af763ccf 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -691,19 +691,6 @@ pub fn create_physical_fun(
         }),
         BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper),
         BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid),
-        BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| {
-            if args.len() != 1 {
-                return exec_err!(
-                    "arrow_typeof function requires 1 arguments, got {}",
-                    args.len()
-                );
-            }
-
-            let input_data_type = args[0].data_type();
-            Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
-                "{input_data_type}"
-            ))))
-        }),
         BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() {
             DataType::Utf8 => {
                 make_scalar_function_inner(string_expressions::overlay::<i32>)(args)
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 474783c4e1..24ba8b0102 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -618,7 +618,7 @@ enum ScalarFunction {
   FromUnixtime = 66;
   Atan2 = 67;
   // 68 was DateBin
-  ArrowTypeof = 69;
+  // 69 was ArrowTypeof
   CurrentDate = 70;
   CurrentTime = 71;
   Uuid = 72;
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index d90c2fe994..bb8d40c63b 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22152,7 +22152,6 @@ impl serde::Serialize for ScalarFunction {
             Self::Power => "Power",
             Self::FromUnixtime => "FromUnixtime",
             Self::Atan2 => "Atan2",
-            Self::ArrowTypeof => "ArrowTypeof",
             Self::CurrentDate => "CurrentDate",
             Self::CurrentTime => "CurrentTime",
             Self::Uuid => "Uuid",
@@ -22265,7 +22264,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Power",
             "FromUnixtime",
             "Atan2",
-            "ArrowTypeof",
             "CurrentDate",
             "CurrentTime",
             "Uuid",
@@ -22407,7 +22405,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Power" => Ok(ScalarFunction::Power),
                     "FromUnixtime" => Ok(ScalarFunction::FromUnixtime),
                     "Atan2" => Ok(ScalarFunction::Atan2),
-                    "ArrowTypeof" => Ok(ScalarFunction::ArrowTypeof),
                     "CurrentDate" => Ok(ScalarFunction::CurrentDate),
                     "CurrentTime" => Ok(ScalarFunction::CurrentTime),
                     "Uuid" => Ok(ScalarFunction::Uuid),
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index d858b35cd7..9742c55474 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2690,7 +2690,7 @@ pub enum ScalarFunction {
     FromUnixtime = 66,
     Atan2 = 67,
     /// 68 was DateBin
-    ArrowTypeof = 69,
+    /// 69 was ArrowTypeof
     CurrentDate = 70,
     CurrentTime = 71,
     Uuid = 72,
@@ -2822,7 +2822,6 @@ impl ScalarFunction {
             ScalarFunction::Power => "Power",
             ScalarFunction::FromUnixtime => "FromUnixtime",
             ScalarFunction::Atan2 => "Atan2",
-            ScalarFunction::ArrowTypeof => "ArrowTypeof",
             ScalarFunction::CurrentDate => "CurrentDate",
             ScalarFunction::CurrentTime => "CurrentTime",
             ScalarFunction::Uuid => "Uuid",
@@ -2929,7 +2928,6 @@ impl ScalarFunction {
             "Power" => Some(Self::Power),
             "FromUnixtime" => Some(Self::FromUnixtime),
             "Atan2" => Some(Self::Atan2),
-            "ArrowTypeof" => Some(Self::ArrowTypeof),
             "CurrentDate" => Some(Self::CurrentDate),
             "CurrentTime" => Some(Self::CurrentTime),
             "Uuid" => Some(Self::Uuid),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index e75c6c3532..7260135516 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -50,10 +50,10 @@ use datafusion_expr::{
     acosh, array_distinct, array_element, array_except, array_intersect, array_pop_back,
     array_pop_front, array_position, array_positions, array_remove, array_remove_all,
     array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n,
-    array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
-    atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce,
-    concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees,
-    digest, ends_with, exp,
+    array_resize, array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh,
+    bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr,
+    concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees, digest,
+    ends_with, exp,
     expr::{self, InList, Sort, WindowFunction},
     factorial, find_in_set, floor, from_unixtime, gcd, initcap, iszero, lcm, left,
     levenshtein, ln, log, log10, log2,
@@ -538,7 +538,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::Atan2 => Self::Atan2,
             ScalarFunction::Nanvl => Self::Nanvl,
             ScalarFunction::Iszero => Self::Iszero,
-            ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
             ScalarFunction::OverLay => Self::OverLay,
             ScalarFunction::Levenshtein => Self::Levenshtein,
             ScalarFunction::SubstrIndex => Self::SubstrIndex,
@@ -1736,9 +1735,6 @@ pub fn parse_expr(
                 ScalarFunction::Iszero => {
                     Ok(iszero(parse_expr(&args[0], registry, codec)?))
                 }
-                ScalarFunction::ArrowTypeof => {
-                    Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?))
-                }
                 ScalarFunction::OverLay => Ok(overlay(
                     args.to_owned()
                         .iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index 92e3b0d6c4..9484b9e0ad 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1518,7 +1518,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::Atan2 => Self::Atan2,
             BuiltinScalarFunction::Nanvl => Self::Nanvl,
             BuiltinScalarFunction::Iszero => Self::Iszero,
-            BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
             BuiltinScalarFunction::OverLay => Self::OverLay,
             BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
             BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,

Reply via email to