This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b1d8082c62 port array_empty and array_length (#9510)
b1d8082c62 is described below
commit b1d8082c6271e49c1de6e20002b853ca1503ffde
Author: Alex Huang <[email protected]>
AuthorDate: Sat Mar 9 13:34:45 2024 +0800
port array_empty and array_length (#9510)
---
datafusion/expr/src/built_in_function.rs | 14 ---
datafusion/expr/src/expr_fn.rs | 13 --
datafusion/functions-array/src/kernels.rs | 105 +++++++++++++++-
datafusion/functions-array/src/lib.rs | 4 +
datafusion/functions-array/src/udf.rs | 133 +++++++++++++++++++--
datafusion/physical-expr/src/array_expressions.rs | 113 +----------------
datafusion/physical-expr/src/functions.rs | 6 -
datafusion/proto/proto/datafusion.proto | 4 +-
datafusion/proto/src/generated/pbjson.rs | 6 -
datafusion/proto/src/generated/prost.rs | 8 +-
datafusion/proto/src/logical_plan/from_proto.rs | 25 ++--
datafusion/proto/src/logical_plan/to_proto.rs | 2 -
.../proto/tests/cases/roundtrip_logical_plan.rs | 11 ++
13 files changed, 251 insertions(+), 193 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index be10da3669..89c5cfcce6 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -126,10 +126,6 @@ pub enum BuiltinScalarFunction {
ArrayDistinct,
/// array_element
ArrayElement,
- /// array_empty
- ArrayEmpty,
- /// array_length
- ArrayLength,
/// array_position
ArrayPosition,
/// array_positions
@@ -360,11 +356,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
BuiltinScalarFunction::ArraySort => Volatility::Immutable,
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
- BuiltinScalarFunction::ArrayEmpty => Volatility::Immutable,
BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable,
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
BuiltinScalarFunction::ArrayExcept => Volatility::Immutable,
- BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
@@ -527,7 +521,6 @@ impl BuiltinScalarFunction {
Ok(expr_type)
}
- BuiltinScalarFunction::ArrayEmpty => Ok(Boolean),
BuiltinScalarFunction::ArrayDistinct =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] {
List(field)
@@ -537,7 +530,6 @@ impl BuiltinScalarFunction {
"The {self} function can only accept List, LargeList or
FixedSizeList as the first argument"
),
},
- BuiltinScalarFunction::ArrayLength => Ok(UInt64),
BuiltinScalarFunction::ArrayPopFront =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPopBack =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
@@ -831,15 +823,11 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
- BuiltinScalarFunction::ArrayEmpty =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayElement => {
Signature::array_and_index(self.volatility())
}
BuiltinScalarFunction::ArrayExcept => Signature::any(2,
self.volatility()),
BuiltinScalarFunction::Flatten =>
Signature::array(self.volatility()),
- BuiltinScalarFunction::ArrayLength => {
- Signature::variadic_any(self.volatility())
- }
BuiltinScalarFunction::ArrayDistinct =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayPosition => {
Signature::array_and_element_and_optional_index(self.volatility())
@@ -1396,7 +1384,6 @@ impl BuiltinScalarFunction {
&["array_concat", "array_cat", "list_concat", "list_cat"]
}
BuiltinScalarFunction::ArrayDistinct => &["array_distinct",
"list_distinct"],
- BuiltinScalarFunction::ArrayEmpty => &["empty"],
BuiltinScalarFunction::ArrayElement => &[
"array_element",
"array_extract",
@@ -1405,7 +1392,6 @@ impl BuiltinScalarFunction {
],
BuiltinScalarFunction::ArrayExcept => &["array_except",
"list_except"],
BuiltinScalarFunction::Flatten => &["flatten"],
- BuiltinScalarFunction::ArrayLength => &["array_length",
"list_length"],
BuiltinScalarFunction::ArrayPopFront => {
&["array_pop_front", "list_pop_front"]
}
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index ad69208ce9..7e1c0e77d7 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -611,12 +611,6 @@ scalar_expr!(
);
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
-scalar_expr!(
- ArrayEmpty,
- array_empty,
- array,
- "returns true for an empty array or false for a non-empty array."
-);
scalar_expr!(
Flatten,
flatten,
@@ -635,12 +629,6 @@ scalar_expr!(
first_array second_array,
"Returns an array of the elements that appear in the first array but not
in the second."
);
-scalar_expr!(
- ArrayLength,
- array_length,
- array dimension,
- "returns the length of the array dimension."
-);
scalar_expr!(
ArrayDistinct,
array_distinct,
@@ -1336,7 +1324,6 @@ mod test {
test_scalar_expr!(ArraySort, array_sort, array, desc, null_first);
test_scalar_expr!(ArrayPopFront, array_pop_front, array);
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
- test_scalar_expr!(ArrayLength, array_length, array, dimension);
test_scalar_expr!(ArrayPosition, array_position, array, element,
index);
test_scalar_expr!(ArrayPositions, array_positions, array, element);
test_scalar_expr!(ArrayPrepend, array_prepend, array, element);
diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs
index c22ddeb43a..3138843feb 100644
--- a/datafusion/functions-array/src/kernels.rs
+++ b/datafusion/functions-array/src/kernels.rs
@@ -17,19 +17,19 @@
//! implementation kernels for array functions
-use arrow::array::ListArray;
use arrow::array::{
Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array,
GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array,
LargeStringArray,
OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array,
UInt8Array,
};
+use arrow::array::{LargeListArray, ListArray};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::Field;
use arrow::datatypes::UInt64Type;
use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType};
use datafusion_common::cast::{
- as_date32_array, as_int64_array, as_interval_mdn_array,
as_large_list_array,
- as_list_array, as_string_array,
+ as_date32_array, as_generic_list_array, as_int64_array,
as_interval_mdn_array,
+ as_large_list_array, as_list_array, as_null_array, as_string_array,
};
use datafusion_common::{exec_err, not_impl_datafusion_err, DataFusionError,
Result};
use std::any::type_name;
@@ -517,3 +517,102 @@ pub fn gen_range_date(
)?);
Ok(arr)
}
+
+/// Array_empty SQL function
+pub fn array_empty(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 {
+ return exec_err!("array_empty expects one argument");
+ }
+
+ if as_null_array(&args[0]).is_ok() {
+ // Make sure to return Boolean type.
+ return Ok(Arc::new(BooleanArray::new_null(args[0].len())));
+ }
+ let array_type = args[0].data_type();
+
+ match array_type {
+ DataType::List(_) => general_array_empty::<i32>(&args[0]),
+ DataType::LargeList(_) => general_array_empty::<i64>(&args[0]),
+ _ => exec_err!("array_empty does not support type '{array_type:?}'."),
+ }
+}
+
+fn general_array_empty<O: OffsetSizeTrait>(array: &ArrayRef) -> Result<ArrayRef> {
+ let array = as_generic_list_array::<O>(array)?;
+ let builder = array
+ .iter()
+ .map(|arr| arr.map(|arr| arr.len() == arr.null_count()))
+ .collect::<BooleanArray>();
+ Ok(Arc::new(builder))
+}
+
+/// Returns the length of a concrete array dimension
+fn compute_array_length(
+ arr: Option<ArrayRef>,
+ dimension: Option<i64>,
+) -> Result<Option<u64>> {
+ let mut current_dimension: i64 = 1;
+ let mut value = match arr {
+ Some(arr) => arr,
+ None => return Ok(None),
+ };
+ let dimension = match dimension {
+ Some(value) => {
+ if value < 1 {
+ return Ok(None);
+ }
+
+ value
+ }
+ None => return Ok(None),
+ };
+
+ loop {
+ if current_dimension == dimension {
+ return Ok(Some(value.len() as u64));
+ }
+
+ match value.data_type() {
+ DataType::List(..) => {
+ value = downcast_arg!(value, ListArray).value(0);
+ current_dimension += 1;
+ }
+ DataType::LargeList(..) => {
+ value = downcast_arg!(value, LargeListArray).value(0);
+ current_dimension += 1;
+ }
+ _ => return Ok(None),
+ }
+ }
+}
+
+/// Dispatch array length computation based on the offset type.
+fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
+ let list_array = as_generic_list_array::<O>(&array[0])?;
+ let dimension = if array.len() == 2 {
+ as_int64_array(&array[1])?.clone()
+ } else {
+ Int64Array::from_value(1, list_array.len())
+ };
+
+ let result = list_array
+ .iter()
+ .zip(dimension.iter())
+ .map(|(arr, dim)| compute_array_length(arr, dim))
+ .collect::<Result<UInt64Array>>()?;
+
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+/// Array_length SQL function
+pub fn array_length(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 && args.len() != 2 {
+ return exec_err!("array_length expects one or two arguments");
+ }
+
+ match &args[0].data_type() {
+ DataType::List(_) => general_array_length::<i32>(args),
+ DataType::LargeList(_) => general_array_length::<i64>(args),
+ array_type => exec_err!("array_length does not support type '{array_type:?}'"),
+ }
+}
diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs
index 710f49761f..6f0f2beca7 100644
--- a/datafusion/functions-array/src/lib.rs
+++ b/datafusion/functions-array/src/lib.rs
@@ -45,6 +45,8 @@ pub mod expr_fn {
pub use super::array_has::array_has_all;
pub use super::array_has::array_has_any;
pub use super::udf::array_dims;
+ pub use super::udf::array_empty;
+ pub use super::udf::array_length;
pub use super::udf::array_ndims;
pub use super::udf::array_to_string;
pub use super::udf::cardinality;
@@ -64,6 +66,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
array_has::array_has_udf(),
array_has::array_has_all_udf(),
array_has::array_has_any_udf(),
+ udf::array_empty_udf(),
+ udf::array_length_udf(),
];
functions.into_iter().try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs
index 6c69553962..8dc4f722c0 100644
--- a/datafusion/functions-array/src/udf.rs
+++ b/datafusion/functions-array/src/udf.rs
@@ -22,6 +22,7 @@ use arrow::datatypes::Field;
use arrow::datatypes::IntervalUnit::MonthDayNano;
use datafusion_common::exec_err;
use datafusion_common::plan_err;
+use datafusion_common::Result;
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::Expr;
use datafusion_expr::TypeSignature::Exact;
@@ -68,7 +69,7 @@ impl ScalarUDFImpl for ArrayToString {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => Utf8,
@@ -78,7 +79,7 @@ impl ScalarUDFImpl for ArrayToString {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
crate::kernels::array_to_string(&args).map(ColumnarValue::Array)
}
@@ -129,7 +130,7 @@ impl ScalarUDFImpl for Range {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(List(Arc::new(Field::new(
"item",
@@ -138,7 +139,7 @@ impl ScalarUDFImpl for Range {
))))
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
match args[0].data_type() {
arrow::datatypes::DataType::Int64 => {
@@ -199,7 +200,7 @@ impl ScalarUDFImpl for GenSeries {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(List(Arc::new(Field::new(
"item",
@@ -208,7 +209,7 @@ impl ScalarUDFImpl for GenSeries {
))))
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
match args[0].data_type() {
arrow::datatypes::DataType::Int64 => {
@@ -263,7 +264,7 @@ impl ScalarUDFImpl for ArrayDims {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => {
@@ -275,7 +276,7 @@ impl ScalarUDFImpl for ArrayDims {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
crate::kernels::array_dims(&args).map(ColumnarValue::Array)
}
@@ -319,7 +320,7 @@ impl ScalarUDFImpl for Cardinality {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
@@ -329,7 +330,7 @@ impl ScalarUDFImpl for Cardinality {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
crate::kernels::cardinality(&args).map(ColumnarValue::Array)
}
@@ -373,7 +374,7 @@ impl ScalarUDFImpl for ArrayNdims {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
@@ -383,7 +384,7 @@ impl ScalarUDFImpl for ArrayNdims {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
crate::kernels::array_ndims(&args).map(ColumnarValue::Array)
}
@@ -392,3 +393,111 @@ impl ScalarUDFImpl for ArrayNdims {
&self.aliases
}
}
+
+make_udf_function!(
+ ArrayEmpty,
+ array_empty,
+ array,
+ "returns true for an empty array or false for a non-empty array.",
+ array_empty_udf
+);
+
+#[derive(Debug)]
+pub(super) struct ArrayEmpty {
+ signature: Signature,
+ aliases: Vec<String>,
+}
+impl ArrayEmpty {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::array(Volatility::Immutable),
+ aliases: vec![String::from("empty")],
+ }
+ }
+}
+
+impl ScalarUDFImpl for ArrayEmpty {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "empty"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ use DataType::*;
+ Ok(match arg_types[0] {
+ List(_) | LargeList(_) | FixedSizeList(_, _) => Boolean,
+ _ => {
+ return plan_err!("The array_empty function can only accept List/LargeList/FixedSizeList.");
+ }
+ })
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(args)?;
+ crate::kernels::array_empty(&args).map(ColumnarValue::Array)
+ }
+
+ fn aliases(&self) -> &[String] {
+ &self.aliases
+ }
+}
+
+make_udf_function!(
+ ArrayLength,
+ array_length,
+ array,
+ "returns the length of the array dimension.",
+ array_length_udf
+);
+
+#[derive(Debug)]
+pub(super) struct ArrayLength {
+ signature: Signature,
+ aliases: Vec<String>,
+}
+impl ArrayLength {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::variadic_any(Volatility::Immutable),
+ aliases: vec![String::from("array_length"), String::from("list_length")],
+ }
+ }
+}
+
+impl ScalarUDFImpl for ArrayLength {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "array_length"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ use DataType::*;
+ Ok(match arg_types[0] {
+ List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
+ _ => {
+ return plan_err!("The array_length function can only accept List/LargeList/FixedSizeList.");
+ }
+ })
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(args)?;
+ crate::kernels::array_length(&args).map(ColumnarValue::Array)
+ }
+
+ fn aliases(&self) -> &[String] {
+ &self.aliases
+ }
+}
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 8d2a283a05..495de01c76 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -17,7 +17,6 @@
//! Array expressions
-use std::any::type_name;
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
use std::sync::Arc;
@@ -32,7 +31,7 @@ use arrow_buffer::{ArrowNativeType, NullBuffer};
use arrow_schema::{FieldRef, SortOptions};
use datafusion_common::cast::{
as_generic_list_array, as_generic_string_array, as_int64_array,
as_large_list_array,
- as_list_array, as_null_array, as_string_array,
+ as_list_array, as_string_array,
};
use datafusion_common::utils::{array_into_list_array, list_ndims};
use datafusion_common::{
@@ -41,17 +40,6 @@ use datafusion_common::{
};
use itertools::Itertools;
-macro_rules! downcast_arg {
- ($ARG:expr, $ARRAY_TYPE:ident) => {{
- $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
- DataFusionError::Internal(format!(
- "could not cast to {}",
- type_name::<$ARRAY_TYPE>()
- ))
- })?
- }};
-}
-
/// Computes a BooleanArray indicating equality or inequality between elements
in a list array and a specified element array.
///
/// # Arguments
@@ -153,46 +141,6 @@ fn compare_element_to_list(
Ok(res)
}
-/// Returns the length of a concrete array dimension
-fn compute_array_length(
- arr: Option<ArrayRef>,
- dimension: Option<i64>,
-) -> Result<Option<u64>> {
- let mut current_dimension: i64 = 1;
- let mut value = match arr {
- Some(arr) => arr,
- None => return Ok(None),
- };
- let dimension = match dimension {
- Some(value) => {
- if value < 1 {
- return Ok(None);
- }
-
- value
- }
- None => return Ok(None),
- };
-
- loop {
- if current_dimension == dimension {
- return Ok(Some(value.len() as u64));
- }
-
- match value.data_type() {
- DataType::List(..) => {
- value = downcast_arg!(value, ListArray).value(0);
- current_dimension += 1;
- }
- DataType::LargeList(..) => {
- value = downcast_arg!(value, LargeListArray).value(0);
- current_dimension += 1;
- }
- _ => return Ok(None),
- }
- }
-}
-
fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> {
let data_type = args[0].data_type();
if !args.iter().all(|arg| {
@@ -1130,34 +1078,6 @@ pub fn array_concat(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}
-/// Array_empty SQL function
-pub fn array_empty(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() != 1 {
- return exec_err!("array_empty expects one argument");
- }
-
- if as_null_array(&args[0]).is_ok() {
- // Make sure to return Boolean type.
- return Ok(Arc::new(BooleanArray::new_null(args[0].len())));
- }
- let array_type = args[0].data_type();
-
- match array_type {
- DataType::List(_) => array_empty_dispatch::<i32>(&args[0]),
- DataType::LargeList(_) => array_empty_dispatch::<i64>(&args[0]),
- _ => exec_err!("array_empty does not support type '{array_type:?}'."),
- }
-}
-
-fn array_empty_dispatch<O: OffsetSizeTrait>(array: &ArrayRef) -> Result<ArrayRef> {
- let array = as_generic_list_array::<O>(array)?;
- let builder = array
- .iter()
- .map(|arr| arr.map(|arr| arr.len() == arr.null_count()))
- .collect::<BooleanArray>();
- Ok(Arc::new(builder))
-}
-
/// Array_repeat SQL function
pub fn array_repeat(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
@@ -1987,37 +1907,6 @@ pub fn flatten(args: &[ArrayRef]) -> Result<ArrayRef> {
// Ok(Arc::new(flattened_array) as ArrayRef)
}
-/// Dispatch array length computation based on the offset type.
-fn array_length_dispatch<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
- let list_array = as_generic_list_array::<O>(&array[0])?;
- let dimension = if array.len() == 2 {
- as_int64_array(&array[1])?.clone()
- } else {
- Int64Array::from_value(1, list_array.len())
- };
-
- let result = list_array
- .iter()
- .zip(dimension.iter())
- .map(|(arr, dim)| compute_array_length(arr, dim))
- .collect::<Result<UInt64Array>>()?;
-
- Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Array_length SQL function
-pub fn array_length(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() != 1 && args.len() != 2 {
- return exec_err!("array_length expects one or two arguments");
- }
-
- match &args[0].data_type() {
- DataType::List(_) => array_length_dispatch::<i32>(args),
- DataType::LargeList(_) => array_length_dispatch::<i64>(args),
- array_type => exec_err!("array_length does not support type '{array_type:?}'"),
- }
-}
-
/// Splits string at occurrences of delimiter and returns an array of parts
/// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]'
pub fn string_to_array<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index eebbb1dbea..776f6315a4 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -311,9 +311,6 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayConcat => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_concat)(args)
}),
- BuiltinScalarFunction::ArrayEmpty => Arc::new(|args| {
- make_scalar_function_inner(array_expressions::array_empty)(args)
- }),
BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_distinct)(args)
}),
@@ -323,9 +320,6 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayExcept => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_except)(args)
}),
- BuiltinScalarFunction::ArrayLength => Arc::new(|args| {
- make_scalar_function_inner(array_expressions::array_length)(args)
- }),
BuiltinScalarFunction::Flatten => {
Arc::new(|args|
make_scalar_function_inner(array_expressions::flatten)(args))
}
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index c5b20986c3..d1fef7c1ce 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -639,7 +639,7 @@ enum ScalarFunction {
ArrayConcat = 87;
// 88 was ArrayDims
ArrayRepeat = 89;
- ArrayLength = 90;
+ // 90 was ArrayLength
// 91 was ArrayNdims
ArrayPosition = 92;
ArrayPositions = 93;
@@ -662,7 +662,7 @@ enum ScalarFunction {
Flatten = 112;
// 113 was IsNan
Iszero = 114;
- ArrayEmpty = 115;
+ // 115 was ArrayEmpty
ArrayPopBack = 116;
StringToArray = 117;
// 118 was ToTimestampNanos
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index b99e957406..e4da28ed44 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22401,7 +22401,6 @@ impl serde::Serialize for ScalarFunction {
Self::ArrayAppend => "ArrayAppend",
Self::ArrayConcat => "ArrayConcat",
Self::ArrayRepeat => "ArrayRepeat",
- Self::ArrayLength => "ArrayLength",
Self::ArrayPosition => "ArrayPosition",
Self::ArrayPositions => "ArrayPositions",
Self::ArrayPrepend => "ArrayPrepend",
@@ -22417,7 +22416,6 @@ impl serde::Serialize for ScalarFunction {
Self::Nanvl => "Nanvl",
Self::Flatten => "Flatten",
Self::Iszero => "Iszero",
- Self::ArrayEmpty => "ArrayEmpty",
Self::ArrayPopBack => "ArrayPopBack",
Self::StringToArray => "StringToArray",
Self::ArrayIntersect => "ArrayIntersect",
@@ -22526,7 +22524,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayAppend",
"ArrayConcat",
"ArrayRepeat",
- "ArrayLength",
"ArrayPosition",
"ArrayPositions",
"ArrayPrepend",
@@ -22542,7 +22539,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Nanvl",
"Flatten",
"Iszero",
- "ArrayEmpty",
"ArrayPopBack",
"StringToArray",
"ArrayIntersect",
@@ -22680,7 +22676,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayAppend" => Ok(ScalarFunction::ArrayAppend),
"ArrayConcat" => Ok(ScalarFunction::ArrayConcat),
"ArrayRepeat" => Ok(ScalarFunction::ArrayRepeat),
- "ArrayLength" => Ok(ScalarFunction::ArrayLength),
"ArrayPosition" => Ok(ScalarFunction::ArrayPosition),
"ArrayPositions" => Ok(ScalarFunction::ArrayPositions),
"ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend),
@@ -22696,7 +22691,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Nanvl" => Ok(ScalarFunction::Nanvl),
"Flatten" => Ok(ScalarFunction::Flatten),
"Iszero" => Ok(ScalarFunction::Iszero),
- "ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty),
"ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack),
"StringToArray" => Ok(ScalarFunction::StringToArray),
"ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect),
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index 62b3d39580..30b76c16bc 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2727,7 +2727,7 @@ pub enum ScalarFunction {
ArrayConcat = 87,
/// 88 was ArrayDims
ArrayRepeat = 89,
- ArrayLength = 90,
+ /// 90 was ArrayLength
/// 91 was ArrayNdims
ArrayPosition = 92,
ArrayPositions = 93,
@@ -2750,7 +2750,7 @@ pub enum ScalarFunction {
Flatten = 112,
/// 113 was IsNan
Iszero = 114,
- ArrayEmpty = 115,
+ /// 115 was ArrayEmpty
ArrayPopBack = 116,
StringToArray = 117,
/// 118 was ToTimestampNanos
@@ -2863,7 +2863,6 @@ impl ScalarFunction {
ScalarFunction::ArrayAppend => "ArrayAppend",
ScalarFunction::ArrayConcat => "ArrayConcat",
ScalarFunction::ArrayRepeat => "ArrayRepeat",
- ScalarFunction::ArrayLength => "ArrayLength",
ScalarFunction::ArrayPosition => "ArrayPosition",
ScalarFunction::ArrayPositions => "ArrayPositions",
ScalarFunction::ArrayPrepend => "ArrayPrepend",
@@ -2879,7 +2878,6 @@ impl ScalarFunction {
ScalarFunction::Nanvl => "Nanvl",
ScalarFunction::Flatten => "Flatten",
ScalarFunction::Iszero => "Iszero",
- ScalarFunction::ArrayEmpty => "ArrayEmpty",
ScalarFunction::ArrayPopBack => "ArrayPopBack",
ScalarFunction::StringToArray => "StringToArray",
ScalarFunction::ArrayIntersect => "ArrayIntersect",
@@ -2982,7 +2980,6 @@ impl ScalarFunction {
"ArrayAppend" => Some(Self::ArrayAppend),
"ArrayConcat" => Some(Self::ArrayConcat),
"ArrayRepeat" => Some(Self::ArrayRepeat),
- "ArrayLength" => Some(Self::ArrayLength),
"ArrayPosition" => Some(Self::ArrayPosition),
"ArrayPositions" => Some(Self::ArrayPositions),
"ArrayPrepend" => Some(Self::ArrayPrepend),
@@ -2998,7 +2995,6 @@ impl ScalarFunction {
"Nanvl" => Some(Self::Nanvl),
"Flatten" => Some(Self::Flatten),
"Iszero" => Some(Self::Iszero),
- "ArrayEmpty" => Some(Self::ArrayEmpty),
"ArrayPopBack" => Some(Self::ArrayPopBack),
"StringToArray" => Some(Self::StringToArray),
"ArrayIntersect" => Some(Self::ArrayIntersect),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index c26b8acbf1..ece3caa094 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -47,14 +47,14 @@ use datafusion_common::{
use datafusion_expr::expr::Unnest;
use datafusion_expr::window_frame::{check_window_frame,
regularize_window_order_by};
use datafusion_expr::{
- acosh, array, array_append, array_concat, array_distinct, array_element,
array_empty,
- array_except, array_intersect, array_length, array_pop_back,
array_pop_front,
- array_position, array_positions, array_prepend, array_remove,
array_remove_all,
- array_remove_n, array_repeat, array_replace, array_replace_all,
array_replace_n,
- array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii,
asinh, atan,
- atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr,
coalesce,
- concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time,
date_bin,
- date_part, date_trunc, degrees, digest, ends_with, exp,
+ acosh, array, array_append, array_concat, array_distinct, array_element,
+ array_except, array_intersect, array_pop_back, array_pop_front,
array_position,
+ array_positions, array_prepend, array_remove, array_remove_all,
array_remove_n,
+ array_repeat, array_replace, array_replace_all, array_replace_n,
array_resize,
+ array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
atan2, atanh,
+ bit_length, btrim, cbrt, ceil, character_length, chr, coalesce,
concat_expr,
+ concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin,
date_part,
+ date_trunc, degrees, digest, ends_with, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap,
iszero, lcm,
left, levenshtein, ln, log, log10, log2,
@@ -480,12 +480,10 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::ArrayAppend => Self::ArrayAppend,
ScalarFunction::ArraySort => Self::ArraySort,
ScalarFunction::ArrayConcat => Self::ArrayConcat,
- ScalarFunction::ArrayEmpty => Self::ArrayEmpty,
ScalarFunction::ArrayExcept => Self::ArrayExcept,
ScalarFunction::ArrayDistinct => Self::ArrayDistinct,
ScalarFunction::ArrayElement => Self::ArrayElement,
ScalarFunction::Flatten => Self::Flatten,
- ScalarFunction::ArrayLength => Self::ArrayLength,
ScalarFunction::ArrayPopFront => Self::ArrayPopFront,
ScalarFunction::ArrayPopBack => Self::ArrayPopBack,
ScalarFunction::ArrayPosition => Self::ArrayPosition,
@@ -1505,10 +1503,6 @@ pub fn parse_expr(
parse_expr(&args[2], registry, codec)?,
parse_expr(&args[3], registry, codec)?,
)),
- ScalarFunction::ArrayLength => Ok(array_length(
- parse_expr(&args[0], registry, codec)?,
- parse_expr(&args[1], registry, codec)?,
- )),
ScalarFunction::ArrayDistinct => {
Ok(array_distinct(parse_expr(&args[0], registry, codec)?))
}
@@ -1516,9 +1510,6 @@ pub fn parse_expr(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
)),
- ScalarFunction::ArrayEmpty => {
- Ok(array_empty(parse_expr(&args[0], registry, codec)?))
- }
ScalarFunction::ArrayUnion => Ok(array_union(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index 55c8542d97..43c8d7e4b2 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1459,12 +1459,10 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend,
BuiltinScalarFunction::ArraySort => Self::ArraySort,
BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat,
- BuiltinScalarFunction::ArrayEmpty => Self::ArrayEmpty,
BuiltinScalarFunction::ArrayExcept => Self::ArrayExcept,
BuiltinScalarFunction::ArrayDistinct => Self::ArrayDistinct,
BuiltinScalarFunction::ArrayElement => Self::ArrayElement,
BuiltinScalarFunction::Flatten => Self::Flatten,
- BuiltinScalarFunction::ArrayLength => Self::ArrayLength,
BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront,
BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack,
BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index fad50d3ecd..ef9f2b27aa 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -588,6 +588,17 @@ async fn roundtrip_expr_api() -> Result<()> {
cardinality(array(vec![lit(1), lit(2), lit(3)])),
range(lit(1), lit(10), lit(2)),
gen_series(lit(1), lit(10), lit(2)),
+ array_has(array(vec![lit(1), lit(2), lit(3)]), lit(1)),
+ array_has_all(
+ array(vec![lit(1), lit(2), lit(3)]),
+ array(vec![lit(1), lit(2)]),
+ ),
+ array_has_any(
+ array(vec![lit(1), lit(2), lit(3)]),
+ array(vec![lit(1), lit(4)]),
+ ),
+ array_empty(array(vec![lit(1), lit(2), lit(3)])),
+ array_length(array(vec![lit(1), lit(2), lit(3)])),
];
// ensure expressions created with the expr api can be round tripped