This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6281637082 feat: `array-empty` (#7313)
6281637082 is described below
commit 62816370822cb5e16a46d5050a41f62016af917f
Author: Alex Huang <[email protected]>
AuthorDate: Wed Aug 23 00:45:47 2023 +0800
feat: `array-empty` (#7313)
* feat: array-empty
* add definition in pbjson
* add definition
* remove useless tests
* add the function in proto
* add doc
* refactor
* format
* remove useless code
* fix format
* support NULL
* remove redundant code
* support NULL
* fix clippy
---
datafusion/expr/src/built_in_function.rs | 8 ++++-
datafusion/expr/src/expr_fn.rs | 6 ++++
datafusion/physical-expr/src/array_expressions.rs | 15 ++++++++++
datafusion/physical-expr/src/functions.rs | 3 ++
datafusion/proto/proto/datafusion.proto | 1 +
datafusion/proto/src/generated/pbjson.rs | 3 ++
datafusion/proto/src/generated/prost.rs | 3 ++
datafusion/proto/src/logical_plan/from_proto.rs | 9 +++++-
datafusion/proto/src/logical_plan/to_proto.rs | 1 +
datafusion/sqllogictest/test_files/array.slt | 36 +++++++++++++++++++++++
docs/source/user-guide/sql/scalar_functions.md | 27 +++++++++++++++++
11 files changed, 110 insertions(+), 2 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index e8b4654b97..9a4eb74c53 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -138,6 +138,8 @@ pub enum BuiltinScalarFunction {
ArrayDims,
/// array_element
ArrayElement,
+ /// array_empty
+ ArrayEmpty,
/// array_length
ArrayLength,
/// array_ndims
@@ -360,6 +362,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Trunc => Volatility::Immutable,
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
+ BuiltinScalarFunction::ArrayEmpty => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable,
BuiltinScalarFunction::ArrayHas => Volatility::Immutable,
@@ -536,7 +539,8 @@ impl BuiltinScalarFunction {
}
BuiltinScalarFunction::ArrayHasAll
| BuiltinScalarFunction::ArrayHasAny
- | BuiltinScalarFunction::ArrayHas => Ok(Boolean),
+ | BuiltinScalarFunction::ArrayHas
+ | BuiltinScalarFunction::ArrayEmpty => Ok(Boolean),
BuiltinScalarFunction::ArrayDims => {
Ok(List(Arc::new(Field::new("item", UInt64, true))))
}
@@ -829,6 +833,7 @@ impl BuiltinScalarFunction {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayDims => Signature::any(1,
self.volatility()),
+ BuiltinScalarFunction::ArrayEmpty => Signature::any(1,
self.volatility()),
BuiltinScalarFunction::ArrayElement => Signature::any(2,
self.volatility()),
BuiltinScalarFunction::Flatten => Signature::any(1,
self.volatility()),
BuiltinScalarFunction::ArrayHasAll
@@ -1319,6 +1324,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static
[&'static str] {
&["array_concat", "array_cat", "list_concat", "list_cat"]
}
BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"],
+ BuiltinScalarFunction::ArrayEmpty => &["empty"],
BuiltinScalarFunction::ArrayElement => &[
"array_element",
"array_extract",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 47692dfefb..e3fd5ceb20 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -552,6 +552,12 @@ scalar_expr!(
first_array second_array,
"returns true, if the element appears in the first array, otherwise false."
);
+scalar_expr!(
+    ArrayEmpty,
+    array_empty,
+    array,
+    "returns true for an empty array or false for a non-empty array."
+);
scalar_expr!(
ArrayHasAll,
array_has_all,
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 4ad55f76f8..97d7ee4610 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -982,6 +982,21 @@ macro_rules! general_repeat_list {
}};
}
+/// Array_empty SQL function: for each row, yields true when the list has no
+/// non-null elements (its length equals its null count) and NULL for a NULL row.
+pub fn array_empty(args: &[ArrayRef]) -> Result<ArrayRef> {
+    // A `NullArray` argument (e.g. `empty(NULL)`) is handed back unchanged.
+    if args[0].as_any().downcast_ref::<NullArray>().is_some() {
+        return Ok(args[0].clone());
+    }
+    let array = as_list_array(&args[0])?;
+    let builder = array
+        .iter()
+        .map(|arr| arr.map(|arr| arr.len() == arr.null_count()))
+        .collect::<BooleanArray>();
+    Ok(Arc::new(builder))
+}
+
/// Array_repeat SQL function
pub fn array_repeat(args: &[ArrayRef]) -> Result<ArrayRef> {
let element = &args[0];
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 82226ecfa1..2d6dbfdf52 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -425,6 +425,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayConcat => {
Arc::new(|args|
make_scalar_function(array_expressions::array_concat)(args))
}
+ BuiltinScalarFunction::ArrayEmpty => {
+ Arc::new(|args|
make_scalar_function(array_expressions::array_empty)(args))
+ }
BuiltinScalarFunction::ArrayHasAll => {
Arc::new(|args|
make_scalar_function(array_expressions::array_has_all)(args))
}
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index e4ef7b1bd4..f31a593ad5 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -597,6 +597,7 @@ enum ScalarFunction {
Flatten = 112;
Isnan = 113;
Iszero = 114;
+ ArrayEmpty = 115;
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index f1a9e9c7bb..7d1a18349c 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -18947,6 +18947,7 @@ impl serde::Serialize for ScalarFunction {
Self::Flatten => "Flatten",
Self::Isnan => "Isnan",
Self::Iszero => "Iszero",
+ Self::ArrayEmpty => "ArrayEmpty",
};
serializer.serialize_str(variant)
}
@@ -19073,6 +19074,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Flatten",
"Isnan",
"Iszero",
+ "ArrayEmpty",
];
struct GeneratedVisitor;
@@ -19230,6 +19232,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Flatten" => Ok(ScalarFunction::Flatten),
"Isnan" => Ok(ScalarFunction::Isnan),
"Iszero" => Ok(ScalarFunction::Iszero),
+ "ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty),
_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
}
}
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index 6cf402fe66..fc55b7e23a 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2377,6 +2377,7 @@ pub enum ScalarFunction {
Flatten = 112,
Isnan = 113,
Iszero = 114,
+ ArrayEmpty = 115,
}
impl ScalarFunction {
/// String value of the enum field names used in the ProtoBuf definition.
@@ -2500,6 +2501,7 @@ impl ScalarFunction {
ScalarFunction::Flatten => "Flatten",
ScalarFunction::Isnan => "Isnan",
ScalarFunction::Iszero => "Iszero",
+ ScalarFunction::ArrayEmpty => "ArrayEmpty",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
@@ -2620,6 +2622,7 @@ impl ScalarFunction {
"Flatten" => Some(Self::Flatten),
"Isnan" => Some(Self::Isnan),
"Iszero" => Some(Self::Iszero),
+ "ArrayEmpty" => Some(Self::ArrayEmpty),
_ => None,
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index d3329c6967..c5ab0c25f6 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -34,7 +34,6 @@ use datafusion_common::{
internal_err, Column, DFField, DFSchema, DFSchemaRef, DataFusionError,
OwnedTableReference, Result, ScalarValue,
};
-use datafusion_expr::expr::{Alias, Placeholder};
use datafusion_expr::{
abs, acos, acosh, array, array_append, array_concat, array_dims,
array_element,
array_has, array_has_all, array_has_any, array_length, array_ndims,
array_position,
@@ -59,6 +58,10 @@ use datafusion_expr::{
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame,
WindowFrameBound,
WindowFrameUnits,
};
+use datafusion_expr::{
+ array_empty,
+ expr::{Alias, Placeholder},
+};
use std::sync::Arc;
#[derive(Debug)]
@@ -452,6 +455,7 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::ToTimestamp => Self::ToTimestamp,
ScalarFunction::ArrayAppend => Self::ArrayAppend,
ScalarFunction::ArrayConcat => Self::ArrayConcat,
+ ScalarFunction::ArrayEmpty => Self::ArrayEmpty,
ScalarFunction::ArrayHasAll => Self::ArrayHasAll,
ScalarFunction::ArrayHasAny => Self::ArrayHasAny,
ScalarFunction::ArrayHas => Self::ArrayHas,
@@ -1355,6 +1359,9 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
+ ScalarFunction::ArrayEmpty => {
+ Ok(array_empty(parse_expr(&args[0], registry)?))
+ }
ScalarFunction::ArrayNdims => {
Ok(array_ndims(parse_expr(&args[0], registry)?))
}
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index cb32964381..82df53af92 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1451,6 +1451,7 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::ToTimestamp => Self::ToTimestamp,
BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend,
BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat,
+ BuiltinScalarFunction::ArrayEmpty => Self::ArrayEmpty,
BuiltinScalarFunction::ArrayHasAll => Self::ArrayHasAll,
BuiltinScalarFunction::ArrayHasAny => Self::ArrayHasAny,
BuiltinScalarFunction::ArrayHas => Self::ArrayHas,
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index eb949c4f86..bd16072b29 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -2363,6 +2363,42 @@ from flatten_table;
[1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4]
[1, 2, 3, 4, 5, 6] [8] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+# empty scalar function #1
+query B
+select empty(make_array(1));
+----
+false
+
+# empty scalar function #2
+query B
+select empty(make_array());
+----
+true
+
+# empty scalar function #3
+query B
+select empty(make_array(NULL));
+----
+true
+
+# empty scalar function #4
+query B
+select empty(NULL);
+----
+NULL
+
+# empty scalar function #5
+query B
+select empty(column1) from arrays;
+----
+false
+false
+false
+false
+NULL
+false
+false
+
### Delete tables
statement ok
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 6dbe5c05f6..c6e2f5ddd8 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1495,6 +1495,7 @@ from_unixtime(expression)
- [array_slice](#array_slice)
- [array_to_string](#array_to_string)
- [cardinality](#cardinality)
+- [empty](#empty)
- [list_append](#list_append)
- [list_cat](#list_cat)
- [list_concat](#list_concat)
@@ -1693,6 +1694,8 @@ array_element(array, index)
- list_element
- list_extract
+### `array_empty`
+
### `array_extract`
_Alias of [array_element](#array_element)._
@@ -2188,6 +2191,30 @@ cardinality(array)
+--------------------------------------+
```
+### `empty`
+
+Returns true for an empty array or false for a non-empty array.
+
+```
+empty(array)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+ Can be a constant, column, or function, and any combination of array
operators.
+
+#### Example
+
+```
+❯ select empty([1]);
++------------------+
+| empty(List([1])) |
++------------------+
+| false            |
++------------------+
+```
+
### `list_append`
_Alias of [array_append](#array_append)._