This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 9a27d8490a feat: column support for `array_append`, `array_prepend`,
`array_position` and `array_positions` (#6805)
9a27d8490a is described below
commit 9a27d8490a04afec20d607d4880a1a465ba6b783
Author: Igor Izvekov <[email protected]>
AuthorDate: Wed Jul 5 22:34:18 2023 +0300
feat: column support for `array_append`, `array_prepend`, `array_position`
and `array_positions` (#6805)
* test: sqllogictests with columns for array_append, array_prepend,
array_position and array_positions
* feat: column support for array_append and array_prepend
* feat: column support for array_position and array_positions
* fix: error type
---
.../core/tests/sqllogictests/test_files/array.slt | 491 ++++++++++++++-------
datafusion/expr/src/built_in_function.rs | 4 +-
datafusion/physical-expr/src/array_expressions.rs | 338 ++++++++------
datafusion/physical-expr/src/functions.rs | 8 +-
4 files changed, 541 insertions(+), 300 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt
b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 1f43c5f8e1..7eebb23d9c 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -19,108 +19,294 @@
## Array expressions Tests
#############
+
+### Tables
+
+
+statement ok
+CREATE TABLE values(
+ a INT,
+ b INT,
+ c INT,
+ d FLOAT,
+ e VARCHAR
+) AS VALUES
+ (1, 1, 2, 1.1, 'Lorem'),
+ (2, 3, 4, 2.2, 'ipsum'),
+ (3, 5, 6, 3.3, 'dolor'),
+ (4, 7, 8, 4.4, 'sit'),
+ (NULL, 9, 10, 5.5, 'amet'),
+ (5, NULL, 12, 6.6, ','),
+ (6, 11, NULL, 7.7, 'consectetur'),
+ (7, 13, 14, NULL, 'adipiscing'),
+ (8, 15, 16, 8.8, NULL)
+;
+
+statement ok
+CREATE TABLE arrays
+AS VALUES
+ (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2,
3.3), make_array('L', 'o', 'r', 'e', 'm')),
+ (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6),
make_array('i', 'p', NULL, 'u', 'm')),
+ (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9),
make_array('d', NULL, 'l', 'o', 'r')),
+ (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL,
12.2), make_array('s', 'i', 't')),
+ (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')),
+ (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')),
+ (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7,
18.8), NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values
+AS VALUES
+ (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+ (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'),
+ (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+ (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'),
+ (NULL, 44, 5, '@'),
+ (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'),
+ (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'),
+ (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values_without_nulls
+AS VALUES
+ (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+ (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'),
+ (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+ (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok')
+;
+
+# arrays table
+query ???
+select column1, column2, column3 from arrays;
+----
+[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m]
+[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m]
+[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r]
+[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t]
+NULL [13.3, 14.4, 15.5] [a, m, e, t]
+[[11, 12], [13, 14]] NULL [,]
+[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL
+
+# values table
+query IIIRT
+select a, b, c, d, e from values;
+----
+1 1 2 1.1 Lorem
+2 3 4 2.2 ipsum
+3 5 6 3.3 dolor
+4 7 8 4.4 sit
+NULL 9 10 5.5 amet
+5 NULL 12 6.6 ,
+6 11 NULL 7.7 consectetur
+7 13 14 NULL adipiscing
+8 15 16 8.8 NULL
+
+# arrays_values table
+query ?IIT
+select column1, column2, column3, column4 from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 ,
+[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 .
+[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 -
+[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok
+NULL 44 5 @
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $
+[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL
+
+# arrays_values_without_nulls table
+query ?II
+select column1, column2, column3 from arrays_values_without_nulls;
+----
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4
+
+### Array function tests
+
+
## make_array
-# array scalar function #1
-query ??? rowsort
+# make_array scalar function #1
+query ???
select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e',
'l', 'l', 'o');
----
[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o]
-# array scalar function #2
-query ??? rowsort
+# make_array scalar function #2
+query ???
select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)),
make_array([[[[1], [2]]]]);
----
[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]]
-# array scalar function #3
-query ?? rowsort
+# make_array scalar function #3
+query ??
select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3,
4]], [[5, 6], [7, 8]]);
----
[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
-# array scalar function #4
-query ?? rowsort
+# make_array scalar function #4
+query ??
select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o');
----
[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o]
-# array scalar function #5
-query ? rowsort
+# make_array scalar function #5
+query ?
select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5,
6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12))))
----
[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]
-# array scalar function #6
-query ? rowsort
+# make_array scalar function #6
+query ?
select make_array()
----
[]
-# array scalar function #7
-query ?? rowsort
+# make_array scalar function #7
+query ??
select make_array(make_array()), make_array(make_array(make_array()))
----
[[]] [[[]]]
-# array scalar function with nulls
-query ??? rowsort
+# make_array scalar function with nulls
+query ???
select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h',
NULL, 'l', NULL, 'o');
----
[1, , 3] [, 2.0, ] [h, , l, , o]
-# array scalar function with nulls #2
-query ?? rowsort
+# make_array scalar function with nulls #2
+query ??
select make_array(1, 2, NULL), make_array(make_array(NULL, 2),
make_array(NULL, 3));
----
[1, 2, ] [[, 2], [, 3]]
-# array scalar function with nulls #3
-query ??? rowsort
+# make_array scalar function with nulls #3
+query ???
select make_array(NULL), make_array(NULL, NULL, NULL),
make_array(make_array(NULL, NULL), make_array(NULL, NULL));
----
[] [] [[], []]
+# make_array with columns #1
+query ????
+select make_array(a), make_array(b, c), make_array(d), make_array(e) from
values;
+----
+[1] [1, 2] [1.1] [Lorem]
+[2] [3, 4] [2.2] [ipsum]
+[3] [5, 6] [3.3] [dolor]
+[4] [7, 8] [4.4] [sit]
+[0] [9, 10] [5.5] [amet]
+[5] [0, 12] [6.6] [,]
+[6] [11, 0] [7.7] [consectetur]
+[7] [13, 14] [0.0] [adipiscing]
+[8] [15, 16] [8.8] []
+
+# make_array with columns #2
+query ?
+select make_array(a, b, c, d) from values;
+----
+[1.0, 1.0, 2.0, 1.1]
+[2.0, 3.0, 4.0, 2.2]
+[3.0, 5.0, 6.0, 3.3]
+[4.0, 7.0, 8.0, 4.4]
+[0.0, 9.0, 10.0, 5.5]
+[5.0, 0.0, 12.0, 6.6]
+[6.0, 11.0, 0.0, 7.7]
+[7.0, 13.0, 14.0, 0.0]
+[8.0, 15.0, 16.0, 8.8]
+
## array_append
# array_append scalar function #2
-query ? rowsort
+query ?
select array_append(make_array(), 4);
----
[4]
# array_append scalar function #2
-query ?? rowsort
+query ??
select array_append(make_array(), make_array()), array_append(make_array(),
make_array(4));
----
[[]] [[4]]
# array_append scalar function #3
-query ??? rowsort
+query ???
select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0,
3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+# array_append with columns
+query ?
+select array_append(column1, column2) from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1]
+[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12]
+[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23]
+[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34]
+[44]
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ]
+[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55]
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66]
+
+# array_append with columns and scalars
+query ??
+select array_append(column2, 100.1), array_append(column3, '.') from arrays;
+----
+[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .]
+[, 5.5, 6.6, 100.1] [i, p, , u, m, .]
+[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .]
+[10.1, , 12.2, 100.1] [s, i, t, .]
+[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
+[100.1] [,, .]
+[16.6, 17.7, 18.8, 100.1] [.]
+
## array_prepend
# array_prepend scalar function #1
-query ? rowsort
+query ?
select array_prepend(4, make_array());
----
[4]
# array_prepend scalar function #2
-query ?? rowsort
+query ??
select array_prepend(make_array(), make_array()), array_prepend(make_array(4),
make_array());
----
[[]] [[4]]
# array_prepend scalar function #3
-query ??? rowsort
+query ???
select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0,
make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o'));
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+# array_prepend with columns
+query ?
+select array_prepend(column2, column1) from arrays_values;
+----
+[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20]
+[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30]
+[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40]
+[44]
+[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60]
+[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
+# array_prepend with columns and scalars
+query ??
+select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
+----
+[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m]
+[100.1, , 5.5, 6.6] [., i, p, , u, m]
+[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r]
+[100.1, 10.1, , 12.2] [., s, i, t]
+[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
+[100.1] [., ,]
+[100.1, 16.6, 17.7, 18.8] [.]
+
## array_fill
# array_fill scalar function #1
@@ -144,37 +330,37 @@ select array_fill(1, make_array())
## array_concat
# array_concat scalar function #1
-query ?? rowsort
+query ??
select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8,
9)), array_concat(make_array([1], [2]), make_array([3], [4]));
----
[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
# array_concat scalar function #2
-query ? rowsort
+query ?
select array_concat(make_array(make_array(1, 2), make_array(3, 4)),
make_array(make_array(5, 6), make_array(7, 8)));
----
[[1, 2], [3, 4], [5, 6], [7, 8]]
# array_concat scalar function #3
-query ? rowsort
+query ?
select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]),
make_array([7], [8], [9]));
----
[[1], [2], [3], [4], [5], [6], [7], [8], [9]]
# array_concat scalar function #4
-query ? rowsort
+query ?
select array_concat(make_array([[1]]), make_array([[2]]));
----
[[[1]], [[2]]]
# array_concat scalar function #5
-query ? rowsort
+query ?
select array_concat(make_array(2, 3), make_array());
----
[2, 3]
# array_concat scalar function #6
-query ? rowsort
+query ?
select array_concat(make_array(), make_array(2, 3));
----
[2, 3]
@@ -193,12 +379,50 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4),
array_position([1, 2,
----
4 5 2
+# array_position with columns
+query II
+select array_position(column1, column2), array_position(column1, column2,
column3) from arrays_values_without_nulls;
+----
+1 1
+2 2
+3 3
+4 4
+
+# array_position with columns and scalars
+query II
+select array_position(column1, 3), array_position(column1, 3, 5) from
arrays_values_without_nulls;
+----
+3 NULL
+NULL NULL
+NULL NULL
+NULL NULL
+
+## array_positions
+
# array_positions scalar function
-query ??? rowsort
+query ???
select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2,
3, 4, 5], 5), array_positions([1, 1, 1], 1);
----
[3, 4] [5] [1, 2, 3]
+# array_positions with columns
+query ?
+select array_positions(column1, column2) from arrays_values_without_nulls;
+----
+[1]
+[2]
+[3]
+[4]
+
+# array_positions with columns and scalars
+query ??
+select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33,
45], column2) from arrays_values_without_nulls;
+----
+[4] [1]
+[] []
+[] [3]
+[] []
+
## array_replace
# array_replace scalar function
@@ -210,7 +434,7 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3),
array_replace(make_array(1,
## array_to_string
# array_to_string scalar function
-query TTT rowsort
+query TTT
select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2,
3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|');
----
h,e,l,l,o 1-2-3-4-5 1|2|3
@@ -228,13 +452,13 @@ Error during planning: Cannot automatically convert Utf8
to List\(Field \{ name:
select array_to_string(make_array(), ',')
# array_to_string scalar function with nulls #1
-query TTT rowsort
+query TTT
select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','),
array_to_string(make_array(1, NULL, 3, NULL, 5), '-'),
array_to_string(make_array(NULL, 2.0, 3.0), '|');
----
h,l,o 1-3-5 2|3
# array_to_string scalar function with nulls #2
-query TTT rowsort
+query TTT
select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'),
array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'),
array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
----
h,-,-,-,o nil-2-nil-4-5 1|0|3
@@ -288,19 +512,19 @@ select trim_array(make_array(), 0),
trim_array(make_array(), 1)
## array_length
# array_length scalar function
-query III rowsort
+query III
select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2,
3)), array_length(make_array([1, 2], [3, 4], [5, 6]));
----
5 3 3
# array_length scalar function #2
-query III rowsort
+query III
select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1,
2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1);
----
5 3 3
# array_length scalar function #3
-query III rowsort
+query III
select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1,
2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2);
----
NULL NULL 2
@@ -312,7 +536,7 @@ Error during planning: Cannot automatically convert
List\(Field \{ name: "item",
select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3,
[3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3),
array_length(array_fill(3, [3, 2, 5]), 4);
# array_length scalar function #5
-query III rowsort
+query III
select array_length(make_array()), array_length(make_array(), 1),
array_length(make_array(), 2)
----
0 0 NULL
@@ -337,14 +561,14 @@ caused by
Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to UInt8
select array_dims(make_array()), array_dims(make_array(make_array()))
+## array_ndims
+
# array_ndims scalar function
-query III rowsort
+query III
select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3,
4])), array_ndims(make_array([[[[1], [2]]]]));
----
1 2 5
-## array_ndims
-
# array_ndims scalar function #2
query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
caused by
@@ -352,183 +576,106 @@ Error during planning: Cannot automatically convert
List\(Field \{ name: "item",
select array_ndims(array_fill(1, [1, 2, 3])),
array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
# array_ndims scalar function #3
-query II rowsort
+query II
select array_ndims(make_array()), array_ndims(make_array(make_array()))
----
1 2
-## array concatenate opeartor
-
-# array concatenate operator #1 (like array_concat scalar function)
-query ?? rowsort
-select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9),
make_array([1], [2]) || make_array([3], [4]);
-----
-[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
-
-# array concatenate operator #2 (like array_append scalar function)
-query ??? rowsort
-select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0,
make_array('h', 'e', 'l', 'l') || 'o';
-----
-[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
-# array concatenate operator #3 (like array_prepend scalar function)
-query ??? rowsort
-select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' ||
make_array('e', 'l', 'l', 'o');
-----
-[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
-# make_array
-
-query ?
-select make_array(1, 2.0)
-----
-[1.0, 2.0]
-
-query ?
-select make_array(null, 1.0)
-----
-[, 1.0]
-
-query ?
-select make_array(1, 2.0, null, 3)
-----
-[1.0, 2.0, , 3.0]
-
-query ?
-select make_array(1.0, '2', null)
-----
-[1.0, 2, ]
-
-statement ok
-create table foo1 (x int, y double) as values (1, 2.0);
-
-query ?
-select make_array(x, y) from foo1;
-----
-[1.0, 2.0]
-
-statement ok
-create table foo2 (x float, y varchar) as values (1.0, '1');
-
-query ?
-select make_array(x, y) from foo2;
-----
-[1.0, 1]
-
-# array_contains
+## array_contains
# array_contains scalar function #1
-query BBB rowsort
+query BBB
select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)),
array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
----
true true true
# array_contains scalar function #2
-query BB rowsort
+query BB
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]),
array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]);
----
true true
# array_contains scalar function #3
-query BBB rowsort
+query BBB
select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)),
array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]);
----
false false false
# array_contains scalar function #4
-query BB rowsort
+query BB
select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]),
array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]);
----
false false
# array_contains scalar function #5
-query BB rowsort
+query BB
select array_contains([true, true, false, true, false], [true, false, false]),
array_contains([true, false, true], [true, true]);
----
true true
# array_contains scalar function #6
-query BB rowsort
+query BB
select array_contains(make_array(true, true, true), make_array(false, false)),
array_contains([false, false, false], [true, true]);
----
false false
-## array_contains array
+### Array operators tests
-statement ok
-CREATE TABLE t
-AS VALUES
-(make_array(1,2,3), make_array(1,2,3)),
-(make_array(1,2,3), make_array(2,3)),
-(make_array(2,3), make_array(1,2,3)),
-(null, make_array(1,2,3)),
-(make_array(2,3), null)
-;
+## array concatenate operator
+# array concatenate operator with scalars #1 (like array_concat scalar
function)
query ??
-SELECT
- column1,
- column2
-FROM t
+select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9),
make_array([1], [2]) || make_array([3], [4]);
----
-[1, 2, 3] [1, 2, 3]
-[1, 2, 3] [2, 3]
-[2, 3] [1, 2, 3]
-NULL [1, 2, 3]
-[2, 3] NULL
-
+[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
-# incorrect answer (one row) to
https://github.com/apache/arrow-datafusion/issues/6709
+# array concatenate operator with scalars #2 (like array_append scalar
function)
+query ???
+select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0,
make_array('h', 'e', 'l', 'l') || 'o';
+----
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-query BB
-SELECT
- array_contains(column1, column2) as c12,
- array_contains(column1, column2) as c21
-FROM t
+# array concatenate operator with scalars #3 (like array_prepend scalar
function)
+query ???
+select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' ||
make_array('e', 'l', 'l', 'o');
----
-true true
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-statement ok
-drop table t
+### Array casting tests
-## array_contains array (nested)
+## make_array
+# make_array scalar function #1
+query ?
+select make_array(1, 2.0)
+----
+[1.0, 2.0]
-statement ok
-CREATE TABLE t
-AS VALUES
-(make_array(make_array(2),make_array(3,4)), make_array(3,4)),
-(make_array(make_array(2),make_array(3,4)), make_array(4,3)),
-(make_array(make_array(2),make_array(3,4)), make_array(3)),
-(make_array(make_array(2),make_array(3,4)), make_array(2))
-;
+# make_array scalar function #2
+query ?
+select make_array(null, 1.0)
+----
+[, 1.0]
+# make_array scalar function #3
+query ?
+select make_array(1, 2.0, null, 3)
+----
+[1.0, 2.0, , 3.0]
-query ??
-SELECT
- column1,
- column2
-FROM t
+# make_array scalar function #4
+query ?
+select make_array(1.0, '2', null)
----
-[[2], [3, 4]] [3, 4]
-[[2], [3, 4]] [4, 3]
-[[2], [3, 4]] [3]
-[[2], [3, 4]] [2]
+[1.0, 2, ]
-# incorrect answer (one row) to
https://github.com/apache/arrow-datafusion/issues/6709
+### FixedSizeListArray
-query BB
-SELECT
- array_contains(column1, column2) as c12,
- array_contains(column1, column2) as c21
-FROM t
-----
-true true
statement ok
CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION
'tests/data/fixed_size_list_array.parquet';
@@ -566,3 +713,19 @@ query ?
select make_array(f0) from fixed_size_list_array
----
[[1, 2], [3, 4]]
+
+
+### Delete tables
+
+
+statement ok
+drop table values;
+
+statement ok
+drop table arrays;
+
+statement ok
+drop table arrays_values;
+
+statement ok
+drop table arrays_values_without_nulls;
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 6905462275..103f1047fa 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -488,9 +488,9 @@ impl BuiltinScalarFunction {
)))),
BuiltinScalarFunction::ArrayLength => Ok(UInt8),
BuiltinScalarFunction::ArrayNdims => Ok(UInt8),
- BuiltinScalarFunction::ArrayPosition => Ok(UInt8),
+ BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
BuiltinScalarFunction::ArrayPositions => {
- Ok(List(Arc::new(Field::new("item", UInt8, true))))
+ Ok(List(Arc::new(Field::new("item", UInt64, true))))
}
BuiltinScalarFunction::ArrayPrepend => Ok(List(Arc::new(Field::new(
"item",
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index bddeef526a..cd174918db 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -18,7 +18,7 @@
//! Array expressions
use arrow::array::*;
-use arrow::buffer::Buffer;
+use arrow::buffer::{Buffer, OffsetBuffer};
use arrow::compute;
use arrow::datatypes::{DataType, Field};
use core::any::type_name;
@@ -197,15 +197,53 @@ pub fn make_array(values: &[ColumnarValue]) ->
Result<ColumnarValue> {
macro_rules! append {
($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
- let child_array =
- downcast_arg!(downcast_arg!($ARRAY, ListArray).values(),
$ARRAY_TYPE);
+ let mut offsets: Vec<i32> = vec![0];
+ let mut values =
+ downcast_arg!(new_empty_array($ELEMENT.data_type()),
$ARRAY_TYPE).clone();
+
let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
- let cat = compute::concat(&[child_array, element])?;
- let mut scalars = vec![];
- for i in 0..cat.len() {
-
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+ for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+ let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+ DataFusionError::Internal(format!("offsets should not be
empty",))
+ })?;
+ match arr {
+ Some(arr) => {
+ let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+ values = downcast_arg!(
+ compute::concat(&[
+ &values,
+ child_array,
+ &$ARRAY_TYPE::from(vec![el])
+ ])?
+ .clone(),
+ $ARRAY_TYPE
+ )
+ .clone();
+ offsets.push(last_offset + child_array.len() as i32 +
1i32);
+ }
+ None => {
+ values = downcast_arg!(
+ compute::concat(&[
+ &values,
+ &$ARRAY_TYPE::from(vec![el.clone()])
+ ])?
+ .clone(),
+ $ARRAY_TYPE
+ )
+ .clone();
+ offsets.push(last_offset + 1i32);
+ }
+ }
}
- scalars
+
+ let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(),
true));
+
+ Arc::new(ListArray::try_new(
+ field,
+ OffsetBuffer::new(offsets.into()),
+ Arc::new(values),
+ None,
+ )?)
}};
}
@@ -221,7 +259,7 @@ pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
let arr = as_list_array(&args[0])?;
let element = &args[1];
- let scalars = match (arr.value_type(), element.data_type()) {
+ let res = match (arr.value_type(), element.data_type()) {
(DataType::Utf8, DataType::Utf8) => append!(arr, element,
StringArray),
(DataType::LargeUtf8, DataType::LargeUtf8) => append!(arr,
element, LargeStringArray),
(DataType::Boolean, DataType::Boolean) => append!(arr,
element, BooleanArray),
@@ -243,20 +281,58 @@ pub fn array_append(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
};
- Ok(array(scalars.as_slice())?.into_array(1))
+ Ok(res)
}
macro_rules! prepend {
($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
- let child_array =
- downcast_arg!(downcast_arg!($ARRAY, ListArray).values(),
$ARRAY_TYPE);
+ let mut offsets: Vec<i32> = vec![0];
+ let mut values =
+ downcast_arg!(new_empty_array($ELEMENT.data_type()),
$ARRAY_TYPE).clone();
+
let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
- let cat = compute::concat(&[element, child_array])?;
- let mut scalars = vec![];
- for i in 0..cat.len() {
-
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+ for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+ let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+ DataFusionError::Internal(format!("offsets should not be
empty",))
+ })?;
+ match arr {
+ Some(arr) => {
+ let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+ values = downcast_arg!(
+ compute::concat(&[
+ &values,
+ &$ARRAY_TYPE::from(vec![el]),
+ child_array
+ ])?
+ .clone(),
+ $ARRAY_TYPE
+ )
+ .clone();
+ offsets.push(last_offset + child_array.len() as i32 +
1i32);
+ }
+ None => {
+ values = downcast_arg!(
+ compute::concat(&[
+ &values,
+ &$ARRAY_TYPE::from(vec![el.clone()])
+ ])?
+ .clone(),
+ $ARRAY_TYPE
+ )
+ .clone();
+ offsets.push(last_offset + 1i32);
+ }
+ }
}
- scalars
+
+ let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(),
true));
+
+ Arc::new(ListArray::try_new(
+ field,
+ OffsetBuffer::new(offsets.into()),
+ Arc::new(values),
+ None,
+ )?)
}};
}
@@ -272,7 +348,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef>
{
let element = &args[0];
let arr = as_list_array(&args[1])?;
- let scalars = match (arr.value_type(), element.data_type()) {
+ let res = match (arr.value_type(), element.data_type()) {
(DataType::Utf8, DataType::Utf8) => prepend!(arr, element,
StringArray),
(DataType::LargeUtf8, DataType::LargeUtf8) => prepend!(arr,
element, LargeStringArray),
(DataType::Boolean, DataType::Boolean) => prepend!(arr,
element, BooleanArray),
@@ -294,7 +370,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef>
{
}
};
- Ok(array(scalars.as_slice())?.into_array(1))
+ Ok(res)
}
/// Array_concat/Array_cat SQL function
@@ -420,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
macro_rules! position {
($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{
- let child_array =
- downcast_arg!(downcast_arg!($ARRAY, ListArray).values(),
$ARRAY_TYPE);
- let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
- match child_array
+ let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+ $ARRAY
.iter()
- .skip($INDEX)
- .position(|x| x == Some(element))
- {
- Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some(
- (value + $INDEX + 1) as u8,
- )))),
- None => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
- }
+ .zip(element.iter())
+ .zip($INDEX.iter())
+ .map(|((arr, el), i)| {
+ let index = match i {
+ Some(i) => {
+ if i <= 0 {
+ 0
+ } else {
+ i - 1
+ }
+ }
+ None => {
+ return Err(DataFusionError::Execution(
+ "initial position must not be null".to_string(),
+ ))
+ }
+ };
+
+ match arr {
+ Some(arr) => {
+ let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+
+ match child_array
+ .iter()
+ .skip(index as usize)
+ .position(|x| x == el)
+ {
+ Some(value) => Ok(Some(value as u64 + index as u64
+ 1u64)),
+ None => Ok(None),
+ }
+ }
+ None => Ok(None),
+ }
+ })
+ .collect::<Result<UInt64Array>>()?
}};
}
/// Array_position SQL function
-pub fn array_position(args: &[ColumnarValue]) -> Result<ColumnarValue> {
- let arr = match &args[0] {
- ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
- ColumnarValue::Array(arr) => arr.clone(),
- };
-
- let element = match &args[1] {
- ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
- _ => {
- return Err(DataFusionError::Internal(
- "Array_position function requires scalar element".to_string(),
- ))
- }
- };
+pub fn array_position(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let arr = as_list_array(&args[0])?;
+ let element = &args[1];
- let mut index: usize = 0;
+ let mut index = Int64Array::from_value(0, arr.len());
if args.len() == 3 {
- let scalar =
- match &args[2] {
- ColumnarValue::Scalar(scalar) => scalar.clone(),
- _ => return Err(DataFusionError::Internal(
- "Array_position function requires positive integer scalar
element"
- .to_string(),
- )),
- };
-
- index =
- match scalar {
- ScalarValue::Int8(Some(value)) => value as usize,
- ScalarValue::Int16(Some(value)) => value as usize,
- ScalarValue::Int32(Some(value)) => value as usize,
- ScalarValue::Int64(Some(value)) => value as usize,
- ScalarValue::UInt8(Some(value)) => value as usize,
- ScalarValue::UInt16(Some(value)) => value as usize,
- ScalarValue::UInt32(Some(value)) => value as usize,
- ScalarValue::UInt64(Some(value)) => value as usize,
- _ => return Err(DataFusionError::Internal(
- "Array_position function requires positive integer scalar
element"
- .to_string(),
- )),
- };
-
- if index == 0 {
- index = 0;
- } else {
- index -= 1;
- }
+ index = as_int64_array(&args[2])?.clone();
}
- match arr.data_type() {
+ let res = match arr.data_type() {
DataType::List(field) => match field.data_type() {
DataType::Utf8 => position!(arr, element, index, StringArray),
DataType::LargeUtf8 => position!(arr, element, index,
LargeStringArray),
@@ -502,50 +562,75 @@ pub fn array_position(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
DataType::UInt16 => position!(arr, element, index, UInt16Array),
DataType::UInt32 => position!(arr, element, index, UInt32Array),
DataType::UInt64 => position!(arr, element, index, UInt64Array),
- data_type => Err(DataFusionError::NotImplemented(format!(
- "Array_position is not implemented for types '{data_type:?}'."
- ))),
+ data_type => {
+ return Err(DataFusionError::NotImplemented(format!(
+ "Array_position is not implemented for types
'{data_type:?}'."
+ )))
+ }
},
- data_type => Err(DataFusionError::NotImplemented(format!(
- "Array is not type '{data_type:?}'."
- ))),
- }
+ data_type => {
+ return Err(DataFusionError::NotImplemented(format!(
+ "Array is not type '{data_type:?}'."
+ )))
+ }
+ };
+
+ Ok(Arc::new(res))
}
macro_rules! positions {
($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
- let child_array =
- downcast_arg!(downcast_arg!($ARRAY, ListArray).values(),
$ARRAY_TYPE);
- let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
- let mut res = vec![];
- for (i, x) in child_array.iter().enumerate() {
- if x == Some(element) {
- res.push(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![
- Some((i + 1) as u8),
- ]))));
- }
+ let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+ let mut offsets: Vec<i32> = vec![0];
+ let mut values =
+ downcast_arg!(new_empty_array(&DataType::UInt64),
UInt64Array).clone();
+ for comp in $ARRAY
+ .iter()
+ .zip(element.iter())
+ .map(|(arr, el)| match arr {
+ Some(arr) => {
+ let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+ let res = child_array
+ .iter()
+ .enumerate()
+ .filter(|(_, x)| *x == el)
+ .flat_map(|(i, _)| Some((i + 1) as u64))
+ .collect::<UInt64Array>();
+
+ Ok(res)
+ }
+ None => Ok(downcast_arg!(
+ new_empty_array(&DataType::UInt64),
+ UInt64Array
+ )
+ .clone()),
+ })
+ .collect::<Result<Vec<UInt64Array>>>()?
+ {
+ let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+ DataFusionError::Internal(format!("offsets should not be
empty",))
+ })?;
+ values =
+ downcast_arg!(compute::concat(&[&values, &comp,])?.clone(),
UInt64Array)
+ .clone();
+ offsets.push(last_offset + comp.len() as i32);
}
- res
+ let field = Arc::new(Field::new("item", DataType::UInt64, true));
+
+ Arc::new(ListArray::try_new(
+ field,
+ OffsetBuffer::new(offsets.into()),
+ Arc::new(values),
+ None,
+ )?)
}};
}
/// Array_positions SQL function
-pub fn array_positions(args: &[ColumnarValue]) -> Result<ColumnarValue> {
- let arr = match &args[0] {
- ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
- ColumnarValue::Array(arr) => arr.clone(),
- };
-
- let element = match &args[1] {
- ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
- _ => {
- return Err(DataFusionError::Internal(
- "Array_positions function requires scalar element".to_string(),
- ))
- }
- };
+pub fn array_positions(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let arr = as_list_array(&args[0])?;
+ let element = &args[1];
let res = match arr.data_type() {
DataType::List(field) => match field.data_type() {
@@ -575,7 +660,7 @@ pub fn array_positions(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
}
};
- array(res.as_slice())
+ Ok(res)
}
macro_rules! remove {
@@ -1465,29 +1550,22 @@ mod tests {
#[test]
fn test_array_position() {
// array_position([1, 2, 3, 4], 3) = 3
- let list_array = return_array();
- let array = array_position(&[
- list_array,
- ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
- ])
- .expect("failed to initialize function array_position")
- .into_array(1);
- let result =
- as_uint8_array(&array).expect("failed to initialize function
array_position");
+ let list_array = return_array().into_array(1);
+ let array = array_position(&[list_array,
Arc::new(Int64Array::from_value(3, 1))])
+ .expect("failed to initialize function array_position");
+ let result = as_uint64_array(&array)
+ .expect("failed to initialize function array_position");
- assert_eq!(result, &UInt8Array::from(vec![3]));
+ assert_eq!(result, &UInt64Array::from(vec![3]));
}
#[test]
fn test_array_positions() {
// array_positions([1, 2, 3, 4], 3) = [3]
- let list_array = return_array();
- let array = array_positions(&[
- list_array,
- ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
- ])
- .expect("failed to initialize function array_position")
- .into_array(1);
+ let list_array = return_array().into_array(1);
+ let array =
+ array_positions(&[list_array, Arc::new(Int64Array::from_value(3,
1))])
+ .expect("failed to initialize function array_position");
let result =
as_list_array(&array).expect("failed to initialize function
array_position");
@@ -1497,7 +1575,7 @@ mod tests {
result
.value(0)
.as_any()
- .downcast_ref::<UInt8Array>()
+ .downcast_ref::<UInt64Array>()
.unwrap()
.values()
);
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 08916d89c9..215582a1a8 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -423,11 +423,11 @@ pub fn create_physical_fun(
Arc::new(|args|
make_scalar_function(array_expressions::array_ndims)(args))
}
BuiltinScalarFunction::ArrayPosition => {
- Arc::new(array_expressions::array_position)
- }
- BuiltinScalarFunction::ArrayPositions => {
- Arc::new(array_expressions::array_positions)
+ Arc::new(|args|
make_scalar_function(array_expressions::array_position)(args))
}
+ BuiltinScalarFunction::ArrayPositions => Arc::new(|args| {
+ make_scalar_function(array_expressions::array_positions)(args)
+ }),
BuiltinScalarFunction::ArrayPrepend => {
Arc::new(|args|
make_scalar_function(array_expressions::array_prepend)(args))
}