This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch string-view
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/string-view by this push:
new 507d978a3b feat: Implement equality = and inequality <> support for StringView (#10985)
507d978a3b is described below
commit 507d978a3b2b9fe873239ae2d4640286e423086a
Author: Alex Huang <[email protected]>
AuthorDate: Wed Jun 19 19:38:03 2024 +0800
feat: Implement equality = and inequality <> support for StringView (#10985)
* feat: Implement equality = and inequality <> support for StringView
* chore: Add tests for the StringView
* chore
* chore: Update tests for NULL
* fix: Used build_array_string!
* chore: Update string_coercion function to handle Utf8View type in binary.rs
* chore: add tests
* chore: ci
---
Cargo.toml | 24 ++---
datafusion-cli/Cargo.lock | 30 +++---
datafusion-cli/Cargo.toml | 22 ++--
datafusion/common/src/scalar/mod.rs | 2 +-
datafusion/expr/src/type_coercion/binary.rs | 1 +
datafusion/sqllogictest/test_files/string_view.slt | 113 +++++++++++++++++++++
6 files changed, 153 insertions(+), 39 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 290dd64021..be6e0c672f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -157,15 +157,15 @@ unused_imports = "deny"
## Temporary arrow-rs patch until 52.1.0 is released
[patch.crates-io]
-arrow = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-parquet = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
+arrow = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+parquet = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index b0b41a1232..15f7809ee5 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -131,7 +131,7 @@ checksum =
"96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "arrow"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -151,7 +151,7 @@ dependencies = [
[[package]]
name = "arrow-arith"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -165,7 +165,7 @@ dependencies = [
[[package]]
name = "arrow-array"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"ahash",
"arrow-buffer",
@@ -181,7 +181,7 @@ dependencies = [
[[package]]
name = "arrow-buffer"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"bytes",
"half",
@@ -191,7 +191,7 @@ dependencies = [
[[package]]
name = "arrow-cast"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -211,7 +211,7 @@ dependencies = [
[[package]]
name = "arrow-csv"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -229,7 +229,7 @@ dependencies = [
[[package]]
name = "arrow-data"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -240,7 +240,7 @@ dependencies = [
[[package]]
name = "arrow-ipc"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -254,7 +254,7 @@ dependencies = [
[[package]]
name = "arrow-json"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -273,7 +273,7 @@ dependencies = [
[[package]]
name = "arrow-ord"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -287,7 +287,7 @@ dependencies = [
[[package]]
name = "arrow-row"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"ahash",
"arrow-array",
@@ -301,12 +301,12 @@ dependencies = [
[[package]]
name = "arrow-schema"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
[[package]]
name = "arrow-select"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"ahash",
"arrow-array",
@@ -319,7 +319,7 @@ dependencies = [
[[package]]
name = "arrow-string"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -2704,7 +2704,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "52.0.0"
-source =
"git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source =
"git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
dependencies = [
"ahash",
"arrow-array",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index b488326473..0e7b712d8b 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -66,14 +66,14 @@ rstest = "0.17"
## Temporary arrow-rs patch until 52.1.0 is released
[patch.crates-io]
-arrow = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
-parquet = { git = "https://github.com/apache/arrow-rs.git", rev =
"72467c670f8c38130e4743347407f1a542e59e0c" }
+arrow = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+parquet = { git = "https://github.com/apache/arrow-rs.git", rev =
"d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
diff --git a/datafusion/common/src/scalar/mod.rs
b/datafusion/common/src/scalar/mod.rs
index 96bf4216d9..86ac115cca 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -1570,6 +1570,7 @@ impl ScalarValue {
DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
+ DataType::Utf8View => build_array_string!(StringViewArray,
Utf8View),
DataType::Utf8 => build_array_string!(StringArray, Utf8),
DataType::LargeUtf8 => build_array_string!(LargeStringArray,
LargeUtf8),
DataType::Binary => build_array_string!(BinaryArray, Binary),
@@ -1726,7 +1727,6 @@ impl ScalarValue {
| DataType::Time64(TimeUnit::Millisecond)
| DataType::Map(_, _)
| DataType::RunEndEncoded(_, _)
- | DataType::Utf8View
| DataType::BinaryView
| DataType::ListView(_)
| DataType::LargeListView(_) => {
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index d7cb4b1a3e..d57b5228cb 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -932,6 +932,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataType>
(LargeUtf8, Utf8) => Some(LargeUtf8),
(Utf8, LargeUtf8) => Some(LargeUtf8),
(LargeUtf8, LargeUtf8) => Some(LargeUtf8),
+ (Utf8View, Utf8View) | (Utf8View, Utf8) | (Utf8, Utf8View) =>
Some(Utf8View),
_ => None,
}
}
diff --git a/datafusion/sqllogictest/test_files/string_view.slt
b/datafusion/sqllogictest/test_files/string_view.slt
new file mode 100644
index 0000000000..3be3c94770
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -0,0 +1,113 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# Test = and <> on a table whose two columns are both Utf8View (StringViewArray)
+statement ok
+create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X',
'Utf8View')),
+ (arrow_cast('Xiangpeng', 'Utf8View'),
arrow_cast('Xiangpeng', 'Utf8View')),
+ (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R',
'Utf8View')),
+ (arrow_cast(NULL, 'Utf8View'), arrow_cast('R',
'Utf8View'));
+
+query B
+select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
+----
+false
+
+query B
+select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+true
+
+query B
+select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
+----
+true
+
+query B
+select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+true
+
+query ??
+select * from test where column1 = column2;
+----
+Xiangpeng Xiangpeng
+
+query ??
+select * from test where column1 <> column2;
+----
+Andrew X
+Raphael R
+
+query ??
+select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+----
+Andrew X
+
+query ??
+select * from test where column1 = 'Andrew';
+----
+Andrew X
+
+query ??
+select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+----
+Xiangpeng Xiangpeng
+Raphael R
+
+query ??
+select * from test where column1 <> 'Andrew';
+----
+Xiangpeng Xiangpeng
+Raphael R
+
+statement ok
+drop table test;
+
+
+# Test = and <> on a table where column1 is Utf8 and column2 is Utf8View (StringViewArray)
+statement ok
+create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
+ ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
+ ('Raphael', arrow_cast('R', 'Utf8View')),
+ (NULL, arrow_cast('R', 'Utf8View'));
+
+query T?
+select * from test where column1 = column2;
+----
+Xiangpeng Xiangpeng
+
+query T?
+select * from test where column1 <> column2;
+----
+Andrew X
+Raphael R
+
+query T?
+select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+----
+Andrew X
+
+query T?
+select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+----
+Xiangpeng Xiangpeng
+Raphael R
+
+statement ok
+drop table test;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]