This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b412436831 Automate sqllogictest for String, LargeString and
StringView behavior (#12525)
b412436831 is described below
commit b412436831cdb73d958ab78e1c5ef80c8fb20aa8
Author: Jax Liu <[email protected]>
AuthorDate: Sat Sep 21 03:28:14 2024 +0800
Automate sqllogictest for String, LargeString and StringView behavior
(#12525)
* implement the testing framework for string type
* move the test to new framework
* add readme for string test
* add license and invoke prettier
---
.../sqllogictest/test_files/string/README.md | 44 ++
.../init_data.slt.part} | 51 +-
.../substr_runner.slt => string/large_string.slt} | 54 +-
.../substr/substr_runner.slt => string/string.slt} | 52 +-
.../string_literal.slt} | 17 +-
.../test_files/string/string_query.slt.part | 326 ++++++++++++
.../sqllogictest/test_files/string/string_view.slt | 356 ++++++++++++++
.../string_functions/substr/substr_table.slt.part | 45 --
datafusion/sqllogictest/test_files/string_view.slt | 547 ---------------------
9 files changed, 777 insertions(+), 715 deletions(-)
diff --git a/datafusion/sqllogictest/test_files/string/README.md
b/datafusion/sqllogictest/test_files/string/README.md
new file mode 100644
index 0000000000..8693ef16f9
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string/README.md
@@ -0,0 +1,44 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# String Test Files
+
+This directory contains test files for the `string` test suite.
+To ensure consistent behavior across different string types, we should run the
same tests with the same inputs on all string types.
+There is a framework in place to execute the same tests across different
string types.
+
+See [#12415](https://github.com/apache/datafusion/issues/12415) for more
background.
+
+## Directory Structure
+
+```
+string/
+ - init_data.slt.part // generate the testing data
+ - string_query.slt.part // the sharing tests for all string type
+ - string.slt // the entrypoint for string type
+ - large_string.slt // the entrypoint for large_string type
+ - string_view.slt // the entrypoint for string_view type and the
string_view specific tests
+ - string_literal.slt // the tests for string literal
+```
+
+## Pattern for Test Entry Point Files
+
+Any entry point file should include `init_data.slt.part` and
`string_query.slt.part`.
+
+Planning-related tests (e.g., EXPLAIN ...) should be placed in their own entry
point file (e.g., `string_view.slt`) as they are only used to assert planning
behavior specific to that type.
diff --git
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
b/datafusion/sqllogictest/test_files/string/init_data.slt.part
similarity index 57%
copy from
datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
copy to datafusion/sqllogictest/test_files/string/init_data.slt.part
index d61cfa2b74..d99401f10d 100644
---
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
+++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part
@@ -15,10 +15,12 @@
# specific language governing permissions and limitations
# under the License.
-# --------------------------------------
-# Test `substr()` with literal arguments
-# --------------------------------------
-include ./substr_literal.slt.part
+statement ok
+create table test_source as values
+ ('Andrew', 'X', 'datafusion📊🔥', '🔥'),
+ ('Xiangpeng', 'Xiangpeng', 'datafusion数据融合', 'datafusion数据融合'),
+ ('Raphael', 'R', 'datafusionДатаФусион', 'аФус'),
+ (NULL, 'R', NULL, '🔥');
# --------------------------------------
# Setup test tables with different physical string types
(Utf8/Utf8View/LargeUtf8)
@@ -29,44 +31,3 @@ create table test_substr_base (
col1 VARCHAR
) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'),
(''), (NULL);
-#
-# Run1: Utf8
-#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
-#
-# Run2: Utf8View
-#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
-#
-# Run3: LargeUtf8
-#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
-# --------------------------------------
-# Cleanup
-# --------------------------------------
-statement ok
-drop table test_substr_base;
\ No newline at end of file
diff --git
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
b/datafusion/sqllogictest/test_files/string/large_string.slt
similarity index 50%
copy from
datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
copy to datafusion/sqllogictest/test_files/string/large_string.slt
index d61cfa2b74..d90f2bffe0 100644
---
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
+++ b/datafusion/sqllogictest/test_files/string/large_string.slt
@@ -15,58 +15,36 @@
# specific language governing permissions and limitations
# under the License.
-# --------------------------------------
-# Test `substr()` with literal arguments
-# --------------------------------------
-include ./substr_literal.slt.part
+include ./init_data.slt.part
# --------------------------------------
-# Setup test tables with different physical string types
(Utf8/Utf8View/LargeUtf8)
-# and repeat tests in `substr_table.slt.part`
+# Setup test tables with different physical string types
+# and repeat tests in `string_query.slt.part`
# --------------------------------------
statement ok
-create table test_substr_base (
- col1 VARCHAR
-) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'),
(''), (NULL);
+create table test_basic_operator as
+select
+ arrow_cast(column1, 'LargeUtf8') as ascii_1,
+ arrow_cast(column2, 'LargeUtf8') as ascii_2,
+ arrow_cast(column3, 'LargeUtf8') as unicode_1,
+ arrow_cast(column4, 'LargeUtf8') as unicode_2
+from test_source;
-#
-# Run1: Utf8
-#
statement ok
create table test_substr as
-select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
+select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
#
-# Run2: Utf8View
+# common test for string-like functions and operators
#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
+include ./string_query.slt.part
#
-# Run3: LargeUtf8
+# Clean up
#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
statement ok
-drop table test_substr;
+drop table test_basic_operator;
-# --------------------------------------
-# Cleanup
-# --------------------------------------
statement ok
-drop table test_substr_base;
\ No newline at end of file
+drop table test_substr_base;
diff --git
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
b/datafusion/sqllogictest/test_files/string/string.slt
similarity index 51%
rename from
datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
rename to datafusion/sqllogictest/test_files/string/string.slt
index d61cfa2b74..435795309f 100644
---
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
+++ b/datafusion/sqllogictest/test_files/string/string.slt
@@ -15,58 +15,36 @@
# specific language governing permissions and limitations
# under the License.
-# --------------------------------------
-# Test `substr()` with literal arguments
-# --------------------------------------
-include ./substr_literal.slt.part
+include ./init_data.slt.part
# --------------------------------------
-# Setup test tables with different physical string types
(Utf8/Utf8View/LargeUtf8)
-# and repeat tests in `substr_table.slt.part`
+# Setup test tables with different physical string types
+# and repeat tests in `string_query.slt.part`
# --------------------------------------
statement ok
-create table test_substr_base (
- col1 VARCHAR
-) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'),
(''), (NULL);
+create table test_basic_operator as
+select
+ arrow_cast(column1, 'Utf8') as ascii_1,
+ arrow_cast(column2, 'Utf8') as ascii_2,
+ arrow_cast(column3, 'Utf8') as unicode_1,
+ arrow_cast(column4, 'Utf8') as unicode_2
+from test_source;
-#
-# Run1: Utf8
-#
statement ok
create table test_substr as
select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
#
-# Run2: Utf8View
+# common test for string-like functions and operators
#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
+include ./string_query.slt.part
#
-# Run3: LargeUtf8
+# Clean up
#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
statement ok
-drop table test_substr;
+drop table test_basic_operator;
-# --------------------------------------
-# Cleanup
-# --------------------------------------
statement ok
-drop table test_substr_base;
\ No newline at end of file
+drop table test_substr;
diff --git
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
b/datafusion/sqllogictest/test_files/string/string_literal.slt
similarity index 87%
rename from
datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
rename to datafusion/sqllogictest/test_files/string/string_literal.slt
index dac5901662..37dc1a8ce8 100644
---
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
+++ b/datafusion/sqllogictest/test_files/string/string_literal.slt
@@ -56,8 +56,8 @@ SELECT substr('alphabet', 3, 20)
phabet
query TT
-select
- substr(arrow_cast('alphabet', 'LargeUtf8'), 3, 20),
+select
+ substr(arrow_cast('alphabet', 'LargeUtf8'), 3, 20),
substr(arrow_cast('alphabet', 'Utf8View'), 3, 20);
----
phabet phabet
@@ -99,7 +99,7 @@ SELECT
# Nulls
query TTTTTTTTTT
-SELECT
+SELECT
substr('alphabet', NULL),
substr(NULL, 1),
substr(NULL, NULL),
@@ -134,3 +134,14 @@ select substr(arrow_cast('foo', 'Utf8View'), 1, -1);
statement error Execution error: negative substring length not allowed
select substr('', 1, -1);
+
+# StringView scalar to StringView scalar
+
+query BBBB
+select
+ arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+false true true true
diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part
b/datafusion/sqllogictest/test_files/string/string_query.slt.part
new file mode 100644
index 0000000000..4f73795400
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part
@@ -0,0 +1,326 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file is intended to be run with tables already defined
+# with standard values, but different types in string columns
+# (String, StringView, etc.)
+
+# select
+query TTTT
+SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
+----
+Andrew X datafusion📊🔥 🔥
+Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合
+Raphael R datafusionДатаФусион аФус
+NULL R NULL 🔥
+
+# --------------------------------------
+# column comparison as filters
+# --------------------------------------
+
+query TT
+select ascii_1, ascii_2 from test_basic_operator where ascii_1 = ascii_2
+----
+Xiangpeng Xiangpeng
+
+query TT
+select ascii_1, ascii_2 from test_basic_operator where ascii_1 <> ascii_2
+----
+Andrew X
+Raphael R
+
+query TT
+select unicode_1, unicode_2 from test_basic_operator where unicode_1 =
unicode_2
+----
+datafusion数据融合 datafusion数据融合
+
+query TT
+select unicode_1, unicode_2 from test_basic_operator where unicode_1 <>
unicode_2
+----
+datafusion📊🔥 🔥
+datafusionДатаФусион аФус
+
+query TT
+select ascii_1, unicode_1 from test_basic_operator where ascii_1 = unicode_1
+----
+
+query TT
+select ascii_1, unicode_1 from test_basic_operator where ascii_1 <> unicode_1
+----
+Andrew datafusion📊🔥
+Xiangpeng datafusion数据融合
+Raphael datafusionДатаФусион
+
+# --------------------------------------
+# column comparison
+# --------------------------------------
+query TTTTBBBBBB
+select
+ ascii_1, ascii_2, unicode_1, unicode_2,
+ ascii_1 = ascii_2,
+ ascii_1 <> ascii_2,
+ unicode_1 = unicode_2,
+ unicode_1 <> unicode_2,
+ ascii_1 = unicode_1,
+ ascii_1 <> unicode_1
+from test_basic_operator;
+----
+Andrew X datafusion📊🔥 🔥 false true false true false true
+Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true false true false false
true
+Raphael R datafusionДатаФусион аФус false true false true false true
+NULL R NULL 🔥 NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# column to StringView scalar comparison
+# --------------------------------------
+query TTBBBB
+select
+ ascii_1, unicode_1,
+ ascii_1 = arrow_cast('Andrew', 'Utf8View'),
+ ascii_1 <> arrow_cast('Andrew', 'Utf8View'),
+ unicode_1 = arrow_cast('datafusion数据融合', 'Utf8View'),
+ unicode_1 <> arrow_cast('datafusion数据融合', 'Utf8View')
+from test_basic_operator;
+----
+Andrew datafusion📊🔥 true false false true
+Xiangpeng datafusion数据融合 false true true false
+Raphael datafusionДатаФусион false true false true
+NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# column to String scalar
+# --------------------------------------
+query TTBBBB
+select
+ ascii_1, unicode_1,
+ ascii_1 = arrow_cast('Andrew', 'Utf8'),
+ ascii_1 <> arrow_cast('Andrew', 'Utf8'),
+ unicode_1 = arrow_cast('datafusion数据融合', 'Utf8'),
+ unicode_1 <> arrow_cast('datafusion数据融合', 'Utf8')
+from test_basic_operator;
+----
+Andrew datafusion📊🔥 true false false true
+Xiangpeng datafusion数据融合 false true true false
+Raphael datafusionДатаФусион false true false true
+NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# column to LargeString scalar
+# --------------------------------------
+query TTBBBB
+select
+ ascii_1, unicode_1,
+ ascii_1 = arrow_cast('Andrew', 'LargeUtf8'),
+ ascii_1 <> arrow_cast('Andrew', 'LargeUtf8'),
+ unicode_1 = arrow_cast('datafusion数据融合', 'LargeUtf8'),
+ unicode_1 <> arrow_cast('datafusion数据融合', 'LargeUtf8')
+from test_basic_operator;
+----
+Andrew datafusion📊🔥 true false false true
+Xiangpeng datafusion数据融合 false true true false
+Raphael datafusionДатаФусион false true false true
+NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# substr function
+# --------------------------------------
+
+query TTTTTTTTTTTTTT
+select
+ substr(c1, 1),
+ substr(c1, 3),
+ substr(c1, 100),
+ substr(c1, -1),
+ substr(c1, 0, 0),
+ substr(c1, -1, 2),
+ substr(c1, -2, 10),
+ substr(c1, -100, 200),
+ substr(c1, -10, 10),
+ substr(c1, -100, 10),
+ substr(c1, 1, 100),
+ substr(c1, 5, 3),
+ substr(c1, 100, 200),
+ substr(c1, 8, 0)
+from test_substr;
+----
+foo o (empty) foo (empty) (empty) foo foo (empty) (empty) foo (empty) (empty)
(empty)
+hello🌏世界 llo🌏世界 (empty) hello🌏世界 (empty) (empty) hello🌏世 hello🌏世界 (empty)
(empty) hello🌏世界 o🌏世 (empty) (empty)
+💩 (empty) (empty) 💩 (empty) (empty) 💩 💩 (empty) (empty) 💩 (empty) (empty)
(empty)
+ThisIsAVeryLongASCIIString isIsAVeryLongASCIIString (empty)
ThisIsAVeryLongASCIIString (empty) (empty) ThisIsA ThisIsAVeryLongASCIIString
(empty) (empty) ThisIsAVeryLongASCIIString IsA (empty) (empty)
+(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty)
(empty) (empty) (empty) (empty) (empty)
+NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# test distinct aggregate
+# --------------------------------------
+query II
+SELECT
+ COUNT(DISTINCT ascii_1),
+ COUNT(DISTINCT unicode_1)
+FROM
+ test_basic_operator
+----
+3 3
+
+query II
+SELECT
+ COUNT(DISTINCT ascii_1),
+ COUNT(DISTINCT unicode_1)
+FROM
+ test_basic_operator
+GROUP BY ascii_2;
+----
+1 1
+1 1
+1 1
+
+query II
+SELECT
+ COUNT(DISTINCT ascii_1),
+ COUNT(DISTINCT unicode_1)
+FROM
+ test_basic_operator
+GROUP BY unicode_2;
+----
+1 1
+1 1
+1 1
+
+# --------------------------------------
+# STARTS_WITH function
+# --------------------------------------
+
+query BBBB
+SELECT
+ STARTS_WITH(ascii_1, ascii_2),
+ STARTS_WITH(unicode_1, unicode_2),
+ STARTS_WITH(ascii_1, unicode_2),
+ STARTS_WITH(unicode_1, ascii_2)
+FROM test_basic_operator
+----
+false false false false
+true true false false
+true false false false
+NULL NULL NULL NULL
+
+query BBBB
+SELECT
+ STARTS_WITH(ascii_1, 'And'),
+ STARTS_WITH(ascii_2, 'And'),
+ STARTS_WITH(unicode_1, 'data'),
+ STARTS_WITH(unicode_2, 'data')
+FROM test_basic_operator
+----
+true false true false
+false false true true
+false false true false
+NULL false NULL false
+
+# --------------------------------------
+# Test TRANSLATE
+# --------------------------------------
+
+query T
+SELECT
+ TRANSLATE(ascii_1, 'foo', 'bar') as c
+FROM test_basic_operator;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
+query T
+SELECT
+ TRANSLATE(unicode_1, 'foo', 'bar') as c
+FROM test_basic_operator;
+----
+databusirn📊🔥
+databusirn数据融合
+databusirnДатаФусион
+NULL
+
+# --------------------------------------
+# Test REGEXP_REPLACE
+# --------------------------------------
+
+# Should run REGEXP_REPLACE with Scalar value for string
+query T
+SELECT
+ REGEXP_REPLACE(ascii_1, 'e', 'f') AS k
+FROM test_basic_operator;
+----
+Andrfw
+Xiangpfng
+Raphafl
+NULL
+
+# Should run REGEXP_REPLACE with Scalar value for string with flag
+query T
+SELECT
+ REGEXP_REPLACE(ascii_1, 'e', 'f', 'i') AS k
+FROM test_basic_operator;
+----
+Andrfw
+Xiangpfng
+Raphafl
+NULL
+
+# Should run REGEXP_REPLACE with ScalarArray value for string
+query T
+SELECT
+ REGEXP_REPLACE(ascii_1, lower(ascii_1), 'bar') AS k
+FROM test_basic_operator;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
+# Should run REGEXP_REPLACE with ScalarArray value for string with flag
+query T
+SELECT
+ REGEXP_REPLACE(ascii_1, lower(ascii_1), 'bar', 'g') AS k
+FROM test_basic_operator;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
+# --------------------------------------
+# Test Initcap
+# --------------------------------------
+statement ok
+CREATE TABLE test_lowercase AS SELECT
+ lower(ascii_1) as ascii_1_lower,
+ lower(unicode_1) as unicode_1_lower
+FROM test_basic_operator;
+
+query TT
+SELECT
+ INITCAP(ascii_1_lower) as c1,
+ INITCAP(unicode_1_lower) as c2
+FROM test_lowercase;
+----
+Andrew Datafusion📊🔥
+Xiangpeng Datafusion数据融合
+Raphael Datafusionдатафусион
+NULL NULL
+
+statement ok
+drop table test_lowercase;
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt
b/datafusion/sqllogictest/test_files/string/string_view.slt
new file mode 100644
index 0000000000..ca08f647d2
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -0,0 +1,356 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include ./init_data.slt.part
+
+# --------------------------------------
+# Setup test tables with different physical string types
+# and repeat tests in `string_query.slt.part`
+# --------------------------------------
+statement ok
+create table test_basic_operator as
+select
+ arrow_cast(column1, 'Utf8View') as ascii_1,
+ arrow_cast(column2, 'Utf8View') as ascii_2,
+ arrow_cast(column3, 'Utf8View') as unicode_1,
+ arrow_cast(column4, 'Utf8View') as unicode_2
+from test_source;
+
+statement ok
+create table test_substr as
+select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
+
+statement ok
+drop table test_source
+
+#
+# common test for string-like functions and operators
+#
+include ./string_query.slt.part
+
+#
+# Clean up
+#
+statement ok
+drop table test_basic_operator;
+
+statement ok
+drop table test_substr_base;
+
+
+# --------------------------------------
+# String_view specific tests
+# --------------------------------------
+statement ok
+create table test_source as values
+ ('Andrew', 'X'),
+ ('Xiangpeng', 'Xiangpeng'),
+ ('Raphael', 'R'),
+ (NULL, 'R');
+
+# Table with the different combination of column types
+statement ok
+create table test as
+SELECT
+ arrow_cast(column1, 'Utf8') as column1_utf8,
+ arrow_cast(column2, 'Utf8') as column2_utf8,
+ arrow_cast(column1, 'LargeUtf8') as column1_large_utf8,
+ arrow_cast(column2, 'LargeUtf8') as column2_large_utf8,
+ arrow_cast(column1, 'Utf8View') as column1_utf8view,
+ arrow_cast(column2, 'Utf8View') as column2_utf8view,
+ arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
+ arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
+FROM test_source;
+
+statement ok
+drop table test_source
+
+########
+## StringView to Other Types column
+########
+
+# test StringViewArray with Utf8 columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_utf8,
+ column2_utf8 = column1_utf8view,
+ column1_utf8view <> column2_utf8,
+ column2_utf8 <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# test StringViewArray with LargeUtf8 columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_large_utf8,
+ column2_large_utf8 = column1_utf8view,
+ column1_utf8view <> column2_large_utf8,
+ column2_large_utf8 <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## StringView to Dictionary
+########
+
+# test StringViewArray with Dictionary columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_dict,
+ column2_dict = column1_utf8view,
+ column1_utf8view <> column2_dict,
+ column2_dict <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to Dict scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
+ arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
+ arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Dict column to StringView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_dict = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_dict,
+ column1_dict <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_dict
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## Coercion Rules
+########
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+
+# Filter should have a StringView literal and no column cast
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# reverse order should be the same
+query TT
+explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8 =
arrow_cast('Andrew', 'Utf8View');
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') =
column1_utf8;
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view =
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew',
'Dictionary(Int32, Utf8)') = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# compare string / stringview
+# Should cast string -> stringview (which is cheap), not stringview -> string
(which is not)
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
+
+query TT
+EXPLAIN SELECT
+ COUNT(DISTINCT column1_utf8),
+ COUNT(DISTINCT column1_utf8view),
+ COUNT(DISTINCT column1_dict)
+FROM test;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT test.column1_utf8),
count(DISTINCT test.column1_utf8view), count(DISTINCT test.column1_dict)]]
+02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict]
+
+
+### `STARTS_WITH`
+
+# Test STARTS_WITH with utf8view against utf8view, utf8, and largeutf8
+# (should be no casts)
+query TT
+EXPLAIN SELECT
+ STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
+ STARTS_WITH(column1_utf8view, column2_utf8) as c2,
+ STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(test.column1_utf8view, test.column2_utf8view) AS
c1, starts_with(test.column1_utf8view, CAST(test.column2_utf8 AS Utf8View)) AS
c2, starts_with(test.column1_utf8view, CAST(test.column2_large_utf8 AS
Utf8View)) AS c3
+02)--TableScan: test projection=[column2_utf8, column2_large_utf8,
column1_utf8view, column2_utf8view]
+
+query BBB
+SELECT
+ STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
+ STARTS_WITH(column1_utf8view, column2_utf8) as c2,
+ STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
+FROM test;
+----
+false false false
+true true true
+true true true
+NULL NULL NULL
+
+# Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8
+# Should work, but will have to cast to common types
+# should cast utf8 -> utf8view and largeutf8 -> utf8view
+query TT
+EXPLAIN SELECT
+ STARTS_WITH(column1_utf8, column2_utf8view) as c1,
+ STARTS_WITH(column1_utf8, column2_utf8) as c3,
+ STARTS_WITH(column1_utf8, column2_large_utf8) as c4
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1,
starts_with(test.column1_utf8, test.column2_utf8) AS c3,
starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
+02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1,
test.column1_utf8, test.column2_utf8, test.column2_large_utf8,
test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column2_large_utf8, column2_utf8view]
+
+query BBB
+ SELECT
+ STARTS_WITH(column1_utf8, column2_utf8view) as c1,
+ STARTS_WITH(column1_utf8, column2_utf8) as c3,
+ STARTS_WITH(column1_utf8, column2_large_utf8) as c4
+FROM test;
+----
+false false false
+true true true
+true true true
+NULL NULL NULL
+
+
+# Test STARTS_WITH with utf8view against literals
+# In this case, the literals should be cast to utf8view. The columns
+# should not be cast to utf8.
+query TT
+EXPLAIN SELECT
+ STARTS_WITH(column1_utf8view, 'äöüß') as c1,
+ STARTS_WITH(column1_utf8view, '') as c2,
+ STARTS_WITH(column1_utf8view, NULL) as c3,
+ STARTS_WITH(NULL, column1_utf8view) as c4
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1,
starts_with(test.column1_utf8view, Utf8View("")) AS c2,
starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3,
starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
+02)--TableScan: test projection=[column1_utf8view]
+
+query TT
+EXPLAIN SELECT
+ INITCAP(column1_utf8view) as c
+FROM test;
+----
+logical_plan
+01)Projection: initcap(test.column1_utf8view) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+
+# Create a table with lowercase strings
+statement ok
+CREATE TABLE test_lowercase AS SELECT
+ lower(column1_utf8) as column1_utf8_lower,
+ lower(column1_large_utf8) as column1_large_utf8_lower,
+ lower(column1_utf8view) as column1_utf8view_lower
+FROM test;
+
+# Test INITCAP with utf8view, utf8, and largeutf8
+# Should not cast anything
+query TT
+EXPLAIN SELECT
+ INITCAP(column1_utf8view_lower) as c1,
+ INITCAP(column1_utf8_lower) as c2,
+ INITCAP(column1_large_utf8_lower) as c3
+FROM test_lowercase;
+----
+logical_plan
+01)Projection: initcap(test_lowercase.column1_utf8view_lower) AS c1,
initcap(test_lowercase.column1_utf8_lower) AS c2,
initcap(test_lowercase.column1_large_utf8_lower) AS c3
+02)--TableScan: test_lowercase projection=[column1_utf8_lower,
column1_large_utf8_lower, column1_utf8view_lower]
+
+statement ok
+drop table test_lowercase
+
+statement ok
+drop table test
diff --git
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
b/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
deleted file mode 100644
index 0d8e62af84..0000000000
---
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This file is intended to be run with tables already defined
-# with standard values, but different types in string columns
-# (String, StringView, etc.)
-
-query TTTTTTTTTTTTTT
-select
- substr(c1, 1),
- substr(c1, 3),
- substr(c1, 100),
- substr(c1, -1),
- substr(c1, 0, 0),
- substr(c1, -1, 2),
- substr(c1, -2, 10),
- substr(c1, -100, 200),
- substr(c1, -10, 10),
- substr(c1, -100, 10),
- substr(c1, 1, 100),
- substr(c1, 5, 3),
- substr(c1, 100, 200),
- substr(c1, 8, 0)
-from test_substr;
-----
-foo o (empty) foo (empty) (empty) foo foo (empty) (empty) foo (empty) (empty)
(empty)
-hello🌏世界 llo🌏世界 (empty) hello🌏世界 (empty) (empty) hello🌏世 hello🌏世界 (empty)
(empty) hello🌏世界 o🌏世 (empty) (empty)
-💩 (empty) (empty) 💩 (empty) (empty) 💩 💩 (empty) (empty) 💩 (empty) (empty)
(empty)
-ThisIsAVeryLongASCIIString isIsAVeryLongASCIIString (empty)
ThisIsAVeryLongASCIIString (empty) (empty) ThisIsA ThisIsAVeryLongASCIIString
(empty) (empty) ThisIsAVeryLongASCIIString IsA (empty) (empty)
-(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty)
(empty) (empty) (empty) (empty) (empty)
-NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
\ No newline at end of file
diff --git a/datafusion/sqllogictest/test_files/string_view.slt
b/datafusion/sqllogictest/test_files/string_view.slt
index c3b5fa8fc4..4e99d8cfa6 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -43,556 +43,9 @@ FROM test_source;
statement ok
drop table test_source
-########
-## StringView to StringView
-########
-
-# StringView scalar to StringView scalar
-
-query BBBB
-select
- arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
-----
-false true true true
-
-
-# StringView column to StringView column comparison as filters
-
-query TT
-select column1_utf8, column2_utf8 from test where column1_utf8view =
column2_utf8view;
-----
-Xiangpeng Xiangpeng
-
-query TT
-select column1_utf8, column2_utf8 from test where column1_utf8view <>
column2_utf8view;
-----
-Andrew X
-Raphael R
-
-# StringView column to StringView column
-query TTBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = column2_utf8view,
- column1_utf8view <> column2_utf8view
-from test;
-----
-Andrew X false true
-Xiangpeng Xiangpeng true false
-Raphael R false true
-NULL R NULL NULL
-
-# StringView column to StringView scalar comparison
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
- column1_utf8view <> arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-########
-## StringView to String
-########
-
-# test StringViewArray with Utf8 columns
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = column2_utf8,
- column2_utf8 = column1_utf8view,
- column1_utf8view <> column2_utf8,
- column2_utf8 <> column1_utf8view
-from test;
-----
-Andrew X false false true true
-Xiangpeng Xiangpeng true true false false
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# test StringViewArray with LargeUtf8 columns
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = column2_large_utf8,
- column2_large_utf8 = column1_utf8view,
- column1_utf8view <> column2_large_utf8,
- column2_large_utf8 <> column1_utf8view
-from test;
-----
-Andrew X false false true true
-Xiangpeng Xiangpeng true true false false
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-
-# StringView column to String scalar
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = arrow_cast('Andrew', 'Utf8'),
- arrow_cast('Andrew', 'Utf8') = column1_utf8view,
- column1_utf8view <> arrow_cast('Andrew', 'Utf8'),
- arrow_cast('Andrew', 'Utf8') <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# StringView column to LargeString scalar
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = arrow_cast('Andrew', 'LargeUtf8'),
- arrow_cast('Andrew', 'LargeUtf8') = column1_utf8view,
- column1_utf8view <> arrow_cast('Andrew', 'LargeUtf8'),
- arrow_cast('Andrew', 'LargeUtf8') <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# String column to StringView scalar
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8 = arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') = column1_utf8,
- column1_utf8 <> arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') <> column1_utf8
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# LargeString column to StringView scalar
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_large_utf8 = arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') = column1_large_utf8,
- column1_large_utf8 <> arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') <> column1_large_utf8
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-########
-## StringView to Dictionary
-########
-
-# test StringViewArray with Dictionary columns
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = column2_dict,
- column2_dict = column1_utf8view,
- column1_utf8view <> column2_dict,
- column2_dict <> column1_utf8view
-from test;
-----
-Andrew X false false true true
-Xiangpeng Xiangpeng true true false false
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# StringView column to Dict scalar
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
- arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view,
- column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
- arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# Dict column to StringView scalar
-query TTBBBB
-select
- column1_utf8, column2_utf8,
- column1_dict = arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') = column1_dict,
- column1_dict <> arrow_cast('Andrew', 'Utf8View'),
- arrow_cast('Andrew', 'Utf8View') <> column1_dict
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-
-########
-## Coercion Rules
-########
-
-
statement ok
set datafusion.explain.logical_plan_only = true;
-
-# Filter should have a StringView literal and no column cast
-query TT
-explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-# reverse order should be the same
-query TT
-explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-query TT
-explain SELECT column1_utf8 from test where column1_utf8 =
arrow_cast('Andrew', 'Utf8View');
-----
-logical_plan
-01)Filter: test.column1_utf8 = Utf8("Andrew")
-02)--TableScan: test projection=[column1_utf8]
-
-query TT
-explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') =
column1_utf8;
-----
-logical_plan
-01)Filter: test.column1_utf8 = Utf8("Andrew")
-02)--TableScan: test projection=[column1_utf8]
-
-query TT
-explain SELECT column1_utf8 from test where column1_utf8view =
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-query TT
-explain SELECT column1_utf8 from test where arrow_cast('Andrew',
'Dictionary(Int32, Utf8)') = column1_utf8view;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-# compare string / stringview
-# Should cast string -> stringview (which is cheap), not stringview -> string
(which is not)
-query TT
-explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
-03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
-
-query TT
-explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
-03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
-
-## Test distinct aggregates
-query III
-SELECT
- COUNT(DISTINCT column1_utf8),
- COUNT(DISTINCT column1_utf8view),
- COUNT(DISTINCT column1_dict)
-FROM test;
-----
-3 3 3
-
-query III
-SELECT
- COUNT(DISTINCT column1_utf8),
- COUNT(DISTINCT column1_utf8view),
- COUNT(DISTINCT column1_dict)
-FROM test
-GROUP BY column2_utf8view;
-----
-1 1 1
-1 1 1
-1 1 1
-
-
-query TT
-EXPLAIN SELECT
- COUNT(DISTINCT column1_utf8),
- COUNT(DISTINCT column1_utf8view),
- COUNT(DISTINCT column1_dict)
-FROM test;
-----
-logical_plan
-01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT test.column1_utf8),
count(DISTINCT test.column1_utf8view), count(DISTINCT test.column1_dict)]]
-02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict]
-
-### `STARTS_WITH`
-
-# Test STARTS_WITH with utf8view against utf8view, utf8, and largeutf8
-# (should be no casts)
-query TT
-EXPLAIN SELECT
- STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
- STARTS_WITH(column1_utf8view, column2_utf8) as c2,
- STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
-FROM test;
-----
-logical_plan
-01)Projection: starts_with(test.column1_utf8view, test.column2_utf8view) AS
c1, starts_with(test.column1_utf8view, CAST(test.column2_utf8 AS Utf8View)) AS
c2, starts_with(test.column1_utf8view, CAST(test.column2_large_utf8 AS
Utf8View)) AS c3
-02)--TableScan: test projection=[column2_utf8, column2_large_utf8,
column1_utf8view, column2_utf8view]
-
-query BBB
-SELECT
- STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
- STARTS_WITH(column1_utf8view, column2_utf8) as c2,
- STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
-FROM test;
-----
-false false false
-true true true
-true true true
-NULL NULL NULL
-
-# Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8
-# Should work, but will have to cast to common types
-# should cast utf8 -> utf8view and largeutf8 -> utf8view
-query TT
-EXPLAIN SELECT
- STARTS_WITH(column1_utf8, column2_utf8view) as c1,
- STARTS_WITH(column1_utf8, column2_utf8) as c3,
- STARTS_WITH(column1_utf8, column2_large_utf8) as c4
-FROM test;
-----
-logical_plan
-01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1,
starts_with(test.column1_utf8, test.column2_utf8) AS c3,
starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
-02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1,
test.column1_utf8, test.column2_utf8, test.column2_large_utf8,
test.column2_utf8view
-03)----TableScan: test projection=[column1_utf8, column2_utf8,
column2_large_utf8, column2_utf8view]
-
-query BBB
- SELECT
- STARTS_WITH(column1_utf8, column2_utf8view) as c1,
- STARTS_WITH(column1_utf8, column2_utf8) as c3,
- STARTS_WITH(column1_utf8, column2_large_utf8) as c4
-FROM test;
-----
-false false false
-true true true
-true true true
-NULL NULL NULL
-
-
-# Test STARTS_WITH with utf8view against literals
-# In this case, the literals should be cast to utf8view. The columns
-# should not be cast to utf8.
-query TT
-EXPLAIN SELECT
- STARTS_WITH(column1_utf8view, 'äöüß') as c1,
- STARTS_WITH(column1_utf8view, '') as c2,
- STARTS_WITH(column1_utf8view, NULL) as c3,
- STARTS_WITH(NULL, column1_utf8view) as c4
-FROM test;
-----
-logical_plan
-01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1,
starts_with(test.column1_utf8view, Utf8View("")) AS c2,
starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3,
starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
-02)--TableScan: test projection=[column1_utf8view]
-
-### Test TRANSLATE
-
-# Should run TRANSLATE using utf8view column successfully
-query T
-SELECT
- TRANSLATE(column1_utf8view, 'foo', 'bar') as c
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run TRANSLATE using utf8 column successfully
-query T
-SELECT
- TRANSLATE(column1_utf8, 'foo', 'bar') as c
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run TRANSLATE using large_utf8 column successfully
-query T
-SELECT
- TRANSLATE(column1_large_utf8, 'foo', 'bar') as c
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-### Test REGEXP_REPLACE
-
-# Should run REGEXP_REPLACE with Scalar value for utf8view
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8view, 'e', 'f') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with Scalar value for utf8view with flag
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8view, 'e', 'f', 'i') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with Scalar value for utf8
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8, 'e', 'f') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with Scalar value for utf8 with flag
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8, 'e', 'f', 'i') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8view
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8view, lower(column1_utf8view), 'bar') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8view with flag
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8view, lower(column1_utf8view), 'bar', 'g') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8, lower(column1_utf8), 'bar') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8 with flag
-query T
-SELECT
- REGEXP_REPLACE(column1_utf8, lower(column1_utf8), 'bar', 'g') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-### Initcap
-
-query TT
-EXPLAIN SELECT
- INITCAP(column1_utf8view) as c
-FROM test;
-----
-logical_plan
-01)Projection: initcap(test.column1_utf8view) AS c
-02)--TableScan: test projection=[column1_utf8view]
-
-# Create a table with lowercase strings
-statement ok
-CREATE TABLE test_lowercase AS SELECT
- lower(column1_utf8) as column1_utf8_lower,
- lower(column1_large_utf8) as column1_large_utf8_lower,
- lower(column1_utf8view) as column1_utf8view_lower
-FROM test;
-
-# Test INITCAP with utf8view, utf8, and largeutf8
-# Should not cast anything
-query TT
-EXPLAIN SELECT
- INITCAP(column1_utf8view_lower) as c1,
- INITCAP(column1_utf8_lower) as c2,
- INITCAP(column1_large_utf8_lower) as c3
-FROM test_lowercase;
-----
-logical_plan
-01)Projection: initcap(test_lowercase.column1_utf8view_lower) AS c1,
initcap(test_lowercase.column1_utf8_lower) AS c2,
initcap(test_lowercase.column1_large_utf8_lower) AS c3
-02)--TableScan: test_lowercase projection=[column1_utf8_lower,
column1_large_utf8_lower, column1_utf8view_lower]
-
-query TTT
-SELECT
- INITCAP(column1_utf8view_lower) as c1,
- INITCAP(column1_utf8_lower) as c2,
- INITCAP(column1_large_utf8_lower) as c3
-FROM test_lowercase;
-----
-Andrew Andrew Andrew
-Xiangpeng Xiangpeng Xiangpeng
-Raphael Raphael Raphael
-NULL NULL NULL
-
# Ensure string functions use native StringView implementation
# and do not fall back to Utf8 or LargeUtf8
# Should see no casts to Utf8 in the plans below
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]