This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new b412436831 Automate sqllogictest for String, LargeString and 
StringView behavior (#12525)
b412436831 is described below

commit b412436831cdb73d958ab78e1c5ef80c8fb20aa8
Author: Jax Liu <[email protected]>
AuthorDate: Sat Sep 21 03:28:14 2024 +0800

    Automate sqllogictest for String, LargeString and StringView behavior 
(#12525)
    
    * implement the testing framework for string type
    
    * move the test to new framework
    
    * add readme for string test
    
    * add license and invoke prettier
---
 .../sqllogictest/test_files/string/README.md       |  44 ++
 .../init_data.slt.part}                            |  51 +-
 .../substr_runner.slt => string/large_string.slt}  |  54 +-
 .../substr/substr_runner.slt => string/string.slt} |  52 +-
 .../string_literal.slt}                            |  17 +-
 .../test_files/string/string_query.slt.part        | 326 ++++++++++++
 .../sqllogictest/test_files/string/string_view.slt | 356 ++++++++++++++
 .../string_functions/substr/substr_table.slt.part  |  45 --
 datafusion/sqllogictest/test_files/string_view.slt | 547 ---------------------
 9 files changed, 777 insertions(+), 715 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/string/README.md 
b/datafusion/sqllogictest/test_files/string/README.md
new file mode 100644
index 0000000000..8693ef16f9
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string/README.md
@@ -0,0 +1,44 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# String Test Files
+
+This directory contains test files for the `string` test suite.
+To ensure consistent behavior across different string types, we should run the 
same tests with the same inputs on all string types.
+There is a framework in place to execute the same tests across different 
string types.
+
+See [#12415](https://github.com/apache/datafusion/issues/12415) for more 
background.
+
+## Directory Structure
+
+```
+string/
+    - init_data.slt.part        // generate the testing data
+    - string_query.slt.part     // the sharing tests for all string type
+    - string.slt                // the entrypoint for string type
+    - large_string.slt          // the entrypoint for large_string type
+    - string_view.slt           // the entrypoint for string_view type and the 
string_view specific tests
+    - string_literal.slt        // the tests for string literal
+```
+
+## Pattern for Test Entry Point Files
+
+Any entry point file should include `init_data.slt.part` and 
`string_query.slt.part`.
+
+Planning-related tests (e.g., EXPLAIN ...) should be placed in their own entry 
point file (e.g., `string_view.slt`) as they are only used to assert planning 
behavior specific to that type.
diff --git 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt 
b/datafusion/sqllogictest/test_files/string/init_data.slt.part
similarity index 57%
copy from 
datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
copy to datafusion/sqllogictest/test_files/string/init_data.slt.part
index d61cfa2b74..d99401f10d 100644
--- 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
+++ b/datafusion/sqllogictest/test_files/string/init_data.slt.part
@@ -15,10 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# --------------------------------------
-# Test `substr()` with literal arguments
-# --------------------------------------
-include ./substr_literal.slt.part
+statement ok
+create table test_source as values
+  ('Andrew', 'X', 'datafusion📊🔥', '🔥'),
+  ('Xiangpeng', 'Xiangpeng', 'datafusion数据融合', 'datafusion数据融合'),
+  ('Raphael', 'R', 'datafusionДатаФусион', 'аФус'),
+  (NULL, 'R', NULL, '🔥');
 
 # --------------------------------------
 # Setup test tables with different physical string types 
(Utf8/Utf8View/LargeUtf8)
@@ -29,44 +31,3 @@ create table test_substr_base (
     col1 VARCHAR
 ) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), 
(''), (NULL);
 
-#
-# Run1: Utf8
-#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
-#
-# Run2: Utf8View
-#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
-#
-# Run3: LargeUtf8
-#
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
-# --------------------------------------
-# Cleanup
-# --------------------------------------
-statement ok
-drop table test_substr_base;
\ No newline at end of file
diff --git 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt 
b/datafusion/sqllogictest/test_files/string/large_string.slt
similarity index 50%
copy from 
datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
copy to datafusion/sqllogictest/test_files/string/large_string.slt
index d61cfa2b74..d90f2bffe0 100644
--- 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
+++ b/datafusion/sqllogictest/test_files/string/large_string.slt
@@ -15,58 +15,36 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# --------------------------------------
-# Test `substr()` with literal arguments
-# --------------------------------------
-include ./substr_literal.slt.part
+include ./init_data.slt.part
 
 # --------------------------------------
-# Setup test tables with different physical string types 
(Utf8/Utf8View/LargeUtf8)
-# and repeat tests in `substr_table.slt.part`
+# Setup test tables with different physical string types
+# and repeat tests in `string_query.slt.part`
 # --------------------------------------
 statement ok
-create table test_substr_base (
-    col1 VARCHAR
-) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), 
(''), (NULL);
+create table test_basic_operator as
+select
+    arrow_cast(column1, 'LargeUtf8') as ascii_1,
+    arrow_cast(column2, 'LargeUtf8') as ascii_2,
+    arrow_cast(column3, 'LargeUtf8') as unicode_1,
+    arrow_cast(column4, 'LargeUtf8') as unicode_2
+from test_source;
 
-#
-# Run1: Utf8
-#
 statement ok
 create table test_substr as
-select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
+select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
 
 #
-# Run2: Utf8View
+# common test for string-like functions and operators
 #
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
+include ./string_query.slt.part
 
 #
-# Run3: LargeUtf8
+# Clean up
 #
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
 
 statement ok
-drop table test_substr;
+drop table test_basic_operator;
 
-# --------------------------------------
-# Cleanup
-# --------------------------------------
 statement ok
-drop table test_substr_base;
\ No newline at end of file
+drop table test_substr_base;
diff --git 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt 
b/datafusion/sqllogictest/test_files/string/string.slt
similarity index 51%
rename from 
datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
rename to datafusion/sqllogictest/test_files/string/string.slt
index d61cfa2b74..435795309f 100644
--- 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_runner.slt
+++ b/datafusion/sqllogictest/test_files/string/string.slt
@@ -15,58 +15,36 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# --------------------------------------
-# Test `substr()` with literal arguments
-# --------------------------------------
-include ./substr_literal.slt.part
+include ./init_data.slt.part
 
 # --------------------------------------
-# Setup test tables with different physical string types 
(Utf8/Utf8View/LargeUtf8)
-# and repeat tests in `substr_table.slt.part`
+# Setup test tables with different physical string types
+# and repeat tests in `string_query.slt.part`
 # --------------------------------------
 statement ok
-create table test_substr_base (
-    col1 VARCHAR
-) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), 
(''), (NULL);
+create table test_basic_operator as
+select
+    arrow_cast(column1, 'Utf8') as ascii_1,
+    arrow_cast(column2, 'Utf8') as ascii_2,
+    arrow_cast(column3, 'Utf8') as unicode_1,
+    arrow_cast(column4, 'Utf8') as unicode_2
+from test_source;
 
-#
-# Run1: Utf8
-#
 statement ok
 create table test_substr as
 select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
 
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
-
 #
-# Run2: Utf8View
+# common test for string-like functions and operators
 #
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
-
-statement ok
-drop table test_substr;
+include ./string_query.slt.part
 
 #
-# Run3: LargeUtf8
+# Clean up
 #
-statement ok
-create table test_substr as
-select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
-
-include ./substr_table.slt.part
 
 statement ok
-drop table test_substr;
+drop table test_basic_operator;
 
-# --------------------------------------
-# Cleanup
-# --------------------------------------
 statement ok
-drop table test_substr_base;
\ No newline at end of file
+drop table test_substr;
diff --git 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
 b/datafusion/sqllogictest/test_files/string/string_literal.slt
similarity index 87%
rename from 
datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
rename to datafusion/sqllogictest/test_files/string/string_literal.slt
index dac5901662..37dc1a8ce8 100644
--- 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_literal.slt.part
+++ b/datafusion/sqllogictest/test_files/string/string_literal.slt
@@ -56,8 +56,8 @@ SELECT substr('alphabet', 3, 20)
 phabet
 
 query TT
-select 
-    substr(arrow_cast('alphabet', 'LargeUtf8'), 3, 20), 
+select
+    substr(arrow_cast('alphabet', 'LargeUtf8'), 3, 20),
     substr(arrow_cast('alphabet', 'Utf8View'), 3, 20);
 ----
 phabet phabet
@@ -99,7 +99,7 @@ SELECT
 
 # Nulls
 query TTTTTTTTTT
-SELECT 
+SELECT
   substr('alphabet', NULL),
   substr(NULL, 1),
   substr(NULL, NULL),
@@ -134,3 +134,14 @@ select substr(arrow_cast('foo', 'Utf8View'), 1, -1);
 
 statement error Execution error: negative substring length not allowed
 select substr('', 1, -1);
+
+# StringView scalar to StringView scalar
+
+query BBBB
+select
+  arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+false true true true
diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part 
b/datafusion/sqllogictest/test_files/string/string_query.slt.part
new file mode 100644
index 0000000000..4f73795400
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part
@@ -0,0 +1,326 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file is intended to be run with tables already defined
+# with standard values, but different types in string columns
+# (String, StringView, etc.)
+
+# select
+query TTTT
+SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
+----
+Andrew X datafusion📊🔥 🔥
+Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合
+Raphael R datafusionДатаФусион аФус
+NULL R NULL 🔥
+
+# --------------------------------------
+# column comparison as filters
+# --------------------------------------
+
+query TT
+select ascii_1, ascii_2 from test_basic_operator where ascii_1 = ascii_2
+----
+Xiangpeng Xiangpeng
+
+query TT
+select ascii_1, ascii_2 from test_basic_operator where ascii_1 <> ascii_2
+----
+Andrew X
+Raphael R
+
+query TT
+select unicode_1, unicode_2 from test_basic_operator where unicode_1 = 
unicode_2
+----
+datafusion数据融合 datafusion数据融合
+
+query TT
+select unicode_1, unicode_2 from test_basic_operator where unicode_1 <> 
unicode_2
+----
+datafusion📊🔥 🔥
+datafusionДатаФусион аФус
+
+query TT
+select ascii_1, unicode_1 from test_basic_operator where ascii_1 = unicode_1
+----
+
+query TT
+select ascii_1, unicode_1 from test_basic_operator where ascii_1 <> unicode_1
+----
+Andrew datafusion📊🔥
+Xiangpeng datafusion数据融合
+Raphael datafusionДатаФусион
+
+# --------------------------------------
+# column comparison
+# --------------------------------------
+query TTTTBBBBBB
+select
+    ascii_1, ascii_2, unicode_1, unicode_2,
+    ascii_1 = ascii_2,
+    ascii_1 <> ascii_2,
+    unicode_1 = unicode_2,
+    unicode_1 <> unicode_2,
+    ascii_1 = unicode_1,
+    ascii_1 <> unicode_1
+from test_basic_operator;
+----
+Andrew X datafusion📊🔥 🔥 false true false true false true
+Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true false true false false 
true
+Raphael R datafusionДатаФусион аФус false true false true false true
+NULL R NULL 🔥 NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# column to StringView scalar comparison
+# --------------------------------------
+query TTBBBB
+select
+    ascii_1, unicode_1,
+    ascii_1 = arrow_cast('Andrew', 'Utf8View'),
+    ascii_1 <> arrow_cast('Andrew', 'Utf8View'),
+    unicode_1 = arrow_cast('datafusion数据融合', 'Utf8View'),
+    unicode_1 <> arrow_cast('datafusion数据融合', 'Utf8View')
+from test_basic_operator;
+----
+Andrew datafusion📊🔥 true false false true
+Xiangpeng datafusion数据融合 false true true false
+Raphael datafusionДатаФусион false true false true
+NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# column to String scalar
+# --------------------------------------
+query TTBBBB
+select
+    ascii_1, unicode_1,
+    ascii_1 = arrow_cast('Andrew', 'Utf8'),
+    ascii_1 <> arrow_cast('Andrew', 'Utf8'),
+    unicode_1 = arrow_cast('datafusion数据融合', 'Utf8'),
+    unicode_1 <> arrow_cast('datafusion数据融合', 'Utf8')
+from test_basic_operator;
+----
+Andrew datafusion📊🔥 true false false true
+Xiangpeng datafusion数据融合 false true true false
+Raphael datafusionДатаФусион false true false true
+NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# column to LargeString scalar
+# --------------------------------------
+query TTBBBB
+select
+    ascii_1, unicode_1,
+    ascii_1 = arrow_cast('Andrew', 'LargeUtf8'),
+    ascii_1 <> arrow_cast('Andrew', 'LargeUtf8'),
+    unicode_1 = arrow_cast('datafusion数据融合', 'LargeUtf8'),
+    unicode_1 <> arrow_cast('datafusion数据融合', 'LargeUtf8')
+from test_basic_operator;
+----
+Andrew datafusion📊🔥 true false false true
+Xiangpeng datafusion数据融合 false true true false
+Raphael datafusionДатаФусион false true false true
+NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# substr function
+# --------------------------------------
+
+query TTTTTTTTTTTTTT
+select
+    substr(c1, 1),
+    substr(c1, 3),
+    substr(c1, 100),
+    substr(c1, -1),
+    substr(c1, 0, 0),
+    substr(c1, -1, 2),
+    substr(c1, -2, 10),
+    substr(c1, -100, 200),
+    substr(c1, -10, 10),
+    substr(c1, -100, 10),
+    substr(c1, 1, 100),
+    substr(c1, 5, 3),
+    substr(c1, 100, 200),
+    substr(c1, 8, 0)
+from test_substr;
+----
+foo o (empty) foo (empty) (empty) foo foo (empty) (empty) foo (empty) (empty) 
(empty)
+hello🌏世界 llo🌏世界 (empty) hello🌏世界 (empty) (empty) hello🌏世 hello🌏世界 (empty) 
(empty) hello🌏世界 o🌏世 (empty) (empty)
+💩 (empty) (empty) 💩 (empty) (empty) 💩 💩 (empty) (empty) 💩 (empty) (empty) 
(empty)
+ThisIsAVeryLongASCIIString isIsAVeryLongASCIIString (empty) 
ThisIsAVeryLongASCIIString (empty) (empty) ThisIsA ThisIsAVeryLongASCIIString 
(empty) (empty) ThisIsAVeryLongASCIIString IsA (empty) (empty)
+(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) 
(empty) (empty) (empty) (empty) (empty)
+NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+
+# --------------------------------------
+# test distinct aggregate
+# --------------------------------------
+query II
+SELECT
+    COUNT(DISTINCT ascii_1),
+    COUNT(DISTINCT unicode_1)
+FROM
+    test_basic_operator
+----
+3 3
+
+query II
+SELECT
+    COUNT(DISTINCT ascii_1),
+    COUNT(DISTINCT unicode_1)
+FROM
+    test_basic_operator
+GROUP BY ascii_2;
+----
+1 1
+1 1
+1 1
+
+query II
+SELECT
+    COUNT(DISTINCT ascii_1),
+    COUNT(DISTINCT unicode_1)
+FROM
+    test_basic_operator
+GROUP BY unicode_2;
+----
+1 1
+1 1
+1 1
+
+# --------------------------------------
+# STARTS_WITH function
+# --------------------------------------
+
+query BBBB
+SELECT
+    STARTS_WITH(ascii_1, ascii_2),
+    STARTS_WITH(unicode_1, unicode_2),
+    STARTS_WITH(ascii_1, unicode_2),
+    STARTS_WITH(unicode_1, ascii_2)
+FROM test_basic_operator
+----
+false false false false
+true true false false
+true false false false
+NULL NULL NULL NULL
+
+query BBBB
+SELECT
+    STARTS_WITH(ascii_1, 'And'),
+    STARTS_WITH(ascii_2, 'And'),
+    STARTS_WITH(unicode_1, 'data'),
+    STARTS_WITH(unicode_2, 'data')
+FROM test_basic_operator
+----
+true false true false
+false false true true
+false false true false
+NULL false NULL false
+
+# --------------------------------------
+# Test TRANSLATE
+# --------------------------------------
+
+query T
+SELECT
+  TRANSLATE(ascii_1, 'foo', 'bar') as c
+FROM test_basic_operator;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
+query T
+SELECT
+  TRANSLATE(unicode_1, 'foo', 'bar') as c
+FROM test_basic_operator;
+----
+databusirn📊🔥
+databusirn数据融合
+databusirnДатаФусион
+NULL
+
+# --------------------------------------
+# Test REGEXP_REPLACE
+# --------------------------------------
+
+# Should run REGEXP_REPLACE with Scalar value for string
+query T
+SELECT
+  REGEXP_REPLACE(ascii_1, 'e', 'f') AS k
+FROM test_basic_operator;
+----
+Andrfw
+Xiangpfng
+Raphafl
+NULL
+
+# Should run REGEXP_REPLACE with Scalar value for string with flag
+query T
+SELECT
+  REGEXP_REPLACE(ascii_1, 'e', 'f', 'i') AS k
+FROM test_basic_operator;
+----
+Andrfw
+Xiangpfng
+Raphafl
+NULL
+
+# Should run REGEXP_REPLACE with ScalarArray value for string
+query T
+SELECT
+  REGEXP_REPLACE(ascii_1, lower(ascii_1), 'bar') AS k
+FROM test_basic_operator;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
+# Should run REGEXP_REPLACE with ScalarArray value for string with flag
+query T
+SELECT
+  REGEXP_REPLACE(ascii_1, lower(ascii_1), 'bar', 'g') AS k
+FROM test_basic_operator;
+----
+Andrew
+Xiangpeng
+Raphael
+NULL
+
+# --------------------------------------
+# Test Initcap
+# --------------------------------------
+statement ok
+CREATE TABLE test_lowercase AS SELECT
+  lower(ascii_1) as ascii_1_lower,
+  lower(unicode_1) as unicode_1_lower
+FROM test_basic_operator;
+
+query TT
+SELECT
+  INITCAP(ascii_1_lower) as c1,
+  INITCAP(unicode_1_lower) as c2
+FROM test_lowercase;
+----
+Andrew Datafusion📊🔥
+Xiangpeng Datafusion数据融合
+Raphael Datafusionдатафусион
+NULL NULL
+
+statement ok
+drop table test_lowercase;
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt 
b/datafusion/sqllogictest/test_files/string/string_view.slt
new file mode 100644
index 0000000000..ca08f647d2
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -0,0 +1,356 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include ./init_data.slt.part
+
+# --------------------------------------
+# Setup test tables with different physical string types
+# and repeat tests in `string_query.slt.part`
+# --------------------------------------
+statement ok
+create table test_basic_operator as
+select
+    arrow_cast(column1, 'Utf8View') as ascii_1,
+    arrow_cast(column2, 'Utf8View') as ascii_2,
+    arrow_cast(column3, 'Utf8View') as unicode_1,
+    arrow_cast(column4, 'Utf8View') as unicode_2
+from test_source;
+
+statement ok
+create table test_substr as
+select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
+
+statement ok
+drop table test_source
+
+#
+# common test for string-like functions and operators
+#
+include ./string_query.slt.part
+
+#
+# Clean up
+#
+statement ok
+drop table test_basic_operator;
+
+statement ok
+drop table test_substr_base;
+
+
+# --------------------------------------
+# String_view specific tests
+# --------------------------------------
+statement ok
+create table test_source as values
+  ('Andrew', 'X'),
+  ('Xiangpeng', 'Xiangpeng'),
+  ('Raphael', 'R'),
+  (NULL, 'R');
+
+# Table with the different combination of column types
+statement ok
+create table test as
+SELECT
+  arrow_cast(column1, 'Utf8') as column1_utf8,
+  arrow_cast(column2, 'Utf8') as column2_utf8,
+  arrow_cast(column1, 'LargeUtf8') as column1_large_utf8,
+  arrow_cast(column2, 'LargeUtf8') as column2_large_utf8,
+  arrow_cast(column1, 'Utf8View') as column1_utf8view,
+  arrow_cast(column2, 'Utf8View') as column2_utf8view,
+  arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
+  arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
+FROM test_source;
+
+statement ok
+drop table test_source
+
+########
+## StringView to Other Types column
+########
+
+# test StringViewArray with Utf8 columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_utf8,
+  column2_utf8      = column1_utf8view,
+  column1_utf8view <> column2_utf8,
+  column2_utf8     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# test StringViewArray with LargeUtf8 columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_large_utf8,
+  column2_large_utf8      = column1_utf8view,
+  column1_utf8view <> column2_large_utf8,
+  column2_large_utf8     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## StringView to Dictionary
+########
+
+# test StringViewArray with Dictionary columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_dict,
+  column2_dict      = column1_utf8view,
+  column1_utf8view <> column2_dict,
+  column2_dict     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to Dict scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
+  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
+  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Dict column to StringView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_dict                     = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_dict,
+  column1_dict                    <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_dict
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## Coercion Rules
+########
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+
+# Filter should have a StringView literal and no column cast
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# reverse order should be the same
+query TT
+explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8 = 
arrow_cast('Andrew', 'Utf8View');
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') = 
column1_utf8;
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 
'Dictionary(Int32, Utf8)') = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# compare string / stringview
+# Should cast string -> stringview (which is cheap), not stringview -> string 
(which is not)
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
+
+query TT
+EXPLAIN SELECT
+  COUNT(DISTINCT column1_utf8),
+  COUNT(DISTINCT column1_utf8view),
+  COUNT(DISTINCT column1_dict)
+FROM test;
+----
+logical_plan
+01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT test.column1_utf8), 
count(DISTINCT test.column1_utf8view), count(DISTINCT test.column1_dict)]]
+02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict]
+
+
+### `STARTS_WITH`
+
+# Test STARTS_WITH with utf8view against utf8view, utf8, and largeutf8
+# (should be no casts)
+query TT
+EXPLAIN SELECT
+  STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
+  STARTS_WITH(column1_utf8view, column2_utf8) as c2,
+  STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(test.column1_utf8view, test.column2_utf8view) AS 
c1, starts_with(test.column1_utf8view, CAST(test.column2_utf8 AS Utf8View)) AS 
c2, starts_with(test.column1_utf8view, CAST(test.column2_large_utf8 AS 
Utf8View)) AS c3
+02)--TableScan: test projection=[column2_utf8, column2_large_utf8, 
column1_utf8view, column2_utf8view]
+
+query BBB
+SELECT
+  STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
+  STARTS_WITH(column1_utf8view, column2_utf8) as c2,
+  STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
+FROM test;
+----
+false false false
+true true true
+true true true
+NULL NULL NULL
+
+# Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8
+# Should work, but will have to cast to common types
+# should cast utf8 -> utf8view and largeutf8 -> utf8view
+query TT
+EXPLAIN SELECT
+  STARTS_WITH(column1_utf8, column2_utf8view) as c1,
+  STARTS_WITH(column1_utf8, column2_utf8) as c3,
+  STARTS_WITH(column1_utf8, column2_large_utf8) as c4
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1, 
starts_with(test.column1_utf8, test.column2_utf8) AS c3, 
starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
+02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1, 
test.column1_utf8, test.column2_utf8, test.column2_large_utf8, 
test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column2_large_utf8, column2_utf8view]
+
+query BBB
+ SELECT
+  STARTS_WITH(column1_utf8, column2_utf8view) as c1,
+  STARTS_WITH(column1_utf8, column2_utf8) as c3,
+  STARTS_WITH(column1_utf8, column2_large_utf8) as c4
+FROM test;
+----
+false false false
+true true true
+true true true
+NULL NULL NULL
+
+
+# Test STARTS_WITH with utf8view against literals
+# In this case, the literals should be cast to utf8view. The columns
+# should not be cast to utf8.
+query TT
+EXPLAIN SELECT
+  STARTS_WITH(column1_utf8view, 'äöüß') as c1,
+  STARTS_WITH(column1_utf8view, '') as c2,
+  STARTS_WITH(column1_utf8view, NULL) as c3,
+  STARTS_WITH(NULL, column1_utf8view) as c4
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, 
starts_with(test.column1_utf8view, Utf8View("")) AS c2, 
starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, 
starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
+02)--TableScan: test projection=[column1_utf8view]
+
+query TT
+EXPLAIN SELECT
+  INITCAP(column1_utf8view) as c
+FROM test;
+----
+logical_plan
+01)Projection: initcap(test.column1_utf8view) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+
+# Create a table with lowercase strings
+statement ok
+CREATE TABLE test_lowercase AS SELECT
+  lower(column1_utf8) as column1_utf8_lower,
+  lower(column1_large_utf8) as column1_large_utf8_lower,
+  lower(column1_utf8view) as column1_utf8view_lower
+FROM test;
+
+# Test INITCAP with utf8view, utf8, and largeutf8
+# Should not cast anything
+query TT
+EXPLAIN SELECT
+  INITCAP(column1_utf8view_lower) as c1,
+  INITCAP(column1_utf8_lower) as c2,
+  INITCAP(column1_large_utf8_lower) as c3
+FROM test_lowercase;
+----
+logical_plan
+01)Projection: initcap(test_lowercase.column1_utf8view_lower) AS c1, 
initcap(test_lowercase.column1_utf8_lower) AS c2, 
initcap(test_lowercase.column1_large_utf8_lower) AS c3
+02)--TableScan: test_lowercase projection=[column1_utf8_lower, 
column1_large_utf8_lower, column1_utf8view_lower]
+
+statement ok
+drop table test_lowercase
+
+statement ok
+drop table test
diff --git 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
 
b/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
deleted file mode 100644
index 0d8e62af84..0000000000
--- 
a/datafusion/sqllogictest/test_files/string_functions/substr/substr_table.slt.part
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This file is intended to be run with tables already defined
-# with standard values, but different types in string columns
-# (String, StringView, etc.)
-
-query TTTTTTTTTTTTTT
-select 
-    substr(c1, 1),
-    substr(c1, 3),
-    substr(c1, 100),
-    substr(c1, -1),
-    substr(c1, 0, 0),
-    substr(c1, -1, 2),
-    substr(c1, -2, 10),
-    substr(c1, -100, 200),
-    substr(c1, -10, 10),
-    substr(c1, -100, 10),
-    substr(c1, 1, 100),
-    substr(c1, 5, 3),
-    substr(c1, 100, 200),
-    substr(c1, 8, 0)
-from test_substr;
-----
-foo o (empty) foo (empty) (empty) foo foo (empty) (empty) foo (empty) (empty) 
(empty)
-hello🌏世界 llo🌏世界 (empty) hello🌏世界 (empty) (empty) hello🌏世 hello🌏世界 (empty) 
(empty) hello🌏世界 o🌏世 (empty) (empty)
-💩 (empty) (empty) 💩 (empty) (empty) 💩 💩 (empty) (empty) 💩 (empty) (empty) 
(empty)
-ThisIsAVeryLongASCIIString isIsAVeryLongASCIIString (empty) 
ThisIsAVeryLongASCIIString (empty) (empty) ThisIsA ThisIsAVeryLongASCIIString 
(empty) (empty) ThisIsAVeryLongASCIIString IsA (empty) (empty)
-(empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) (empty) 
(empty) (empty) (empty) (empty) (empty)
-NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
\ No newline at end of file
diff --git a/datafusion/sqllogictest/test_files/string_view.slt 
b/datafusion/sqllogictest/test_files/string_view.slt
index c3b5fa8fc4..4e99d8cfa6 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -43,556 +43,9 @@ FROM test_source;
 statement ok
 drop table test_source
 
-########
-## StringView to StringView
-########
-
-# StringView scalar to StringView scalar
-
-query BBBB
-select
-  arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
-----
-false true true true
-
-
-# StringView column to StringView column comparison as filters
-
-query TT
-select column1_utf8, column2_utf8 from test where column1_utf8view = 
column2_utf8view;
-----
-Xiangpeng Xiangpeng
-
-query TT
-select column1_utf8, column2_utf8 from test where column1_utf8view <> 
column2_utf8view;
-----
-Andrew X
-Raphael R
-
-# StringView column to StringView column
-query TTBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view = column2_utf8view,
-  column1_utf8view <> column2_utf8view
-from test;
-----
-Andrew X false true
-Xiangpeng Xiangpeng true false
-Raphael R false true
-NULL R NULL NULL
-
-# StringView column to StringView scalar comparison
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view                 = arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
-  column1_utf8view                 <> arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-########
-## StringView to String
-########
-
-# test StringViewArray with Utf8 columns
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view  = column2_utf8,
-  column2_utf8      = column1_utf8view,
-  column1_utf8view <> column2_utf8,
-  column2_utf8     <> column1_utf8view
-from test;
-----
-Andrew X false false true true
-Xiangpeng Xiangpeng true true false false
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# test StringViewArray with LargeUtf8 columns
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view  = column2_large_utf8,
-  column2_large_utf8      = column1_utf8view,
-  column1_utf8view <> column2_large_utf8,
-  column2_large_utf8     <> column1_utf8view
-from test;
-----
-Andrew X false false true true
-Xiangpeng Xiangpeng true true false false
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-
-# StringView column to String scalar
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view                 = arrow_cast('Andrew', 'Utf8'),
-  arrow_cast('Andrew', 'Utf8')     = column1_utf8view,
-  column1_utf8view                 <> arrow_cast('Andrew', 'Utf8'),
-  arrow_cast('Andrew', 'Utf8')     <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# StringView column to LargeString scalar
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view                 = arrow_cast('Andrew', 'LargeUtf8'),
-  arrow_cast('Andrew', 'LargeUtf8')     = column1_utf8view,
-  column1_utf8view                 <> arrow_cast('Andrew', 'LargeUtf8'),
-  arrow_cast('Andrew', 'LargeUtf8')     <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# String column to StringView scalar
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8                     = arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') = column1_utf8,
-  column1_utf8                    <> arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') <> column1_utf8
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# LargeString column to StringView scalar
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_large_utf8                     = arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') = column1_large_utf8,
-  column1_large_utf8                    <> arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') <> column1_large_utf8
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-########
-## StringView to Dictionary
-########
-
-# test StringViewArray with Dictionary columns
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view  = column2_dict,
-  column2_dict      = column1_utf8view,
-  column1_utf8view <> column2_dict,
-  column2_dict     <> column1_utf8view
-from test;
-----
-Andrew X false false true true
-Xiangpeng Xiangpeng true true false false
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# StringView column to Dict scalar
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_utf8view                 = arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
-  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     = column1_utf8view,
-  column1_utf8view                 <> arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
-  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     <> column1_utf8view
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-# Dict column to StringView scalar
-query TTBBBB
-select
-  column1_utf8, column2_utf8,
-  column1_dict                     = arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') = column1_dict,
-  column1_dict                    <> arrow_cast('Andrew', 'Utf8View'),
-  arrow_cast('Andrew', 'Utf8View') <> column1_dict
-from test;
-----
-Andrew X true true false false
-Xiangpeng Xiangpeng false false true true
-Raphael R false false true true
-NULL R NULL NULL NULL NULL
-
-
-########
-## Coercion Rules
-########
-
-
 statement ok
 set datafusion.explain.logical_plan_only = true;
 
-
-# Filter should have a StringView literal and no column cast
-query TT
-explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-# reverse order should be the same
-query TT
-explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-query TT
-explain SELECT column1_utf8 from test where column1_utf8 = 
arrow_cast('Andrew', 'Utf8View');
-----
-logical_plan
-01)Filter: test.column1_utf8 = Utf8("Andrew")
-02)--TableScan: test projection=[column1_utf8]
-
-query TT
-explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') = 
column1_utf8;
-----
-logical_plan
-01)Filter: test.column1_utf8 = Utf8("Andrew")
-02)--TableScan: test projection=[column1_utf8]
-
-query TT
-explain SELECT column1_utf8 from test where column1_utf8view = 
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-query TT
-explain SELECT column1_utf8 from test where arrow_cast('Andrew', 
'Dictionary(Int32, Utf8)') = column1_utf8view;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = Utf8View("Andrew")
-03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-
-# compare string / stringview
-# Should cast string -> stringview (which is cheap), not stringview -> string 
(which is not)
-query TT
-explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
-03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
-
-query TT
-explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
-----
-logical_plan
-01)Projection: test.column1_utf8
-02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
-03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
-
-## Test distinct aggregates
-query III
-SELECT
-  COUNT(DISTINCT column1_utf8),
-  COUNT(DISTINCT column1_utf8view),
-  COUNT(DISTINCT column1_dict)
-FROM test;
-----
-3 3 3
-
-query III
-SELECT
-  COUNT(DISTINCT column1_utf8),
-  COUNT(DISTINCT column1_utf8view),
-  COUNT(DISTINCT column1_dict)
-FROM test
-GROUP BY column2_utf8view;
-----
-1 1 1
-1 1 1
-1 1 1
-
-
-query TT
-EXPLAIN SELECT
-  COUNT(DISTINCT column1_utf8),
-  COUNT(DISTINCT column1_utf8view),
-  COUNT(DISTINCT column1_dict)
-FROM test;
-----
-logical_plan
-01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT test.column1_utf8), 
count(DISTINCT test.column1_utf8view), count(DISTINCT test.column1_dict)]]
-02)--TableScan: test projection=[column1_utf8, column1_utf8view, column1_dict]
-
-### `STARTS_WITH`
-
-# Test STARTS_WITH with utf8view against utf8view, utf8, and largeutf8
-# (should be no casts)
-query TT
-EXPLAIN SELECT
-  STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
-  STARTS_WITH(column1_utf8view, column2_utf8) as c2,
-  STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
-FROM test;
-----
-logical_plan
-01)Projection: starts_with(test.column1_utf8view, test.column2_utf8view) AS 
c1, starts_with(test.column1_utf8view, CAST(test.column2_utf8 AS Utf8View)) AS 
c2, starts_with(test.column1_utf8view, CAST(test.column2_large_utf8 AS 
Utf8View)) AS c3
-02)--TableScan: test projection=[column2_utf8, column2_large_utf8, 
column1_utf8view, column2_utf8view]
-
-query BBB
-SELECT
-  STARTS_WITH(column1_utf8view, column2_utf8view) as c1,
-  STARTS_WITH(column1_utf8view, column2_utf8) as c2,
-  STARTS_WITH(column1_utf8view, column2_large_utf8) as c3
-FROM test;
-----
-false false false
-true true true
-true true true
-NULL NULL NULL
-
-# Test STARTS_WITH with utf8 against utf8view, utf8, and largeutf8
-# Should work, but will have to cast to common types
-# should cast utf8 -> utf8view and largeutf8 -> utf8view
-query TT
-EXPLAIN SELECT
-  STARTS_WITH(column1_utf8, column2_utf8view) as c1,
-  STARTS_WITH(column1_utf8, column2_utf8) as c3,
-  STARTS_WITH(column1_utf8, column2_large_utf8) as c4
-FROM test;
-----
-logical_plan
-01)Projection: starts_with(__common_expr_1, test.column2_utf8view) AS c1, 
starts_with(test.column1_utf8, test.column2_utf8) AS c3, 
starts_with(__common_expr_1, CAST(test.column2_large_utf8 AS Utf8View)) AS c4
-02)--Projection: CAST(test.column1_utf8 AS Utf8View) AS __common_expr_1, 
test.column1_utf8, test.column2_utf8, test.column2_large_utf8, 
test.column2_utf8view
-03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column2_large_utf8, column2_utf8view]
-
-query BBB
- SELECT
-  STARTS_WITH(column1_utf8, column2_utf8view) as c1,
-  STARTS_WITH(column1_utf8, column2_utf8) as c3,
-  STARTS_WITH(column1_utf8, column2_large_utf8) as c4
-FROM test;
-----
-false false false
-true true true
-true true true
-NULL NULL NULL
-
-
-# Test STARTS_WITH with utf8view against literals
-# In this case, the literals should be cast to utf8view. The columns
-# should not be cast to utf8.
-query TT
-EXPLAIN SELECT
-  STARTS_WITH(column1_utf8view, 'äöüß') as c1,
-  STARTS_WITH(column1_utf8view, '') as c2,
-  STARTS_WITH(column1_utf8view, NULL) as c3,
-  STARTS_WITH(NULL, column1_utf8view) as c4
-FROM test;
-----
-logical_plan
-01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, 
starts_with(test.column1_utf8view, Utf8View("")) AS c2, 
starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, 
starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
-02)--TableScan: test projection=[column1_utf8view]
-
-### Test TRANSLATE
-
-# Should run TRANSLATE using utf8view column successfully
-query T
-SELECT
-  TRANSLATE(column1_utf8view, 'foo', 'bar') as c
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run TRANSLATE using utf8 column successfully
-query T
-SELECT
-  TRANSLATE(column1_utf8, 'foo', 'bar') as c
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run TRANSLATE using large_utf8 column successfully
-query T
-SELECT
-  TRANSLATE(column1_large_utf8, 'foo', 'bar') as c
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-### Test REGEXP_REPLACE
-
-# Should run REGEXP_REPLACE with Scalar value for utf8view
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8view, 'e', 'f') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with Scalar value for utf8view with flag
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8view, 'e', 'f', 'i') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with Scalar value for utf8
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8, 'e', 'f') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with Scalar value for utf8 with flag
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8, 'e', 'f', 'i') AS k
-FROM test;
-----
-Andrfw
-Xiangpfng
-Raphafl
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8view
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8view, lower(column1_utf8view), 'bar') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8view with flag
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8view, lower(column1_utf8view), 'bar', 'g') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8, lower(column1_utf8), 'bar') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-# Should run REGEXP_REPLACE with ScalarArray value for utf8 with flag
-query T
-SELECT
-  REGEXP_REPLACE(column1_utf8, lower(column1_utf8), 'bar', 'g') AS k
-FROM test;
-----
-Andrew
-Xiangpeng
-Raphael
-NULL
-
-### Initcap
-
-query TT
-EXPLAIN SELECT
-  INITCAP(column1_utf8view) as c
-FROM test;
-----
-logical_plan
-01)Projection: initcap(test.column1_utf8view) AS c
-02)--TableScan: test projection=[column1_utf8view]
-
-# Create a table with lowercase strings
-statement ok
-CREATE TABLE test_lowercase AS SELECT
-  lower(column1_utf8) as column1_utf8_lower,
-  lower(column1_large_utf8) as column1_large_utf8_lower,
-  lower(column1_utf8view) as column1_utf8view_lower
-FROM test;
-
-# Test INITCAP with utf8view, utf8, and largeutf8
-# Should not cast anything
-query TT
-EXPLAIN SELECT
-  INITCAP(column1_utf8view_lower) as c1,
-  INITCAP(column1_utf8_lower) as c2,
-  INITCAP(column1_large_utf8_lower) as c3
-FROM test_lowercase;
-----
-logical_plan
-01)Projection: initcap(test_lowercase.column1_utf8view_lower) AS c1, 
initcap(test_lowercase.column1_utf8_lower) AS c2, 
initcap(test_lowercase.column1_large_utf8_lower) AS c3
-02)--TableScan: test_lowercase projection=[column1_utf8_lower, 
column1_large_utf8_lower, column1_utf8view_lower]
-
-query TTT
-SELECT
-  INITCAP(column1_utf8view_lower) as c1,
-  INITCAP(column1_utf8_lower) as c2,
-  INITCAP(column1_large_utf8_lower) as c3
-FROM test_lowercase;
-----
-Andrew Andrew Andrew
-Xiangpeng Xiangpeng Xiangpeng
-Raphael Raphael Raphael
-NULL NULL NULL
-
 # Ensure string functions use native StringView implementation
 # and do not fall back to Utf8 or LargeUtf8
 # Should see no casts to Utf8 in the plans below


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to