This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch string-view
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/string-view by this push:
new 5b4c365e11 Add more StringView comparison test coverage (#10997)
5b4c365e11 is described below
commit 5b4c365e11f23fdd6f3aaee9c336bdfd8baa30f8
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jun 19 10:28:59 2024 -0400
Add more StringView comparison test coverage (#10997)
* Add more StringView comparison test coverage
* add reference
* Add another test showing casting on columns works correctly
---
datafusion/sqllogictest/test_files/string_view.slt | 270 ++++++++++++++++-----
1 file changed, 211 insertions(+), 59 deletions(-)
diff --git a/datafusion/sqllogictest/test_files/string_view.slt
b/datafusion/sqllogictest/test_files/string_view.slt
index 3be3c94770..f8824b23d1 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -15,99 +15,251 @@
# specific language governing permissions and limitations
# under the License.
+########
+## Test setup
+########
-# test StringViewArray with Utf8View columns
statement ok
-create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X',
'Utf8View')),
- (arrow_cast('Xiangpeng', 'Utf8View'),
arrow_cast('Xiangpeng', 'Utf8View')),
- (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R',
'Utf8View')),
- (arrow_cast(NULL, 'Utf8View'), arrow_cast('R',
'Utf8View'));
+create table test_source as values
+ ('Andrew', 'X'),
+ ('Xiangpeng', 'Xiangpeng'),
+ ('Raphael', 'R'),
+ (NULL, 'R')
+;
-query B
-select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
-----
-false
+# Table with the different combination of column types
+statement ok
+create table test as
+SELECT
+ arrow_cast(column1, 'Utf8') as column1_utf8,
+ arrow_cast(column2, 'Utf8') as column2_utf8,
+ arrow_cast(column1, 'Utf8View') as column1_utf8view,
+ arrow_cast(column2, 'Utf8View') as column2_utf8view,
+ arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
+ arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
+FROM test_source;
-query B
-select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
-----
-true
+statement ok
+drop table test_source
-query B
-select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
-----
-true
+########
+## StringView to StringView
+########
-query B
-select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+# StringView scalar to StringView scalar
+
+query BBBB
+select
+ arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
----
-true
+false true true true
+
+
+# StringView column to StringView column comparison as filters
-query ??
-select * from test where column1 = column2;
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view =
column2_utf8view;
----
Xiangpeng Xiangpeng
-query ??
-select * from test where column1 <> column2;
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view <>
column2_utf8view;
----
Andrew X
Raphael R
-query ??
-select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+# StringView column to StringView column
+query TTBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_utf8view,
+ column1_utf8view <> column2_utf8view
+from test;
----
-Andrew X
+Andrew X false true
+Xiangpeng Xiangpeng true false
+Raphael R false true
+NULL R NULL NULL
-query ??
-select * from test where column1 = 'Andrew';
+# StringView column to StringView scalar comparison
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
+from test;
----
-Andrew X
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
-query ??
-select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+########
+## StringView to String
+########
+
+# test StringViewArray with Utf8 columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_utf8,
+ column2_utf8 = column1_utf8view,
+ column1_utf8view <> column2_utf8,
+ column2_utf8 <> column1_utf8view
+from test;
----
-Xiangpeng Xiangpeng
-Raphael R
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
-query ??
-select * from test where column1 <> 'Andrew';
+# StringView column to String scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Utf8'),
+ arrow_cast('Andrew', 'Utf8') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Utf8'),
+ arrow_cast('Andrew', 'Utf8') <> column1_utf8view
+from test;
----
-Xiangpeng Xiangpeng
-Raphael R
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
-statement ok
-drop table test;
+# String column to StringView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8 = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_utf8,
+ column1_utf8 <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_utf8
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+########
+## StringView to Dictionary
+########
+
+# test StringViewArray with Dictionary columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_dict,
+ column2_dict = column1_utf8view,
+ column1_utf8view <> column2_dict,
+ column2_dict <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to Dict scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
+ arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
+ arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Dict column to StringView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_dict = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_dict,
+ column1_dict <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_dict
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+########
+## Coercion Rules
+########
-# test StringViewArray with Utf8 and Utf8View columns
statement ok
-create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
- ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
- ('Raphael', arrow_cast('R', 'Utf8View')),
- (NULL, arrow_cast('R', 'Utf8View'));
+set datafusion.explain.logical_plan_only = true;
-query T?
-select * from test where column1 = column2;
+
+# Filter should have a StringView literal and no column cast
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
----
-Xiangpeng Xiangpeng
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-query T?
-select * from test where column1 <> column2;
+# reverse order should be the same
+query TT
+explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
----
-Andrew X
-Raphael R
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
-query T?
-select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+# should not be casting the column:
https://github.com/apache/datafusion/issues/10998
+query TT
+explain SELECT column1_utf8 from test where column1_utf8 =
arrow_cast('Andrew', 'Utf8View');
----
-Andrew X
+logical_plan
+01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew")
+02)--TableScan: test projection=[column1_utf8]
-query T?
-select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view =
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
----
-Xiangpeng Xiangpeng
-Raphael R
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# compare string / stringview
+# Should cast string -> stringview (which is cheap), not stringview -> string
(which is not)
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
+
statement ok
drop table test;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]