This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 60d1d3a702 Cast `Utf8View` to `Utf8` to support `||` from `StringViewArray` (#11796)
60d1d3a702 is described below
commit 60d1d3a702be9c95f14087337c693cd678d01dea
Author: Dharan Aditya <[email protected]>
AuthorDate: Thu Aug 8 03:35:57 2024 +0530
Cast `Utf8View` to `Utf8` to support `||` from `StringViewArray` (#11796)
* make query work
* hack string_concat_coercion
* more tests
---
datafusion/expr/src/type_coercion/binary.rs | 23 ++++++----
datafusion/sqllogictest/test_files/string_view.slt | 53 ++++++++++++++++++++++
2 files changed, 68 insertions(+), 8 deletions(-)
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 17280289ed..8da33081d6 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -890,15 +890,22 @@ fn dictionary_coercion(
/// 2. Data type of the other side should be able to cast to string type
fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
- string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
- (Utf8, from_type) | (from_type, Utf8) => {
- string_concat_internal_coercion(from_type, &Utf8)
- }
- (LargeUtf8, from_type) | (from_type, LargeUtf8) => {
- string_concat_internal_coercion(from_type, &LargeUtf8)
+ match (lhs_type, rhs_type) {
+ // If Utf8View is in any side, we coerce to Utf8.
+ // Ref: https://github.com/apache/datafusion/pull/11796
+ (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => {
+ Some(Utf8)
}
- _ => None,
- })
+ _ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
+ (Utf8, from_type) | (from_type, Utf8) => {
+ string_concat_internal_coercion(from_type, &Utf8)
+ }
+ (LargeUtf8, from_type) | (from_type, LargeUtf8) => {
+ string_concat_internal_coercion(from_type, &LargeUtf8)
+ }
+ _ => None,
+ }),
+ }
}
fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt
index 584d3b3306..4d3f72b1e8 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -447,3 +447,56 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt;
statement ok
drop table dates;
+
+statement ok
+create table temp as values
+('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')),
+('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View'));
+
+query T
+select column2||' is fast' from temp;
+----
+rust is fast
+datafusion is fast
+
+
+query T
+select column2 || ' is ' || column3 from temp;
+----
+rust is fast
+datafusion is cool
+
+query TT
+explain select column2 || 'is' || column3 from temp;
+----
+logical_plan
+01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8)
+02)--TableScan: temp projection=[column2, column3]
+
+
+query TT
+explain select column2||' is fast' from temp;
+----
+logical_plan
+01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast")
+02)--TableScan: temp projection=[column2]
+
+
+query T
+select column2||column3 from temp;
+----
+rustfast
+datafusioncool
+
+query TT
+explain select column2||column3 from temp;
+----
+logical_plan
+01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8)
+02)--TableScan: temp projection=[column2, column3]
+
+query T
+select column2|| ' ' ||column3 from temp;
+----
+rust fast
+datafusion cool
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]