This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7a5354fe59 fix(functions): support `Dictionary` for string and int functions (#7262)
7a5354fe59 is described below

commit 7a5354fe5908b8ac7db163d6c484dbf1d85a142e
Author: Chunchun Ye <[email protected]>
AuthorDate: Fri Aug 11 15:21:48 2023 -0500

    fix(functions): support `Dictionary` for string and int functions (#7262)
    
    * fix(functions): support `Dictionary` type for string functions and int functions
    
    * chore: add tests
    
    chore: add more test
---
 .../tests/sqllogictests/test_files/functions.slt   | 201 +++++++++++++++++++++
 datafusion/expr/src/built_in_function.rs           |  14 ++
 2 files changed, 215 insertions(+)

diff --git a/datafusion/core/tests/sqllogictests/test_files/functions.slt b/datafusion/core/tests/sqllogictests/test_files/functions.slt
index 301d73befb..f8dbf8a00d 100644
--- a/datafusion/core/tests/sqllogictests/test_files/functions.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/functions.slt
@@ -63,6 +63,11 @@ SELECT left('abcde', -2)
 ----
 abc
 
+query T
+SELECT left(arrow_cast('abcde', 'Dictionary(Int32, Utf8)'), -2)
+----
+abc
+
 query T
 SELECT left('abcde', -200)
 ----
@@ -103,6 +108,11 @@ SELECT length('')
 ----
 0
 
+query I
+SELECT length(arrow_cast('', 'Dictionary(Int32, Utf8)'))
+----
+0
+
 query I
 SELECT length('chars')
 ----
@@ -113,6 +123,11 @@ SELECT length('josé')
 ----
 4
 
+query I
+SELECT length(arrow_cast('josé', 'Dictionary(Int32, Utf8)'))
+----
+4
+
 query ?
 SELECT length(NULL)
 ----
@@ -158,6 +173,11 @@ SELECT lpad('hi', 5)
 ----
    hi
 
+query T
+SELECT lpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5)
+----
+   hi
+
 query T
 SELECT lpad('hi', CAST(NULL AS INT), 'xy')
 ----
@@ -188,6 +208,11 @@ SELECT reverse('abcde')
 ----
 edcba
 
+query T
+SELECT reverse(arrow_cast('abcde', 'Dictionary(Int32, Utf8)'))
+----
+edcba
+
 query T
 SELECT reverse('loẅks')
 ----
@@ -203,6 +228,11 @@ SELECT right('abcde', -2)
 ----
 cde
 
+query T
+SELECT right(arrow_cast('abcde', 'Dictionary(Int32, Utf8)'), 1)
+----
+e
+
 query T
 SELECT right('abcde', -200)
 ----
@@ -268,6 +298,11 @@ SELECT rpad('hi', 5, 'xy')
 ----
 hixyx
 
+query T
+SELECT rpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5, 'xy')
+----
+hixyx
+
 query T
 SELECT rpad('hi', 5, NULL)
 ----
@@ -383,6 +418,11 @@ SELECT translate('12345', '143', 'ax')
 ----
 a2x5
 
+query T
+SELECT translate(arrow_cast('12345', 'Dictionary(Int32, Utf8)'), '143', 'ax')
+----
+a2x5
+
 query ?
 SELECT translate(NULL, '143', 'ax')
 ----
@@ -565,3 +605,164 @@ SELECT sqrt(column1),sqrt(column2),sqrt(column3),sqrt(column4),sqrt(column5),sqr
 
 statement ok
 drop table t
+
+
+query T
+SELECT upper('foo')
+----
+FOO
+
+query T
+select upper(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+FOO
+
+query T
+SELECT btrim('   foo  ')
+----
+foo
+
+query T
+SELECT btrim(arrow_cast('   foo  ', 'Dictionary(Int32, Utf8)'))
+----
+foo
+
+query T
+SELECT initcap('foo')
+----
+Foo
+
+query T
+SELECT initcap(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+Foo
+
+query T
+SELECT lower('FOObar')
+----
+foobar
+
+query T
+SELECT lower(arrow_cast('FOObar', 'Dictionary(Int32, Utf8)'))
+----
+foobar
+
+query T
+SELECT ltrim('   foo')
+----
+foo
+
+query T
+SELECT ltrim(arrow_cast('    foo', 'Dictionary(Int32, Utf8)'))
+----
+foo
+
+query T
+SELECT md5('foo')
+----
+acbd18db4cc2f85cedef654fccc4a4d8
+
+query T
+SELECT md5(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+acbd18db4cc2f85cedef654fccc4a4d8
+
+query T
+SELECT regexp_replace('foobar', 'bar', 'xx', 'gi')
+----
+fooxx
+
+query T
+SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'xx', 'gi')
+----
+fooxx
+
+query T
+SELECT repeat('foo', 3)
+----
+foofoofoo
+
+query T
+SELECT repeat(arrow_cast('foo', 'Dictionary(Int32, Utf8)'), 3)
+----
+foofoofoo
+
+query T
+SELECT replace('foobar', 'bar', 'hello')
+----
+foohello
+
+query T
+SELECT replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'hello')
+----
+foohello
+
+query T
+SELECT rtrim(' foo  ')
+----
+ foo
+
+query T
+SELECT rtrim(arrow_cast(' foo  ', 'Dictionary(Int32, Utf8)'))
+----
+ foo
+
+query T
+SELECT split_part('foo_bar', '_', 2)
+----
+bar
+
+query T
+SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, Utf8)'), '_', 2)
+----
+bar
+
+query T
+SELECT trim('  foo  ')
+----
+foo
+
+query T
+SELECT trim(arrow_cast('  foo  ', 'Dictionary(Int32, Utf8)'))
+----
+foo
+
+query I
+SELECT bit_length('foo')
+----
+24
+
+query I
+SELECT bit_length(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+24
+
+query I
+SELECT character_length('foo')
+----
+3
+
+query I
+SELECT character_length(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+3
+
+query I
+SELECT octet_length('foo')
+----
+3
+
+query I
+SELECT octet_length(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+3
+
+query I
+SELECT strpos('helloworld', 'world')
+----
+6
+
+query I
+SELECT strpos(arrow_cast('helloworld', 'Dictionary(Int32, Utf8)'), 'world')
+----
+6
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index cf609135ae..2ad06b873b 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -1397,6 +1397,20 @@ macro_rules! make_utf8_to_return_type {
                 DataType::LargeUtf8 => $largeUtf8Type,
                 DataType::Utf8 => $utf8Type,
                 DataType::Null => DataType::Null,
+                DataType::Dictionary(_, value_type) => {
+                    match **value_type {
+                        DataType::LargeUtf8 => $largeUtf8Type,
+                        DataType::Utf8 => $utf8Type,
+                        DataType::Null => DataType::Null,
+                        _ => {
+                            // this error is internal as `data_types` should have captured this.
+                            return Err(DataFusionError::Internal(format!(
+                                "The {:?} function can only accept strings.",
+                                name
+                            )));
+                        }
+                    }
+                }
                 _ => {
                     // this error is internal as `data_types` should have captured this.
                     return Err(DataFusionError::Internal(format!(

Reply via email to