This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 663ab767b5 Fix Regex signature types (#12690)
663ab767b5 is described below
commit 663ab767b5d811256b568102c8fcdda62b82267b
Author: Dmitrii Blaginin <[email protected]>
AuthorDate: Wed Oct 2 10:59:39 2024 +0100
Fix Regex signature types (#12690)
* Fix Regex signature types
* Uncomment the shared tests in string_query.slt.part and remove the test
copies everywhere else
* Test `LIKE` and `MATCH` with flags; Remove new tests from regexp.slt
---
datafusion/functions/src/regex/regexplike.rs | 4 +-
datafusion/functions/src/regex/regexpmatch.rs | 6 +-
.../test_files/string/dictionary_utf8.slt | 30 ----------
.../sqllogictest/test_files/string/string.slt | 30 ----------
.../test_files/string/string_query.slt.part | 64 ++++++++++++----------
.../sqllogictest/test_files/string/string_view.slt | 30 ----------
6 files changed, 41 insertions(+), 123 deletions(-)
diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs
index 20029ba005..8cd26a824a 100644
--- a/datafusion/functions/src/regex/regexplike.rs
+++ b/datafusion/functions/src/regex/regexplike.rs
@@ -48,9 +48,9 @@ impl RegexpLikeFunc {
signature: Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8]),
Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
],
Volatility::Immutable,
),
diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs
index bf40eff11d..498b591620 100644
--- a/datafusion/functions/src/regex/regexpmatch.rs
+++ b/datafusion/functions/src/regex/regexpmatch.rs
@@ -54,9 +54,9 @@ impl RegexpMatchFunc {
// If that fails, it proceeds to `(LargeUtf8, Utf8)`.
// TODO: Native support Utf8View for regexp_match.
Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8]),
Exact(vec![Utf8, Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8, Utf8]),
+ Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]),
],
Volatility::Immutable,
),
@@ -131,7 +131,7 @@ pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let flags = as_generic_string_array::<T>(&args[2])?;
if flags.iter().any(|s| s == Some("g")) {
- return plan_err!("regexp_match() does not support the \"global\" option")
+ return plan_err!("regexp_match() does not support the \"global\" option");
}
regexp::regexp_match(values, regex, Some(flags))
diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt
index ea3c9b8eb6..c181f613ee 100644
--- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt
+++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt
@@ -53,36 +53,6 @@ Xiangpeng datafusion数据融合 false true false true
Raphael datafusionДатаФусион false false false false
NULL NULL NULL NULL NULL NULL
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query BBBB
-SELECT
- REGEXP_LIKE(ascii_1, 'an'),
- REGEXP_LIKE(unicode_1, 'таФ'),
- REGEXP_LIKE(ascii_1, NULL),
- REGEXP_LIKE(unicode_1, NULL)
-FROM test_basic_operator;
-----
-false false NULL NULL
-true false NULL NULL
-false true NULL NULL
-NULL NULL NULL NULL
-
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query ????
-SELECT
- REGEXP_MATCH(ascii_1, 'an'),
- REGEXP_MATCH(unicode_1, 'таФ'),
- REGEXP_MATCH(ascii_1, NULL),
- REGEXP_MATCH(unicode_1, NULL)
-FROM test_basic_operator;
-----
-NULL NULL NULL NULL
-[an] NULL NULL NULL
-NULL [таФ] NULL NULL
-NULL NULL NULL NULL
-
#
# common test for string-like functions and operators
#
diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt
index 6b89147c5c..f4e83966f7 100644
--- a/datafusion/sqllogictest/test_files/string/string.slt
+++ b/datafusion/sqllogictest/test_files/string/string.slt
@@ -63,36 +63,6 @@ Xiangpeng datafusion数据融合 false true false true
Raphael datafusionДатаФусион false false false false
NULL NULL NULL NULL NULL NULL
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query BBBB
-SELECT
- REGEXP_LIKE(ascii_1, 'an'),
- REGEXP_LIKE(unicode_1, 'таФ'),
- REGEXP_LIKE(ascii_1, NULL),
- REGEXP_LIKE(unicode_1, NULL)
-FROM test_basic_operator;
-----
-false false NULL NULL
-true false NULL NULL
-false true NULL NULL
-NULL NULL NULL NULL
-
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query ????
-SELECT
- REGEXP_MATCH(ascii_1, 'an'),
- REGEXP_MATCH(unicode_1, 'таФ'),
- REGEXP_MATCH(ascii_1, NULL),
- REGEXP_MATCH(unicode_1, NULL)
-FROM test_basic_operator;
-----
-NULL NULL NULL NULL
-[an] NULL NULL NULL
-NULL [таФ] NULL NULL
-NULL NULL NULL NULL
-
# TODO: move it back to `string_query.slt.part` after fixing the issue
# see detail: https://github.com/apache/datafusion/issues/12670
query IIIIII
diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part
index 0af0a6a642..3ba2b31bba 100644
--- a/datafusion/sqllogictest/test_files/string/string_query.slt.part
+++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part
@@ -856,39 +856,47 @@ NULL NULL
# Test REGEXP_LIKE
# --------------------------------------
-# TODO: LargeString does not support REGEXP_LIKE. Enable this after fixing the issue
-# see issue: https://github.com/apache/datafusion/issues/12664
-#query BBBB
-#SELECT
-# REGEXP_LIKE(ascii_1, 'an'),
-# REGEXP_LIKE(unicode_1, 'таФ'),
-# REGEXP_LIKE(ascii_1, NULL),
-# REGEXP_LIKE(unicode_1, NULL)
-#FROM test_basic_operator;
-#----
-#false false NULL NULL
-#true false NULL NULL
-#false true NULL NULL
-#NULL NULL NULL NULL
+query BBBBBBBB
+SELECT
+ -- without flags
+ REGEXP_LIKE(ascii_1, 'an'),
+ REGEXP_LIKE(unicode_1, 'таФ'),
+ REGEXP_LIKE(ascii_1, NULL),
+ REGEXP_LIKE(unicode_1, NULL),
+ -- with flags
+ REGEXP_LIKE(ascii_1, 'AN', 'i'),
+ REGEXP_LIKE(unicode_1, 'ТаФ', 'i'),
+ REGEXP_LIKE(ascii_1, NULL, 'i'),
+ REGEXP_LIKE(unicode_1, NULL, 'i')
+ FROM test_basic_operator;
+----
+false false NULL NULL true false NULL NULL
+true false NULL NULL true false NULL NULL
+false true NULL NULL false true NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL
# --------------------------------------
# Test REGEXP_MATCH
# --------------------------------------
-# TODO: LargeString does not support REGEXP_MATCH. Enable this after fixing the issue
-# see issue: https://github.com/apache/datafusion/issues/12664
-#query ????
-#SELECT
-# REGEXP_MATCH(ascii_1, 'an'),
-# REGEXP_MATCH(unicode_1, 'таФ'),
-# REGEXP_MATCH(ascii_1, NULL),
-# REGEXP_MATCH(unicode_1, NULL)
-#FROM test_basic_operator;
-#----
-#NULL NULL NULL NULL
-#[an] NULL NULL NULL
-#NULL [таФ] NULL NULL
-#NULL NULL NULL NULL
+query ????????
+SELECT
+ -- without flags
+ REGEXP_MATCH(ascii_1, 'an'),
+ REGEXP_MATCH(unicode_1, 'ТаФ'),
+ REGEXP_MATCH(ascii_1, NULL),
+ REGEXP_MATCH(unicode_1, NULL),
+ -- with flags
+ REGEXP_MATCH(ascii_1, 'AN', 'i'),
+ REGEXP_MATCH(unicode_1, 'таФ', 'i'),
+ REGEXP_MATCH(ascii_1, NULL, 'i'),
+ REGEXP_MATCH(unicode_1, NULL, 'i')
+FROM test_basic_operator;
+----
+NULL NULL NULL NULL [An] NULL NULL NULL
+[an] NULL NULL NULL [an] NULL NULL NULL
+NULL NULL NULL NULL NULL [таФ] NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL
# --------------------------------------
# Test REPEAT
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt
index fb82726e3a..4e7857ad80 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -50,36 +50,6 @@ false false
false true
NULL NULL
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query BBBB
-SELECT
- REGEXP_LIKE(ascii_1, 'an'),
- REGEXP_LIKE(unicode_1, 'таФ'),
- REGEXP_LIKE(ascii_1, NULL),
- REGEXP_LIKE(unicode_1, NULL)
-FROM test_basic_operator;
-----
-false false NULL NULL
-true false NULL NULL
-false true NULL NULL
-NULL NULL NULL NULL
-
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query ????
-SELECT
- REGEXP_MATCH(ascii_1, 'an'),
- REGEXP_MATCH(unicode_1, 'таФ'),
- REGEXP_MATCH(ascii_1, NULL),
- REGEXP_MATCH(unicode_1, NULL)
-FROM test_basic_operator;
-----
-NULL NULL NULL NULL
-[an] NULL NULL NULL
-NULL [таФ] NULL NULL
-NULL NULL NULL NULL
-
# TODO: move it back to `string_query.slt.part` after fixing the issue
# see detail: https://github.com/apache/datafusion/issues/12670
query IIIIII
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]