This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 59b63fe388d branch-2.1: [fix](Nereids) string constant folding process
regex delim by mistake #48783 (#48876)
59b63fe388d is described below
commit 59b63fe388d01968e7ef8524e91603c27789cfc3
Author: morrySnow <[email protected]>
AuthorDate: Tue Mar 11 12:13:12 2025 +0800
branch-2.1: [fix](Nereids) string constant folding process regex delim by
mistake #48783 (#48876)
pick from master #48783
---
.../functions/executable/StringArithmetic.java | 57 +++++-----
.../fold_constant_string_arithmatic.groovy | 123 ++++++++++++++++++++-
2 files changed, 149 insertions(+), 31 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index eed12d5e99b..df6d60ccc36 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -39,8 +39,11 @@ import
org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
+import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.qe.ConnectContext;
+import com.google.common.collect.ImmutableList;
+
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
@@ -51,6 +54,7 @@ import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
/**
* executable functions:
@@ -661,14 +665,18 @@ public class StringArithmetic {
}
/**
- * Executable arithmetic functions split_by_char
+ * Executable arithmetic functions split_by_string
*/
- @ExecFunction(name = "split_by_char")
- public static Expression splitByChar(StringLikeLiteral first,
StringLikeLiteral second) {
- String[] result = first.getValue().split(second.getValue(), -1);
+ @ExecFunction(name = "split_by_string")
+ public static Expression splitByString(StringLikeLiteral first,
StringLikeLiteral second) {
+ if (first.getValue().isEmpty()) {
+ return new ArrayLiteral(ImmutableList.of(),
ArrayType.of(first.getDataType()));
+ }
+ int limit = second.getValue().isEmpty() ? 0 : -1;
+ String[] result =
first.getValue().split(Pattern.quote(second.getValue()), limit);
List<Literal> items = new ArrayList<>();
- for (int i = 1; i < result.length; i++) {
- items.add((Literal) castStringLikeLiteral(first, result[i]));
+ for (String s : result) {
+ items.add((Literal) castStringLikeLiteral(first, s));
}
return new ArrayLiteral(items);
}
@@ -678,35 +686,34 @@ public class StringArithmetic {
*/
@ExecFunction(name = "split_part")
public static Expression splitPart(StringLikeLiteral first,
StringLikeLiteral chr, IntegerLiteral number) {
+ if (number.getValue() == 0) {
+ return new NullLiteral(first.getDataType());
+ }
+ if (chr.getValue().isEmpty()) {
+ return castStringLikeLiteral(first, "");
+ }
+ if (first.getValue().isEmpty()) {
+ return new NullLiteral(first.getDataType());
+ }
if (first.getValue().equals(chr.getValue())) {
if (Math.abs(number.getValue()) == 1 ||
Math.abs(number.getValue()) == 2) {
return castStringLikeLiteral(first, "");
+ } else {
+ return new NullLiteral(first.getDataType());
}
}
String separator = chr.getValue();
- String[] parts = null;
+ String[] parts;
if (number.getValue() < 0) {
StringBuilder sb = new StringBuilder(first.getValue());
- StringBuilder seperatorBuilder = new StringBuilder(separator);
- separator = seperatorBuilder.reverse().toString();
- if (".$|()[{^?*+\\".contains(separator) ||
separator.startsWith("\\")) {
- separator = "\\" + separator;
- }
- parts = sb.reverse().toString().split(separator, -1);
+ StringBuilder separatorBuilder = new StringBuilder(separator);
+ separator = separatorBuilder.reverse().toString();
+ parts = sb.reverse().toString().split(Pattern.quote(separator),
-1);
} else {
- if (".$|()[{^?*+\\".contains(separator) ||
separator.startsWith("\\")) {
- separator = "\\" + separator;
- }
- parts = first.getValue().split(separator, -1);
+ parts = first.getValue().split(Pattern.quote(separator), -1);
}
- if (parts.length < Math.abs(number.getValue()) || number.getValue() ==
0) {
- if (parts.length == Math.abs(number.getValue())) {
- if (number.getValue() < 0 &&
first.getValue().startsWith(chr.getValue())
- || number.getValue() > 0 &&
first.getValue().endsWith(chr.getValue())) {
- return castStringLikeLiteral(first, "");
- }
- }
+ if (parts.length < Math.abs(number.getValue())) {
return new NullLiteral(first.getDataType());
} else if (number.getValue() < 0) {
StringBuilder result = new
StringBuilder(parts[Math.abs(number.getValue()) - 1]);
@@ -724,7 +731,7 @@ public class StringArithmetic {
if (chr.getValue().isEmpty()) {
return chr;
}
- String[] parts = first.getValue().split(chr.getValue(), -1);
+ String[] parts = first.getValue().split(Pattern.quote(chr.getValue()),
-1);
if (Math.abs(number.getValue()) >= parts.length) {
return first;
}
diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index b9614492436..64ed3e6ce65 100644
--- a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -451,18 +451,80 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select split_by_string(cast('abc' as string), cast('::' as
string))")
testFoldConst("select split_by_string('上海天津北京杭州', '北')")
testFoldConst("select split_by_string('abccccc', 'c')")
+ testFoldConst("select split_by_string('abcde','')")
+ testFoldConst("select split_by_string('你a好b世c界','')")
+ testFoldConst("select split_by_string('12553','')")
+ testFoldConst("select split_by_string('','')")
+ testFoldConst("select split_by_string('',',')")
+ testFoldConst("select split_by_string('','a')")
+ testFoldConst("select split_by_string('','abc')")
+ testFoldConst("select split_by_string('abc','')")
+ testFoldConst("select split_by_string('a1b1c1d','1')")
+ testFoldConst("select split_by_string(',,,',',')")
+ testFoldConst("select split_by_string('a,b,c,abcde',',')")
+ testFoldConst("select split_by_string(',,a,b,c,',',')")
+ testFoldConst("select split_by_string('null',',')")
+ testFoldConst("select split_by_string('1,,2,3,,4,5,,abcde', ',,')")
+ testFoldConst("select split_by_string('abcde','')")
+ testFoldConst("select split_by_string('1,,2,3,,,,,,4,5, abcde', ',,')")
+ testFoldConst("select split_by_string(',,,,',',,')")
+ testFoldConst("select split_by_string('a,,b,,c',',,')")
+ testFoldConst("select split_by_string('a,,b,,c,,',',,')")
+ testFoldConst("select split_by_string(',,a,,b,,c,,',',,')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++')")
+ testFoldConst("SELECT
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\')")
// split_part
- testFoldConst("select split_part('a,b,c', ',', -1)")
- testFoldConst("select split_part('abc##123###xyz', '##', 0)")
+ testFoldConst("select split_part('a,b,c', '', -2)")
+ testFoldConst("select split_part('a,b,c', '', -1)")
+ testFoldConst("select split_part('a,b,c', '', 0)")
+ testFoldConst("select split_part('a,b,c', '', 1)")
+ testFoldConst("select split_part('a,b,c', '', 2)")
+ testFoldConst("select split_part('', '', -2)")
+ testFoldConst("select split_part('', '', -1)")
+ testFoldConst("select split_part('', '', 0)")
+ testFoldConst("select split_part('', '', 1)")
+ testFoldConst("select split_part('', '', 2)")
+ testFoldConst("select split_part('', 'abc', -2)")
+ testFoldConst("select split_part('', 'abc', -1)")
+ testFoldConst("select split_part('', 'abc', 0)")
+ testFoldConst("select split_part('', 'abc', 1)")
+ testFoldConst("select split_part('', 'abc', 2)")
+ testFoldConst("select split_part('abc##123###xyz', '##', -10)")
+ testFoldConst("select split_part('abc##123###xyz', '##', -4)")
+ testFoldConst("select split_part('abc##123###xyz', '##', -3)")
+ testFoldConst("select split_part('abc##123###xyz', '##', -2)")
testFoldConst("select split_part('abc##123###xyz', '##', -1)")
+ testFoldConst("select split_part('abc##123###xyz', '##', 0)")
testFoldConst("select split_part('abc##123###xyz', '##', 1)")
- testFoldConst("select split_part('abc##123###xyz', '##', -2)")
+ testFoldConst("select split_part('abc##123###xyz', '##', 2)")
testFoldConst("select split_part('abc##123###xyz', '##', 3)")
- testFoldConst("select split_part('abc##123###xyz', '##', -4)")
- testFoldConst("select split_part('abc##123###xyz', '##', 5)")
+ testFoldConst("select split_part('abc##123###xyz', '##', 4)")
+ testFoldConst("select split_part('abc##123###xyz', '##', 10)")
+ testFoldConst("select split_part('a,b,c', ',', -100)")
+ testFoldConst("select split_part('a,b,c', ',', -5)")
+ testFoldConst("select split_part('a,b,c', ',', -4)")
+ testFoldConst("select split_part('a,b,c', ',', -3)")
+ testFoldConst("select split_part('a,b,c', ',', -2)")
+ testFoldConst("select split_part('a,b,c', ',', -1)")
+ testFoldConst("select split_part('a,b,c', ',', -0)")
+ testFoldConst("select split_part('a,b,c', ',', 0)")
+ testFoldConst("select split_part('a,b,c', ',', 1)")
testFoldConst("select split_part('a,b,c', ',', 2)")
+ testFoldConst("select split_part('a,b,c', ',', 3)")
+ testFoldConst("select split_part('a,b,c', ',', 4)")
testFoldConst("select split_part('a,b,c', ',', 5)")
+ testFoldConst("select split_part('a,b,c', ',', 100)")
testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as
string), -1)")
testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as
string), 2)")
testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as
string), 5)")
@@ -475,6 +537,7 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select split_part('hello world', ' ', -2)")
testFoldConst("select split_part('hello world', ' ', 2)")
testFoldConst("select split_part('hello world', ' ', -3)")
+ testFoldConst("select split_part('hello world', ' ', -3)")
testFoldConst("SELECT split_part('哈哈哈AAA','A', -5)")
testFoldConst("SELECT split_part('哈哈哈AAA','A', -4)")
testFoldConst("SELECT split_part('哈哈哈AAA','A', -3)")
@@ -495,7 +558,31 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("SELECT split_part('哈哈哈AA+','A', 2)")
testFoldConst("SELECT split_part('哈哈哈AA+','A', 3)")
testFoldConst("SELECT split_part('哈哈哈AA+','A', 4)")
-
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 1)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 2)")
+ testFoldConst("SELECT
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 2)")
+
// starts_with
testFoldConst("select starts_with('hello world','hello')")
testFoldConst("select starts_with('hello world',null)")
@@ -640,6 +727,30 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("SELECT substring_index('哈哈哈AA+','A', 2)")
testFoldConst("SELECT substring_index('哈哈哈AA+','A', 3)")
testFoldConst("SELECT substring_index('哈哈哈AA+','A', 4)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 1)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 2)")
+ testFoldConst("SELECT
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 2)")
// trim
testFoldConst("select trim('11111', 11)")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]