This is an automated email from the ASF dual-hosted git repository.
jhyde pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new c6b3876ff4 [CALCITE-5979] Enable REGEXP_REPLACE function in BigQuery
library
c6b3876ff4 is described below
commit c6b3876ff4707527ca3c952036312a11bbc4ca4f
Author: Jerin John <[email protected]>
AuthorDate: Tue Oct 10 01:33:22 2023 -0500
[CALCITE-5979] Enable REGEXP_REPLACE function in BigQuery library
The REGEXP_REPLACE function was previously available in
MySQL and Oracle libraries; this change adds it to BigQuery
library. In BigQuery, the replacement string has a slightly
different syntax - using '$' rather than '\' to indicate
capturing groups. We add method
SqlConformance.isRegexReplaceCaptureGroupDollarIndexed() to
control this behavior.
Close apache/calcite#3460
---
babel/src/test/resources/sql/big-query.iq | 37 +++++++++
.../calcite/adapter/enumerable/RexImpTable.java | 38 +++++++--
.../apache/calcite/runtime/CalciteResource.java | 5 +-
.../org/apache/calcite/runtime/SqlFunctions.java | 97 +++++++++++++++++-----
.../calcite/sql/fun/SqlLibraryOperators.java | 5 +-
.../sql/validate/SqlAbstractConformance.java | 4 +
.../calcite/sql/validate/SqlConformance.java | 16 ++++
.../calcite/sql/validate/SqlConformanceEnum.java | 9 ++
.../sql/validate/SqlDelegatingConformance.java | 4 +
.../calcite/runtime/CalciteResource.properties | 3 +-
.../org/apache/calcite/test/SqlFunctionsTest.java | 66 +++++++++++++++
site/_docs/reference.md | 2 +-
.../org/apache/calcite/test/SqlOperatorTest.java | 29 ++++++-
13 files changed, 286 insertions(+), 29 deletions(-)
diff --git a/babel/src/test/resources/sql/big-query.iq
b/babel/src/test/resources/sql/big-query.iq
index ba9d034515..5ffa0bdf66 100755
--- a/babel/src/test/resources/sql/big-query.iq
+++ b/babel/src/test/resources/sql/big-query.iq
@@ -1413,6 +1413,43 @@ SELECT REGEXP_INSTR("abcadcabcaecghi", "a.c", 3, 2, -2);
Invalid integer input '-2' for argument 'occurrence_position' in REGEXP_INSTR
!error
+#####################################################################
+# REGEXP_REPLACE(value, regexp, replacement)
+#
+# Returns a STRING where all substrings of value that match regexp are
replaced with replacement.
+# Supports backslashed-escaped digits in replacement argument for
corresponding capturing groups
+# in regexp. Returns an exception if regex is invalid.
+
+SELECT REGEXP_REPLACE("qw1e1rt1y", "1", "X");
++-----------+
+| EXPR$0 |
++-----------+
+| qwXeXrtXy |
++-----------+
+(1 row)
+
+!ok
+
+SELECT REGEXP_REPLACE("a0b1c2d3", "a|d", "X");
++----------+
+| EXPR$0 |
++----------+
+| X0b1c2X3 |
++----------+
+(1 row)
+
+!ok
+
+SELECT REGEXP_REPLACE("1=00--20=0", "(-)", "#");
++------------+
+| EXPR$0 |
++------------+
+| 1=00##20=0 |
++------------+
+(1 row)
+
+!ok
+
#####################################################################
# REGEXP_SUBSTR(value, regexp[, position[, occurrence]])
#
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index c6cd136aba..eaf12af158 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -804,11 +804,8 @@ public class RexImpTable {
NotImplementor.of(insensitiveImplementor));
map.put(NEGATED_POSIX_REGEX_CASE_SENSITIVE,
NotImplementor.of(sensitiveImplementor));
- defineReflective(REGEXP_REPLACE,
- BuiltInMethod.REGEXP_REPLACE3.method,
- BuiltInMethod.REGEXP_REPLACE4.method,
- BuiltInMethod.REGEXP_REPLACE5.method,
- BuiltInMethod.REGEXP_REPLACE6.method);
+ map.put(REGEXP_REPLACE, new RegexpReplaceImplementor());
+
// Multisets & arrays
defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
@@ -2448,6 +2445,37 @@ public class RexImpTable {
}
}
+ /** Implementor for the {@code REGEXP_REPLACE} function. */
+ private static class RegexpReplaceImplementor extends
AbstractRexCallImplementor {
+ RegexpReplaceImplementor() {
+ super("regexp_replace", NullPolicy.STRICT, false);
+ }
+
+ @Override Expression implementSafe(final RexToLixTranslator translator,
+ final RexCall call, final List<Expression> argValueList) {
+ // Boolean indicating if dialect uses default $-based indexing for
+ // regex capturing group (false means double-backslash-based indexing)
+ final boolean dollarIndexed =
+ translator.conformance.isRegexReplaceCaptureGroupDollarIndexed();
+
+ // Standard REGEXP_REPLACE implementation for default indexing.
+ if (dollarIndexed) {
+ final ReflectiveImplementor implementor =
+ new ReflectiveImplementor(
+ ImmutableList.of(BuiltInMethod.REGEXP_REPLACE3.method,
+ BuiltInMethod.REGEXP_REPLACE4.method,
+ BuiltInMethod.REGEXP_REPLACE5.method,
+ BuiltInMethod.REGEXP_REPLACE6.method));
+ return implementor.implementSafe(translator, call, argValueList);
+ }
+
+ // Custom regexp replace method to preprocess double-backslashes into
$-based indices.
+ return
Expressions.call(Expressions.new_(SqlFunctions.RegexFunction.class),
+ "regexpReplaceNonDollarIndexed",
+ argValueList);
+ }
+ }
+
/** Implementor for the {@code MONTHNAME} and {@code DAYNAME} functions.
* Each takes a {@link java.util.Locale} argument. */
private static class PeriodNameImplementor extends
AbstractRexCallImplementor {
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index ee03bfe959..40373f9b01 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -1019,12 +1019,15 @@ public interface CalciteResource {
ExInst<RuntimeException> invalidRegexInputForRegexpFunctions(String value,
String methodName);
@BaseMessage("Multiple capturing groups (count={0}) not allowed in regex
input for {1}")
- ExInst<RuntimeException> multipleCapturingGroupsForRegexpExtract(String
value,
+ ExInst<RuntimeException> multipleCapturingGroupsForRegexpFunctions(String
value,
String methodName);
@BaseMessage("Invalid input for REGEXP_REPLACE: ''{0}''")
ExInst<CalciteException> invalidInputForRegexpReplace(String value);
+ @BaseMessage("Invalid replacement pattern for REGEXP_REPLACE: ''{0}''")
+ ExInst<CalciteException> invalidReplacePatternForRegexpReplace(String value);
+
@BaseMessage("Invalid input for JSON_INSERT: jsonDoc: ''{0}'', kvs: ''{1}''")
ExInst<CalciteException> invalidInputForJsonInsert(String jsonDoc, String
kvs);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 67fee0b5f3..a92e1a95a9 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -390,7 +390,13 @@ public class SqlFunctions {
.maximumSize(FUNCTION_LEVEL_CACHE_MAX_SIZE.value())
.build(CacheLoader.from(Key::toPattern));
- /** Helper for regex validation in REGEXP_* fns. */
+ private final LoadingCache<String, String> replacementStrCache =
+ CacheBuilder.newBuilder()
+ .maximumSize(FUNCTION_LEVEL_CACHE_MAX_SIZE.value())
+
.build(CacheLoader.from(RegexFunction::replaceNonDollarIndexedString));
+
+ /** Validate regex arguments in REGEXP_* fns, throws an exception
+ * for invalid regex patterns, else returns a Pattern object. */
private Pattern validateRegexPattern(String regex, String methodName) {
try {
// Uses java.util.regex as a standard for regex processing
@@ -406,36 +412,68 @@ public class SqlFunctions {
}
}
- /** Helper for multiple capturing group regex check in REGEXP_* fns. */
+ /** Checks for multiple capturing groups in regex arguments in REGEXP_*
+ * functions. Throws if the regex pattern has more than 1 capturing
+ * group. */
private static void checkMultipleCapturingGroupsInRegex(Matcher matcher,
String methodName) {
if (matcher.groupCount() > 1) {
- throw RESOURCE.multipleCapturingGroupsForRegexpExtract(
+ throw RESOURCE.multipleCapturingGroupsForRegexpFunctions(
Integer.toString(matcher.groupCount()), methodName).ex();
}
}
- /** Helper for checking values of position and occurrence arguments in
REGEXP_* fns.
- * Regex fns not using occurrencePosition param pass a default value of 0.
- * Throws an exception or returns true in case of failed value checks. */
- private static boolean checkPosOccurrenceParamValues(int position,
+ /** Validates the value ranges for position and occurrence arguments in
+ * REGEXP_* functions. Functions not using the {@code occurrencePosition}
+ * parameter pass a default value of 0. Throws an exception or returns
+ * false if any arguments are beyond accepted range; returns true if all
+ * argument values are valid. */
+ private static boolean validatePosOccurrenceParamValues(int position,
int occurrence, int occurrencePosition, String value, String
methodName) {
if (position <= 0) {
- throw
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(position),
- "position", methodName).ex();
+ throw RESOURCE.invalidIntegerInputForRegexpFunctions(
+ Integer.toString(position), "position", methodName).ex();
}
if (occurrence <= 0) {
- throw
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(occurrence),
- "occurrence", methodName).ex();
+ throw RESOURCE.invalidIntegerInputForRegexpFunctions(
+ Integer.toString(occurrence), "occurrence", methodName).ex();
}
if (occurrencePosition != 0 && occurrencePosition != 1) {
- throw
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(occurrencePosition),
- "occurrence_position", methodName).ex();
+ throw RESOURCE.invalidIntegerInputForRegexpFunctions(
+ Integer.toString(occurrencePosition), "occurrence_position",
+ methodName).ex();
}
- if (position <= value.length()) {
- return false;
+ return position <= value.length();
+ }
+
+ /** Preprocesses double-backslash-based indexing for capturing groups into
+ * $-based indices recognized by java regex, throws an error for invalid
escapes. */
+ public static String replaceNonDollarIndexedString(String replacement) {
+ // Explicitly escaping any $ symbols coming from input
+ // to ignore them from being considered as capturing group index
+ String indexedReplacement =
+ replacement.replace("\\\\", "\\")
+ .replace("$", "\\$");
+
+ // Check each occurrence of escaped chars, convert '\<n>' integers into
'$<n>' indices,
+ // keep occurrences of '\\' and '\$', throw an error for any other
invalid escapes
+ int lastOccIdx = indexedReplacement.indexOf("\\");
+ while (lastOccIdx != -1 && lastOccIdx < indexedReplacement.length() - 1)
{
+ // Fetch escaped symbol following the current '\' occurrence
+ final char escapedChar = indexedReplacement.charAt(lastOccIdx + 1);
+
+ // Replace '\<n>' with '$<n>' if escaped char is an integer
+ if (Character.isDigit(escapedChar)) {
+ indexedReplacement = indexedReplacement.replaceFirst("\\\\(\\d)",
"\\$$1");
+ } else if (escapedChar != '\\' && escapedChar != '$') {
+ // Throw an error if escaped char is not an escaped '\' or an
escaped '$'
+ throw
RESOURCE.invalidReplacePatternForRegexpReplace(replacement).ex();
+ }
+ // Fetch next occurrence index after current escaped char
+ lastOccIdx = indexedReplacement.indexOf("\\", lastOccIdx + 2);
}
- return true;
+
+ return indexedReplacement;
}
/** SQL {@code REGEXP_CONTAINS(value, regexp)} function.
@@ -470,7 +508,7 @@ public class SqlFunctions {
final String methodName = "REGEXP_EXTRACT";
final Pattern pattern = validateRegexPattern(regex, methodName);
- if (checkPosOccurrenceParamValues(position, occurrence, 0, value,
methodName)) {
+ if (!validatePosOccurrenceParamValues(position, occurrence, 0, value,
methodName)) {
return null;
}
@@ -547,8 +585,9 @@ public class SqlFunctions {
final String methodName = "REGEXP_INSTR";
final Pattern pattern = validateRegexPattern(regex, methodName);
- if (checkPosOccurrenceParamValues(position, occurrence,
occurrencePosition, value,
- methodName) || regex.isEmpty()) {
+ if (regex.isEmpty()
+ || !validatePosOccurrenceParamValues(position, occurrence,
+ occurrencePosition, value, methodName)) {
return 0;
}
@@ -604,6 +643,26 @@ public class SqlFunctions {
return Unsafe.regexpReplace(s, pattern, replacement, pos, occurrence);
}
+ /** SQL {@code REGEXP_REPLACE} function with 3 arguments with
+ * {@code \\} based indexing for capturing groups. */
+ public String regexpReplaceNonDollarIndexed(String s, String regex,
+ String replacement) {
+ // Modify double-backslash capturing group indices in replacement
argument,
+ // retrieved from cache when available.
+ String indexedReplacement;
+ try {
+ indexedReplacement = replacementStrCache.getUnchecked(replacement);
+ } catch (UncheckedExecutionException e) {
+ if (e.getCause() instanceof CalciteException) {
+ throw
RESOURCE.invalidReplacePatternForRegexpReplace(replacement).ex();
+ }
+ throw e;
+ }
+
+ // Call generic regexp replace method with modified replacement pattern
+ return regexpReplace(s, regex, indexedReplacement, 1, 0, null);
+ }
+
private static int makeRegexpFlags(String stringFlags) {
int flags = 0;
for (int i = 0; i < stringFlags.length(); ++i) {
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 1cb7867690..e05c4fd1c4 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -516,7 +516,10 @@ public abstract class SqlLibraryOperators {
OperandTypes.STRING_STRING_OPTIONAL_INTEGER_OPTIONAL_INTEGER_OPTIONAL_INTEGER,
SqlFunctionCategory.STRING);
- @LibraryOperator(libraries = {MYSQL, ORACLE})
+ /** The "REGEXP_REPLACE(value, regexp, rep [, pos [, occurrence [,
matchType]]])"
+ * function. Replaces all substrings of value that match regexp with
+ * {@code rep} and returns modified value. */
+ @LibraryOperator(libraries = {BIG_QUERY, MYSQL, ORACLE})
public static final SqlFunction REGEXP_REPLACE = new
SqlRegexpReplaceFunction();
/** The "REGEXP_SUBSTR(value, regexp[, position[, occurrence]])" function.
diff --git
a/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
b/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
index 3205a48b40..14c2c73e35 100644
---
a/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
+++
b/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
@@ -105,6 +105,10 @@ public abstract class SqlAbstractConformance implements
SqlConformance {
return SqlConformanceEnum.DEFAULT.isOffsetLimitAllowed();
}
+ @Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
+ return
SqlConformanceEnum.DEFAULT.isRegexReplaceCaptureGroupDollarIndexed();
+ }
+
@Override public boolean isPercentRemainderAllowed() {
return SqlConformanceEnum.DEFAULT.isPercentRemainderAllowed();
}
diff --git
a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
index b8501f02f1..896db041e8 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
@@ -256,6 +256,22 @@ public interface SqlConformance {
*/
boolean isMinusAllowed();
+ /**
+ * Whether this dialect uses {@code $} (dollar) for indexing capturing groups
+ * in the replacement string of regular expression functions such as
+ * {@code REGEXP_REPLACE}. If false, the dialect uses {@code \\} (backslash)
+ * for indexing capturing groups.
+ *
+ * <p>For example, {@code REGEXP_REPLACE("abc", "a(.)c", "X\\1")} in BigQuery
+ * is equivalent to {@code REGEXP_REPLACE("abc", "a(.)c", "X$1")} in MySQL;
+ * both produce the result "Xb".
+ *
+ * <p>Among the built-in conformance levels, false in
+ * {@link SqlConformanceEnum#BIG_QUERY};
+ * true otherwise.
+ */
+ boolean isRegexReplaceCaptureGroupDollarIndexed();
+
/**
* Whether {@code CROSS APPLY} and {@code OUTER APPLY} operators are allowed
* in the parser.
diff --git
a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
index a610f19f16..1b7bd1a811 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
@@ -240,6 +240,15 @@ public enum SqlConformanceEnum implements SqlConformance {
}
}
+ @Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
+ switch (this) {
+ case BIG_QUERY:
+ return false;
+ default:
+ return true;
+ }
+ }
+
@Override public boolean isPercentRemainderAllowed() {
switch (this) {
case BABEL:
diff --git
a/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
b/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
index c8e2f7cdd2..7b60bb4445 100644
---
a/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
+++
b/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
@@ -87,6 +87,10 @@ public class SqlDelegatingConformance implements
SqlConformance {
return delegate.isMinusAllowed();
}
+ @Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
+ return delegate.isRegexReplaceCaptureGroupDollarIndexed();
+ }
+
@Override public boolean isApplyAllowed() {
return delegate.isApplyAllowed();
}
diff --git
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index 5475f2dafd..f55d4cd7b1 100644
---
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -332,8 +332,9 @@ InvalidInputForJsonRemove=Invalid input for JSON_REMOVE:
document: ''{0}'', json
InvalidInputForJsonStorageSize=Invalid input for JSON_STORAGE_SIZE: ''{0}''
InvalidIntegerInputForRegexpFunctions=Invalid integer input ''{0}'' for
argument ''{1}'' in {2}
InvalidRegexInputForRegexpFunctions=Invalid regular expression for {1}: ''{0}''
-MultipleCapturingGroupsForRegexpExtract=Multiple capturing groups (count={0})
not allowed in regex input for {1}
+MultipleCapturingGroupsForRegexpFunctions=Multiple capturing groups
(count={0}) not allowed in regex input for {1}
InvalidInputForRegexpReplace=Invalid input for REGEXP_REPLACE: ''{0}''
+InvalidReplacePatternForRegexpReplace=Invalid replacement pattern for
REGEXP_REPLACE: ''{0}''
InvalidInputForJsonInsert=Invalid input for JSON_INSERT: jsonDoc: ''{0}'',
kvs: ''{1}''
InvalidInputForJsonReplace=Invalid input for JSON_REPLACE: jsonDoc: ''{0}'',
kvs: ''{1}''
InvalidInputForJsonSet=Invalid input for JSON_SET: jsonDoc: ''{0}'', kvs:
''{1}''
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index f31a182979..a15f70cd41 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -482,6 +482,72 @@ class SqlFunctionsTest {
}
}
+ @Test void testReplaceNonDollarIndexedString() {
+
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\4_\\\\2"),
+ is("$4_$2"));
+
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("abc123"),
+ is("abc123"));
+
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("$007"),
+ is("\\$007"));
+
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\\\\\
\\\\\\\\"),
+ is("\\\\ \\\\"));
+
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\\\\\$
$\\\\\\\\"),
+ is("\\\\\\$ \\$\\\\"));
+ try {
+ SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\-x");
+ fail("'regexp_replace' with invalid replacement pattern is not
possible");
+ } catch (CalciteException e) {
+ assertThat(e.getMessage(),
+ is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\-x'"));
+ }
+ try {
+ SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\ \\\\");
+ fail("'regexp_replace' with invalid replacement pattern is not
possible");
+ } catch (CalciteException e) {
+ assertThat(e.getMessage(),
+ is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\ \\\\'"));
+ }
+ try {
+ SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\a");
+ fail("'regexp_replace' with invalid replacement pattern is not
possible");
+ } catch (CalciteException e) {
+ assertThat(e.getMessage(),
+ is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\a'"));
+ }
+ }
+
+ @Test void testRegexpReplaceNonDollarIndexed() {
+ final SqlFunctions.RegexFunction f = new SqlFunctions.RegexFunction();
+ assertThat(f.regexpReplaceNonDollarIndexed("abascusB", "b", "X"),
is("aXascusB"));
+ assertThat(f.regexpReplaceNonDollarIndexed("abc01def02ghi", "[a-z]+",
"X"), is("X01X02X"));
+ assertThat(f.regexpReplaceNonDollarIndexed("a0b1c2d3", "0|2", "X"),
is("aXb1cXd3"));
+
+ // Test double-backslash indexing for capturing groups
+ assertThat(f.regexpReplaceNonDollarIndexed("abc_defcon",
"([a-z])_([a-z])", "\\\\2_\\\\1"),
+ is("abd_cefcon"));
+ assertThat(f.regexpReplaceNonDollarIndexed("1\\2\\3\\4\\5", "2.(.).4",
"\\\\1"),
+ is("1\\3\\5"));
+ assertThat(f.regexpReplaceNonDollarIndexed("abc16", "b(.*)(\\d)",
"\\\\\\\\"),
+ is("a\\"));
+ assertThat(f.regexpReplaceNonDollarIndexed("qwerty123", "([0-9]+)",
"$147"),
+ is("qwerty$147"));
+
+ try {
+ f.regexpReplaceNonDollarIndexed("abcdefghijabc", "abc(.)", "\\\\-11x");
+ fail("'regexp_replace' with invalid replacement pattern is not
possible");
+ } catch (CalciteException e) {
+ assertThat(e.getMessage(),
+ is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\-11x'"));
+ }
+ try {
+ f.regexpReplaceNonDollarIndexed("abcdefghijabc", "abc(.)", "\\\11x");
+ fail("'regexp_replace' with invalid replacement pattern is not
possible");
+ } catch (CalciteException e) {
+ assertThat(e.getMessage(),
+ is("Invalid replacement pattern for REGEXP_REPLACE: '\\\tx'"));
+ }
+ }
+
@Test void testLower() {
assertThat(lower("A bCd Iijk"), is("a bcd iijk"));
}
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 93c6e615b9..d087340d53 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2795,7 +2795,7 @@ BigQuery's type system uses confusingly different names
for types and functions:
| b | REGEXP_EXTRACT(string, regexp [, position [, occurrence]]) | Returns the
substring in *string* that matches the *regexp*, starting search at *position*
(default 1), and until locating the nth *occurrence* (default 1). Returns NULL
if there is no match
| b | REGEXP_EXTRACT_ALL(string, regexp) | Returns an array of all
substrings in *string* that matches the *regexp*. Returns an empty array if
there is no match
| b | REGEXP_INSTR(string, regexp [, position [, occurrence [,
occurrence_position]]]) | Returns the lowest 1-based position of the substring
in *string* that matches the *regexp*, starting search at *position* (default
1), and until locating the nth *occurrence* (default 1). Setting
occurrence_position (default 0) to 1 returns the end position of substring + 1.
Returns 0 if there is no match
-| m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [,
matchType]]]) | Replaces all substrings of *string* that match *regexp* with
*rep* at the starting *pos* in expr (if omitted, the default is 1),
*occurrence* means which occurrence of a match to search for (if omitted, the
default is 1), *matchType* specifies how to perform matching
+| b m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [,
matchType]]]) | Replaces all substrings of *string* that match *regexp* with
*rep* at the starting *pos* in expr (if omitted, the default is 1),
*occurrence* specifies which occurrence of a match to search for (if omitted,
the default is 1), *matchType* specifies how to perform matching
| b | REGEXP_SUBSTR(string, regexp [, position [, occurrence]]) | Synonym for
REGEXP_EXTRACT
| b m p | REPEAT(string, integer) | Returns a string
consisting of *string* repeated of *integer* times; returns an empty string if
*integer* is less than 1
| b m | REVERSE(string) | Returns *string* with
the order of the characters reversed
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index df2ade110b..9b5caad9ad 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4892,6 +4892,8 @@ public class SqlOperatorTest {
final SqlOperatorFixture f0 = fixture();
final Consumer<SqlOperatorFixture> consumer = f -> {
f.setFor(SqlLibraryOperators.REGEXP_REPLACE);
+
+ // Tests for regexp replace generic functionality
f.checkString("regexp_replace('a b c', 'b', 'X')", "a X c",
"VARCHAR NOT NULL");
f.checkString("regexp_replace('abc def ghi', '[a-z]+', 'X')", "X X X",
@@ -4919,12 +4921,37 @@ public class SqlOperatorTest {
"VARCHAR NOT NULL");
f.checkString("regexp_replace('abc\t\ndef\t\nghi', '\\w+', '+')",
"+\t\n+\t\n+",
"VARCHAR NOT NULL");
+
f.checkQuery("select regexp_replace('a b c', 'b', 'X')");
f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1)");
f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1, 3)");
f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1, 3, 'i')");
};
- f0.forEachLibrary(list(SqlLibrary.MYSQL, SqlLibrary.ORACLE), consumer);
+ f0.forEachLibrary(list(SqlLibrary.MYSQL, SqlLibrary.ORACLE,
SqlLibrary.BIG_QUERY), consumer);
+
+ // Tests for double-backslash indexed capturing groups for regexp_replace
in BQ
+ final SqlOperatorFixture f1 =
+
f0.withLibrary(SqlLibrary.BIG_QUERY).withConformance(SqlConformanceEnum.BIG_QUERY);
+ f1.checkString("regexp_replace('abc16', 'b(.*)(\\d)', '\\\\2\\\\1X')",
"a6c1X",
+ "VARCHAR NOT NULL");
+ f1.checkString("regexp_replace('a\\bc56a\\bc37', 'b(.)(\\d)',
'\\\\2\\\\0X')",
+ "a\\5bc5X6a\\3bc3X7", "VARCHAR NOT NULL");
+ f1.checkString("regexp_replace('abcdefghijabc', 'abc(.)',
'\\\\\\\\123xyz')",
+ "\\123xyzefghijabc", "VARCHAR NOT NULL");
+ f1.checkString("regexp_replace('abcdefghijabc', 'abc(.)', '$1xy')",
+ "$1xyefghijabc", "VARCHAR NOT NULL");
+ f1.checkString("regexp_replace('abc123', 'b(.*)(\\d)', '\\\\\\\\$
$\\\\\\\\')",
+ "a\\$ $\\", "VARCHAR NOT NULL");
+
+ // Tests to verify double-backslashes are ignored for indexing in other
dialects
+ final SqlOperatorFixture f2 =
+
f0.withLibrary(SqlLibrary.MYSQL).withConformance(SqlConformanceEnum.MYSQL_5);
+ f2.checkString("regexp_replace('abc16', 'b(.*)(\\d)', '\\\\2\\\\1X')",
"a\\2\\1X",
+ "VARCHAR NOT NULL");
+ f2.checkString("regexp_replace('abcdefghijabc', 'abc(.)', '\\\\-11x')",
"\\-11xefghijabc",
+ "VARCHAR NOT NULL");
+ f2.checkString("regexp_replace('abcdefghijabc', 'abc(.)', '$1x')",
"dxefghijabc",
+ "VARCHAR NOT NULL");
}
@Test void testRegexpSubstrFunc() {