This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new c6b3876ff4 [CALCITE-5979] Enable REGEXP_REPLACE function in BigQuery 
library
c6b3876ff4 is described below

commit c6b3876ff4707527ca3c952036312a11bbc4ca4f
Author: Jerin John <[email protected]>
AuthorDate: Tue Oct 10 01:33:22 2023 -0500

    [CALCITE-5979] Enable REGEXP_REPLACE function in BigQuery library
    
    The REGEXP_REPLACE function was previously available in
    MySQL and Oracle libraries; this change adds it to BigQuery
    library. In BigQuery, the replacement string has a slightly
    different syntax - using '$' rather than '\' to indicate
    capturing groups. We add method
    SqlConformance.isRegexReplaceCaptureGroupDollarIndexed() to
    control this behavior.
    
    Close apache/calcite#3460
---
 babel/src/test/resources/sql/big-query.iq          | 37 +++++++++
 .../calcite/adapter/enumerable/RexImpTable.java    | 38 +++++++--
 .../apache/calcite/runtime/CalciteResource.java    |  5 +-
 .../org/apache/calcite/runtime/SqlFunctions.java   | 97 +++++++++++++++++-----
 .../calcite/sql/fun/SqlLibraryOperators.java       |  5 +-
 .../sql/validate/SqlAbstractConformance.java       |  4 +
 .../calcite/sql/validate/SqlConformance.java       | 16 ++++
 .../calcite/sql/validate/SqlConformanceEnum.java   |  9 ++
 .../sql/validate/SqlDelegatingConformance.java     |  4 +
 .../calcite/runtime/CalciteResource.properties     |  3 +-
 .../org/apache/calcite/test/SqlFunctionsTest.java  | 66 +++++++++++++++
 site/_docs/reference.md                            |  2 +-
 .../org/apache/calcite/test/SqlOperatorTest.java   | 29 ++++++-
 13 files changed, 286 insertions(+), 29 deletions(-)

diff --git a/babel/src/test/resources/sql/big-query.iq 
b/babel/src/test/resources/sql/big-query.iq
index ba9d034515..5ffa0bdf66 100755
--- a/babel/src/test/resources/sql/big-query.iq
+++ b/babel/src/test/resources/sql/big-query.iq
@@ -1413,6 +1413,43 @@ SELECT REGEXP_INSTR("abcadcabcaecghi", "a.c", 3, 2, -2);
 Invalid integer input '-2' for argument 'occurrence_position' in REGEXP_INSTR
 !error
 
+#####################################################################
+# REGEXP_REPLACE(value, regexp, replacement)
+#
+# Returns a STRING where all substrings of value that match regexp are 
replaced with replacement.
+# Supports backslashed-escaped digits in replacement argument for 
corresponding capturing groups
+# in regexp. Returns an exception if regex is invalid.
+
+SELECT REGEXP_REPLACE("qw1e1rt1y", "1", "X");
++-----------+
+| EXPR$0    |
++-----------+
+| qwXeXrtXy |
++-----------+
+(1 row)
+
+!ok
+
+SELECT REGEXP_REPLACE("a0b1c2d3", "a|d", "X");
++----------+
+| EXPR$0   |
++----------+
+| X0b1c2X3 |
++----------+
+(1 row)
+
+!ok
+
+SELECT REGEXP_REPLACE("1=00--20=0", "(-)", "#");
++------------+
+| EXPR$0     |
++------------+
+| 1=00##20=0 |
++------------+
+(1 row)
+
+!ok
+
 #####################################################################
 # REGEXP_SUBSTR(value, regexp[, position[, occurrence]])
 #
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index c6cd136aba..eaf12af158 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -804,11 +804,8 @@ public class RexImpTable {
           NotImplementor.of(insensitiveImplementor));
       map.put(NEGATED_POSIX_REGEX_CASE_SENSITIVE,
           NotImplementor.of(sensitiveImplementor));
-      defineReflective(REGEXP_REPLACE,
-          BuiltInMethod.REGEXP_REPLACE3.method,
-          BuiltInMethod.REGEXP_REPLACE4.method,
-          BuiltInMethod.REGEXP_REPLACE5.method,
-          BuiltInMethod.REGEXP_REPLACE6.method);
+      map.put(REGEXP_REPLACE, new RegexpReplaceImplementor());
+
 
       // Multisets & arrays
       defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
@@ -2448,6 +2445,37 @@ public class RexImpTable {
     }
   }
 
+  /** Implementor for the {@code REGEXP_REPLACE} function. */
+  private static class RegexpReplaceImplementor extends 
AbstractRexCallImplementor {
+    RegexpReplaceImplementor() {
+      super("regexp_replace", NullPolicy.STRICT, false);
+    }
+
+    @Override Expression implementSafe(final RexToLixTranslator translator,
+        final RexCall call, final List<Expression> argValueList) {
+      // Boolean indicating if dialect uses default $-based indexing for
+      // regex capturing group (false means double-backslash-based indexing)
+      final boolean dollarIndexed =
+          translator.conformance.isRegexReplaceCaptureGroupDollarIndexed();
+
+      // Standard REGEXP_REPLACE implementation for default indexing.
+      if (dollarIndexed) {
+        final ReflectiveImplementor implementor =
+            new ReflectiveImplementor(
+                ImmutableList.of(BuiltInMethod.REGEXP_REPLACE3.method,
+                    BuiltInMethod.REGEXP_REPLACE4.method,
+                    BuiltInMethod.REGEXP_REPLACE5.method,
+                    BuiltInMethod.REGEXP_REPLACE6.method));
+        return implementor.implementSafe(translator, call, argValueList);
+      }
+
+      // Custom regexp replace method to preprocess double-backslashes into 
$-based indices.
+      return 
Expressions.call(Expressions.new_(SqlFunctions.RegexFunction.class),
+          "regexpReplaceNonDollarIndexed",
+          argValueList);
+    }
+  }
+
   /** Implementor for the {@code MONTHNAME} and {@code DAYNAME} functions.
    * Each takes a {@link java.util.Locale} argument. */
   private static class PeriodNameImplementor extends 
AbstractRexCallImplementor {
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java 
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index ee03bfe959..40373f9b01 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -1019,12 +1019,15 @@ public interface CalciteResource {
   ExInst<RuntimeException> invalidRegexInputForRegexpFunctions(String value, 
String methodName);
 
   @BaseMessage("Multiple capturing groups (count={0}) not allowed in regex 
input for {1}")
-  ExInst<RuntimeException> multipleCapturingGroupsForRegexpExtract(String 
value,
+  ExInst<RuntimeException> multipleCapturingGroupsForRegexpFunctions(String 
value,
       String methodName);
 
   @BaseMessage("Invalid input for REGEXP_REPLACE: ''{0}''")
   ExInst<CalciteException> invalidInputForRegexpReplace(String value);
 
+  @BaseMessage("Invalid replacement pattern for REGEXP_REPLACE: ''{0}''")
+  ExInst<CalciteException> invalidReplacePatternForRegexpReplace(String value);
+
   @BaseMessage("Invalid input for JSON_INSERT: jsonDoc: ''{0}'', kvs: ''{1}''")
   ExInst<CalciteException> invalidInputForJsonInsert(String jsonDoc, String 
kvs);
 
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 67fee0b5f3..a92e1a95a9 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -390,7 +390,13 @@ public class SqlFunctions {
             .maximumSize(FUNCTION_LEVEL_CACHE_MAX_SIZE.value())
             .build(CacheLoader.from(Key::toPattern));
 
-    /** Helper for regex validation in REGEXP_* fns. */
+    private final LoadingCache<String, String> replacementStrCache =
+        CacheBuilder.newBuilder()
+            .maximumSize(FUNCTION_LEVEL_CACHE_MAX_SIZE.value())
+            
.build(CacheLoader.from(RegexFunction::replaceNonDollarIndexedString));
+
+    /** Validate regex arguments in REGEXP_* fns, throws an exception
+     * for invalid regex patterns, else returns a Pattern object. */
     private Pattern validateRegexPattern(String regex, String methodName) {
       try {
         // Uses java.util.regex as a standard for regex processing
@@ -406,36 +412,68 @@ public class SqlFunctions {
       }
     }
 
-    /** Helper for multiple capturing group regex check in REGEXP_* fns. */
+    /** Checks for multiple capturing groups in regex arguments in REGEXP_*
+     * functions. Throws if the regex pattern has more than 1 capturing
+     * group. */
     private static void checkMultipleCapturingGroupsInRegex(Matcher matcher,
         String methodName) {
       if (matcher.groupCount() > 1) {
-        throw RESOURCE.multipleCapturingGroupsForRegexpExtract(
+        throw RESOURCE.multipleCapturingGroupsForRegexpFunctions(
             Integer.toString(matcher.groupCount()), methodName).ex();
       }
     }
 
-    /** Helper for checking values of position and occurrence arguments in 
REGEXP_* fns.
-     * Regex fns not using occurrencePosition param pass a default value of 0.
-     * Throws an exception or returns true in case of failed value checks. */
-    private static boolean checkPosOccurrenceParamValues(int position,
+    /** Validates the value ranges for position and occurrence arguments in
+     * REGEXP_* functions. Functions not using the {@code occurrencePosition}
+     * parameter pass a default value of 0. Throws an exception or returns
+     * false if any arguments are beyond accepted range; returns true if all
+     * argument values are valid. */
+    private static boolean validatePosOccurrenceParamValues(int position,
         int occurrence, int occurrencePosition, String value, String 
methodName) {
       if (position <= 0) {
-        throw 
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(position),
-            "position", methodName).ex();
+        throw RESOURCE.invalidIntegerInputForRegexpFunctions(
+            Integer.toString(position), "position", methodName).ex();
       }
       if (occurrence <= 0) {
-        throw 
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(occurrence),
-            "occurrence", methodName).ex();
+        throw RESOURCE.invalidIntegerInputForRegexpFunctions(
+            Integer.toString(occurrence), "occurrence", methodName).ex();
       }
       if (occurrencePosition != 0 && occurrencePosition != 1) {
-        throw 
RESOURCE.invalidIntegerInputForRegexpFunctions(Integer.toString(occurrencePosition),
-            "occurrence_position", methodName).ex();
+        throw RESOURCE.invalidIntegerInputForRegexpFunctions(
+            Integer.toString(occurrencePosition), "occurrence_position",
+                methodName).ex();
       }
-      if (position <= value.length()) {
-        return false;
+      return position <= value.length();
+    }
+
+    /** Preprocesses double-backslash-based indexing for capturing groups into
+     * $-based indices recognized by java regex, throws an error for invalid 
escapes. */
+    public static String replaceNonDollarIndexedString(String replacement) {
+      // Explicitly escaping any $ symbols coming from input
+      // to ignore them from being considered as capturing group index
+      String indexedReplacement =
+          replacement.replace("\\\\", "\\")
+              .replace("$", "\\$");
+
+      // Check each occurrence of escaped chars, convert '\<n>' integers into 
'$<n>' indices,
+      // keep occurrences of '\\' and '\$', throw an error for any other 
invalid escapes
+      int lastOccIdx = indexedReplacement.indexOf("\\");
+      while (lastOccIdx != -1 && lastOccIdx < indexedReplacement.length() - 1) 
{
+        // Fetch escaped symbol following the current '\' occurrence
+        final char escapedChar = indexedReplacement.charAt(lastOccIdx + 1);
+
+        // Replace '\<n>' with '$<n>' if escaped char is an integer
+        if (Character.isDigit(escapedChar)) {
+          indexedReplacement = indexedReplacement.replaceFirst("\\\\(\\d)", 
"\\$$1");
+        } else if (escapedChar != '\\' && escapedChar != '$') {
+          // Throw an error if escaped char is not an escaped '\' or an 
escaped '$'
+          throw 
RESOURCE.invalidReplacePatternForRegexpReplace(replacement).ex();
+        }
+        // Fetch next occurrence index after current escaped char
+        lastOccIdx = indexedReplacement.indexOf("\\", lastOccIdx + 2);
       }
-      return true;
+
+      return indexedReplacement;
     }
 
     /** SQL {@code REGEXP_CONTAINS(value, regexp)} function.
@@ -470,7 +508,7 @@ public class SqlFunctions {
       final String methodName = "REGEXP_EXTRACT";
       final Pattern pattern = validateRegexPattern(regex, methodName);
 
-      if (checkPosOccurrenceParamValues(position, occurrence, 0, value, 
methodName)) {
+      if (!validatePosOccurrenceParamValues(position, occurrence, 0, value, 
methodName)) {
         return null;
       }
 
@@ -547,8 +585,9 @@ public class SqlFunctions {
       final String methodName = "REGEXP_INSTR";
       final Pattern pattern = validateRegexPattern(regex, methodName);
 
-      if (checkPosOccurrenceParamValues(position, occurrence, 
occurrencePosition, value,
-          methodName) || regex.isEmpty()) {
+      if (regex.isEmpty()
+          || !validatePosOccurrenceParamValues(position, occurrence,
+              occurrencePosition, value, methodName)) {
         return 0;
       }
 
@@ -604,6 +643,26 @@ public class SqlFunctions {
       return Unsafe.regexpReplace(s, pattern, replacement, pos, occurrence);
     }
 
+    /** SQL {@code REGEXP_REPLACE} function with 3 arguments with
+     * {@code \\} based indexing for capturing groups. */
+    public String regexpReplaceNonDollarIndexed(String s, String regex,
+        String replacement) {
+      // Modify double-backslash capturing group indices in replacement 
argument,
+      // retrieved from cache when available.
+      String indexedReplacement;
+      try {
+        indexedReplacement = replacementStrCache.getUnchecked(replacement);
+      } catch (UncheckedExecutionException e) {
+        if (e.getCause() instanceof CalciteException) {
+          throw 
RESOURCE.invalidReplacePatternForRegexpReplace(replacement).ex();
+        }
+        throw e;
+      }
+
+      // Call generic regexp replace method with modified replacement pattern
+      return regexpReplace(s, regex, indexedReplacement, 1, 0, null);
+    }
+
     private static int makeRegexpFlags(String stringFlags) {
       int flags = 0;
       for (int i = 0; i < stringFlags.length(); ++i) {
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 1cb7867690..e05c4fd1c4 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -516,7 +516,10 @@ public abstract class SqlLibraryOperators {
           
OperandTypes.STRING_STRING_OPTIONAL_INTEGER_OPTIONAL_INTEGER_OPTIONAL_INTEGER,
           SqlFunctionCategory.STRING);
 
-  @LibraryOperator(libraries = {MYSQL, ORACLE})
+  /** The "REGEXP_REPLACE(value, regexp, rep [, pos [, occurrence [, 
matchType]]])"
+   * function. Replaces all substrings of value that match regexp with
+   * {@code rep} and returns modified value. */
+  @LibraryOperator(libraries = {BIG_QUERY, MYSQL, ORACLE})
   public static final SqlFunction REGEXP_REPLACE = new 
SqlRegexpReplaceFunction();
 
   /** The "REGEXP_SUBSTR(value, regexp[, position[, occurrence]])" function.
diff --git 
a/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
 
b/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
index 3205a48b40..14c2c73e35 100644
--- 
a/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
+++ 
b/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java
@@ -105,6 +105,10 @@ public abstract class SqlAbstractConformance implements 
SqlConformance {
     return SqlConformanceEnum.DEFAULT.isOffsetLimitAllowed();
   }
 
+  @Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
+    return 
SqlConformanceEnum.DEFAULT.isRegexReplaceCaptureGroupDollarIndexed();
+  }
+
   @Override public boolean isPercentRemainderAllowed() {
     return SqlConformanceEnum.DEFAULT.isPercentRemainderAllowed();
   }
diff --git 
a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java 
b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
index b8501f02f1..896db041e8 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java
@@ -256,6 +256,22 @@ public interface SqlConformance {
    */
   boolean isMinusAllowed();
 
+  /**
+   * Whether this dialect uses {@code $} (dollar) for indexing capturing groups
+   * in the replacement string of regular expression functions such as
+   * {@code REGEXP_REPLACE}. If false, the dialect uses {@code \\} (backslash)
+   * for indexing capturing groups.
+   *
+   * <p>For example, {@code REGEXP_REPLACE("abc", "a(.)c", "X\\1")} in BigQuery
+   * is equivalent to {@code REGEXP_REPLACE("abc", "a(.)c", "X$1")} in MySQL;
+   * both produce the result "Xb".
+   *
+   * <p>Among the built-in conformance levels, false in
+   * {@link SqlConformanceEnum#BIG_QUERY};
+   * true otherwise.
+   */
+  boolean isRegexReplaceCaptureGroupDollarIndexed();
+
   /**
    * Whether {@code CROSS APPLY} and {@code OUTER APPLY} operators are allowed
    * in the parser.
diff --git 
a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java 
b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
index a610f19f16..1b7bd1a811 100644
--- a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
+++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java
@@ -240,6 +240,15 @@ public enum SqlConformanceEnum implements SqlConformance {
     }
   }
 
+  @Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
+    switch (this) {
+    case BIG_QUERY:
+      return false;
+    default:
+      return true;
+    }
+  }
+
   @Override public boolean isPercentRemainderAllowed() {
     switch (this) {
     case BABEL:
diff --git 
a/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
 
b/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
index c8e2f7cdd2..7b60bb4445 100644
--- 
a/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
+++ 
b/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java
@@ -87,6 +87,10 @@ public class SqlDelegatingConformance implements 
SqlConformance {
     return delegate.isMinusAllowed();
   }
 
+  @Override public boolean isRegexReplaceCaptureGroupDollarIndexed() {
+    return delegate.isRegexReplaceCaptureGroupDollarIndexed();
+  }
+
   @Override public boolean isApplyAllowed() {
     return delegate.isApplyAllowed();
   }
diff --git 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index 5475f2dafd..f55d4cd7b1 100644
--- 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++ 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -332,8 +332,9 @@ InvalidInputForJsonRemove=Invalid input for JSON_REMOVE: 
document: ''{0}'', json
 InvalidInputForJsonStorageSize=Invalid input for JSON_STORAGE_SIZE: ''{0}''
 InvalidIntegerInputForRegexpFunctions=Invalid integer input ''{0}'' for 
argument ''{1}'' in {2}
 InvalidRegexInputForRegexpFunctions=Invalid regular expression for {1}: ''{0}''
-MultipleCapturingGroupsForRegexpExtract=Multiple capturing groups (count={0}) 
not allowed in regex input for {1}
+MultipleCapturingGroupsForRegexpFunctions=Multiple capturing groups 
(count={0}) not allowed in regex input for {1}
 InvalidInputForRegexpReplace=Invalid input for REGEXP_REPLACE: ''{0}''
+InvalidReplacePatternForRegexpReplace=Invalid replacement pattern for 
REGEXP_REPLACE: ''{0}''
 InvalidInputForJsonInsert=Invalid input for JSON_INSERT: jsonDoc: ''{0}'', 
kvs: ''{1}''
 InvalidInputForJsonReplace=Invalid input for JSON_REPLACE: jsonDoc: ''{0}'', 
kvs: ''{1}''
 InvalidInputForJsonSet=Invalid input for JSON_SET: jsonDoc: ''{0}'', kvs: 
''{1}''
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java 
b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index f31a182979..a15f70cd41 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -482,6 +482,72 @@ class SqlFunctionsTest {
     }
   }
 
+  @Test void testReplaceNonDollarIndexedString() {
+    
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\4_\\\\2"),
+        is("$4_$2"));
+    
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("abc123"),
+        is("abc123"));
+    
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("$007"),
+        is("\\$007"));
+    
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\\\\\ 
\\\\\\\\"),
+        is("\\\\ \\\\"));
+    
assertThat(SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\\\\\$ 
$\\\\\\\\"),
+        is("\\\\\\$ \\$\\\\"));
+    try {
+      SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\-x");
+      fail("'regexp_replace' with invalid replacement pattern is not 
possible");
+    } catch (CalciteException e) {
+      assertThat(e.getMessage(),
+          is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\-x'"));
+    }
+    try {
+      SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\ \\\\");
+      fail("'regexp_replace' with invalid replacement pattern is not 
possible");
+    } catch (CalciteException e) {
+      assertThat(e.getMessage(),
+          is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\ \\\\'"));
+    }
+    try {
+      SqlFunctions.RegexFunction.replaceNonDollarIndexedString("\\\\a");
+      fail("'regexp_replace' with invalid replacement pattern is not 
possible");
+    } catch (CalciteException e) {
+      assertThat(e.getMessage(),
+          is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\a'"));
+    }
+  }
+
+  @Test void testRegexpReplaceNonDollarIndexed() {
+    final SqlFunctions.RegexFunction f = new SqlFunctions.RegexFunction();
+    assertThat(f.regexpReplaceNonDollarIndexed("abascusB", "b", "X"), 
is("aXascusB"));
+    assertThat(f.regexpReplaceNonDollarIndexed("abc01def02ghi", "[a-z]+", 
"X"), is("X01X02X"));
+    assertThat(f.regexpReplaceNonDollarIndexed("a0b1c2d3", "0|2", "X"), 
is("aXb1cXd3"));
+
+    // Test double-backslash indexing for capturing groups
+    assertThat(f.regexpReplaceNonDollarIndexed("abc_defcon", 
"([a-z])_([a-z])", "\\\\2_\\\\1"),
+        is("abd_cefcon"));
+    assertThat(f.regexpReplaceNonDollarIndexed("1\\2\\3\\4\\5", "2.(.).4", 
"\\\\1"),
+        is("1\\3\\5"));
+    assertThat(f.regexpReplaceNonDollarIndexed("abc16", "b(.*)(\\d)", 
"\\\\\\\\"),
+        is("a\\"));
+    assertThat(f.regexpReplaceNonDollarIndexed("qwerty123", "([0-9]+)", 
"$147"),
+        is("qwerty$147"));
+
+    try {
+      f.regexpReplaceNonDollarIndexed("abcdefghijabc", "abc(.)", "\\\\-11x");
+      fail("'regexp_replace' with invalid replacement pattern is not 
possible");
+    } catch (CalciteException e) {
+      assertThat(e.getMessage(),
+          is("Invalid replacement pattern for REGEXP_REPLACE: '\\\\-11x'"));
+    }
+    try {
+      f.regexpReplaceNonDollarIndexed("abcdefghijabc", "abc(.)", "\\\11x");
+      fail("'regexp_replace' with invalid replacement pattern is not 
possible");
+    } catch (CalciteException e) {
+      assertThat(e.getMessage(),
+          is("Invalid replacement pattern for REGEXP_REPLACE: '\\\tx'"));
+    }
+  }
+
   @Test void testLower() {
     assertThat(lower("A bCd Iijk"), is("a bcd iijk"));
   }
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 93c6e615b9..d087340d53 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2795,7 +2795,7 @@ BigQuery's type system uses confusingly different names 
for types and functions:
 | b | REGEXP_EXTRACT(string, regexp [, position [, occurrence]]) | Returns the 
substring in *string* that matches the *regexp*, starting search at *position* 
(default 1), and until locating the nth *occurrence* (default 1). Returns NULL 
if there is no match
 | b | REGEXP_EXTRACT_ALL(string, regexp)             | Returns an array of all 
substrings in *string* that matches the *regexp*. Returns an empty array if 
there is no match
 | b | REGEXP_INSTR(string, regexp [, position [, occurrence [, 
occurrence_position]]]) | Returns the lowest 1-based position of the substring 
in *string* that matches the *regexp*, starting search at *position* (default 
1), and until locating the nth *occurrence* (default 1). Setting 
occurrence_position (default 0) to 1 returns the end position of substring + 1. 
Returns 0 if there is no match
-| m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [, 
matchType]]]) | Replaces all substrings of *string* that match *regexp* with 
*rep* at the starting *pos* in expr (if omitted, the default is 1), 
*occurrence* means which occurrence of a match to search for (if omitted, the 
default is 1), *matchType* specifies how to perform matching
+| b m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [, 
matchType]]]) | Replaces all substrings of *string* that match *regexp* with 
*rep* at the starting *pos* in expr (if omitted, the default is 1), 
*occurrence* specifies which occurrence of a match to search for (if omitted, 
the default is 1), *matchType* specifies how to perform matching
 | b | REGEXP_SUBSTR(string, regexp [, position [, occurrence]]) | Synonym for 
REGEXP_EXTRACT
 | b m p | REPEAT(string, integer)                    | Returns a string 
consisting of *string* repeated of *integer* times; returns an empty string if 
*integer* is less than 1
 | b m | REVERSE(string)                              | Returns *string* with 
the order of the characters reversed
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java 
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index df2ade110b..9b5caad9ad 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4892,6 +4892,8 @@ public class SqlOperatorTest {
     final SqlOperatorFixture f0 = fixture();
     final Consumer<SqlOperatorFixture> consumer = f -> {
       f.setFor(SqlLibraryOperators.REGEXP_REPLACE);
+
+      // Tests for regexp replace generic functionality
       f.checkString("regexp_replace('a b c', 'b', 'X')", "a X c",
           "VARCHAR NOT NULL");
       f.checkString("regexp_replace('abc def ghi', '[a-z]+', 'X')", "X X X",
@@ -4919,12 +4921,37 @@ public class SqlOperatorTest {
           "VARCHAR NOT NULL");
       f.checkString("regexp_replace('abc\t\ndef\t\nghi', '\\w+', '+')", 
"+\t\n+\t\n+",
           "VARCHAR NOT NULL");
+
       f.checkQuery("select regexp_replace('a b c', 'b', 'X')");
       f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1)");
       f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1, 3)");
       f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1, 3, 'i')");
     };
-    f0.forEachLibrary(list(SqlLibrary.MYSQL, SqlLibrary.ORACLE), consumer);
+    f0.forEachLibrary(list(SqlLibrary.MYSQL, SqlLibrary.ORACLE, 
SqlLibrary.BIG_QUERY), consumer);
+
+    // Tests for double-backslash indexed capturing groups for regexp_replace 
in BQ
+    final SqlOperatorFixture f1 =
+        
f0.withLibrary(SqlLibrary.BIG_QUERY).withConformance(SqlConformanceEnum.BIG_QUERY);
+    f1.checkString("regexp_replace('abc16', 'b(.*)(\\d)', '\\\\2\\\\1X')", 
"a6c1X",
+        "VARCHAR NOT NULL");
+    f1.checkString("regexp_replace('a\\bc56a\\bc37', 'b(.)(\\d)', 
'\\\\2\\\\0X')",
+        "a\\5bc5X6a\\3bc3X7", "VARCHAR NOT NULL");
+    f1.checkString("regexp_replace('abcdefghijabc', 'abc(.)', 
'\\\\\\\\123xyz')",
+        "\\123xyzefghijabc", "VARCHAR NOT NULL");
+    f1.checkString("regexp_replace('abcdefghijabc', 'abc(.)', '$1xy')",
+        "$1xyefghijabc", "VARCHAR NOT NULL");
+    f1.checkString("regexp_replace('abc123', 'b(.*)(\\d)', '\\\\\\\\$ 
$\\\\\\\\')",
+        "a\\$ $\\", "VARCHAR NOT NULL");
+
+    // Tests to verify double-backslashes are ignored for indexing in other 
dialects
+    final SqlOperatorFixture f2 =
+        
f0.withLibrary(SqlLibrary.MYSQL).withConformance(SqlConformanceEnum.MYSQL_5);
+    f2.checkString("regexp_replace('abc16', 'b(.*)(\\d)', '\\\\2\\\\1X')", 
"a\\2\\1X",
+        "VARCHAR NOT NULL");
+    f2.checkString("regexp_replace('abcdefghijabc', 'abc(.)', '\\\\-11x')", 
"\\-11xefghijabc",
+        "VARCHAR NOT NULL");
+    f2.checkString("regexp_replace('abcdefghijabc', 'abc(.)', '$1x')", 
"dxefghijabc",
+        "VARCHAR NOT NULL");
   }
 
   @Test void testRegexpSubstrFunc() {

Reply via email to