This is an automated email from the ASF dual-hosted git repository. AHeise pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git
commit abee47a354596825d0d126872f0c6e2691b449eb
Author: Ramin Gharib <[email protected]>
AuthorDate: Mon May 18 14:48:31 2026 +0200

    [FLINK-39650][table] Cache compiled patterns and drop hot-path log in regexpReplace

    SqlFunctionUtils.regexpReplace called str.replaceAll which compiles the regex
    inside the engine on every invocation, and caught any exception with LOG.error
    producing one stack trace per record processed.

    Switches to REGEXP_PATTERN_CACHE.get(regex).matcher(str).replaceAll(...),
    reusing the existing ThreadLocalCache shared with getRegexpMatcher and regExp.
    The pattern is now compiled at most once per regex value per thread.
    PatternSyntaxException is caught silently so non-literal invalid regex patterns
    preserve the prior null-return contract without flooding the log.

    Adds runtime IT cases for REGEXP_REPLACE in RegexpFunctionsITCase covering
    literal valid, column-ref invalid, and function-call regex paths.
---
 .../planner/functions/RegexpFunctionsITCase.java   | 38 ++++++++++++++++++++++
 .../table/runtime/functions/SqlFunctionUtils.java  | 17 +++++-----
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/RegexpFunctionsITCase.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/RegexpFunctionsITCase.java
index 0a39f11028e..85c5ef49d61 100644
--- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/RegexpFunctionsITCase.java
+++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/RegexpFunctionsITCase.java
@@ -37,6 +37,7 @@ class RegexpFunctionsITCase extends BuiltInFunctionTestBase {
                         regexpExtractTestCases(),
                         regexpExtractAllTestCases(),
                         regexpInstrTestCases(),
+                        regexpReplaceTestCases(),
                         regexpSubstrTestCases())
                 .flatMap(s -> s);
     }
@@ -340,6 +341,43 @@ class RegexpFunctionsITCase extends BuiltInFunctionTestBase {
                                         + "REGEXP_INSTR(str <CHARACTER_STRING>, regex <CHARACTER_STRING>)"));
     }

+    private Stream<TestSetSpec> regexpReplaceTestCases() {
+        return Stream.of(
+                TestSetSpec.forFunction(BuiltInFunctionDefinitions.REGEXP_REPLACE)
+                        .onFieldsWithData(null, "foobar", "(")
+                        .andDataTypes(DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING())
+                        .testResult(
+                                $("f0").regexpReplace("foo", "X"),
+                                "REGEXP_REPLACE(f0, 'foo', 'X')",
+                                null,
+                                DataTypes.STRING().nullable())
+                        .testResult(
+                                $("f1").regexpReplace("foo", "X"),
+                                "REGEXP_REPLACE(f1, 'foo', 'X')",
+                                "Xbar",
+                                DataTypes.STRING().nullable())
+                        .testResult(
+                                $("f1").regexpReplace("o+", "X"),
+                                "REGEXP_REPLACE(f1, 'o+', 'X')",
+                                "fXbar",
+                                DataTypes.STRING().nullable())
+                        .testResult(
+                                $("f1").regexpReplace($("f2"), "X"),
+                                "REGEXP_REPLACE(f1, f2, 'X')",
+                                null,
+                                DataTypes.STRING().nullable())
+                        .testResult(
+                                $("f1").regexpReplace(concat("fo", "o"), "X"),
+                                "REGEXP_REPLACE(f1, 'fo' || 'o', 'X')",
+                                "Xbar",
+                                DataTypes.STRING().nullable())
+                        .testResult(
+                                $("f1").regexpReplace(concat("(", ""), "X"),
+                                "REGEXP_REPLACE(f1, '(' || '', 'X')",
+                                null,
+                                DataTypes.STRING().nullable()));
+    }
+
     private Stream<TestSetSpec> regexpSubstrTestCases() {
         return Stream.of(
                 TestSetSpec.forFunction(BuiltInFunctionDefinitions.REGEXP_SUBSTR)
diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/SqlFunctionUtils.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/SqlFunctionUtils.java
index 2f90133f744..1932a6db862 100644
--- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/SqlFunctionUtils.java
+++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/SqlFunctionUtils.java
@@ -422,21 +422,20 @@ public class SqlFunctionUtils {

     /**
      * Returns a string resulting from replacing all substrings that match the regular expression
-     * with replacement.
+     * with replacement. Literal regexes are validated at planning time by the input type strategy.
      */
     public static String regexpReplace(String str, String regex, String replacement) {
         if (str == null || regex == null || replacement == null) {
             return null;
         }
         try {
-            return str.replaceAll(regex, Matcher.quoteReplacement(replacement));
-        } catch (Exception e) {
-            LOG.error(
-                    String.format(
-                            "Exception in regexpReplace('%s', '%s', '%s')",
-                            str, regex, replacement),
-                    e);
-            // return null if exception in regex replace
+            return REGEXP_PATTERN_CACHE
+                    .get(regex)
+                    .matcher(str)
+                    .replaceAll(Matcher.quoteReplacement(replacement));
+        } catch (PatternSyntaxException e) {
+            // Literals are rejected at planning time; non-literal invalid regex
+            // returns null to preserve the prior runtime contract.
             return null;
         }
     }
