This is an automated email from the ASF dual-hosted git repository.
abhishekrb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new c89ec0c4b84 Common pattern validation for all regexp* functions
(#18762)
c89ec0c4b84 is described below
commit c89ec0c4b8454d2977ec5d3a7df354d74184611c
Author: Abhishek Radhakrishnan <[email protected]>
AuthorDate: Thu Nov 20 13:54:57 2025 -0800
Common pattern validation for all regexp* functions (#18762)
Nicer user-facing error messages for invalid patterns used in the regexp*
functions. Move the existing compilePattern() utility to a shared place so
that all of these functions use it.
---
.../druid/query/expression/RegexpExprUtils.java | 51 ++++++++++++++++++++++
.../query/expression/RegexpExtractExprMacro.java | 26 +----------
.../query/expression/RegexpLikeExprMacro.java | 2 +-
.../query/expression/RegexpReplaceExprMacro.java | 2 +-
.../expression/RegexpExtractExprMacroTest.java | 20 +++++++++
.../query/expression/RegexpLikeExprMacroTest.java | 17 ++++++++
.../expression/RegexpReplaceExprMacroTest.java | 17 ++++++++
.../apache/druid/sql/calcite/CalciteQueryTest.java | 2 +-
8 files changed, 109 insertions(+), 28 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java
new file mode 100644
index 00000000000..0e0878045ad
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.expression;
+
+import org.apache.druid.error.InvalidInput;
+import org.apache.druid.java.util.common.StringUtils;
+
+import javax.annotation.Nullable;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+public class RegexpExprUtils
+{
+ /**
+ * Compile the provided pattern, or provide a nice error message if it
cannot be compiled.
+ */
+ public static Pattern compilePattern(@Nullable String pattern, String
functionName)
+ {
+ try {
+ return
Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern));
+ }
+ catch (PatternSyntaxException e) {
+ throw InvalidInput.exception(
+ e,
+ StringUtils.format(
+ "An invalid pattern [%s] was provided for the [%s] function,
error: [%s]",
+ e.getPattern(),
+ functionName,
+ e.getMessage()
+ )
+ );
+ }
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
index 6a690787735..d7102936c14 100644
---
a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
+++
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
@@ -19,8 +19,6 @@
package org.apache.druid.query.expression;
-import org.apache.druid.error.InvalidInput;
-import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
@@ -31,7 +29,6 @@ import javax.annotation.Nullable;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro
{
@@ -61,7 +58,7 @@ public class RegexpExtractExprMacro implements
ExprMacroTable.ExprMacro
}
// Precompile the pattern.
- final Pattern pattern = compilePattern((String)
patternExpr.getLiteralValue());
+ final Pattern pattern = RegexpExprUtils.compilePattern((String)
patternExpr.getLiteralValue(), FN_NAME);
final int index = indexExpr == null ? 0 : ((Number)
indexExpr.getLiteralValue()).intValue();
@@ -97,25 +94,4 @@ public class RegexpExtractExprMacro implements
ExprMacroTable.ExprMacro
}
return new RegexpExtractExpr(args);
}
-
- /**
- * Compile the provided pattern, or provide a nice error message if it
cannot be compiled.
- */
- private static Pattern compilePattern(@Nullable String pattern)
- {
- try {
- return
Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern));
- }
- catch (PatternSyntaxException e) {
- throw InvalidInput.exception(
- e,
- StringUtils.format(
- "An invalid pattern [%s] was provided for the %s function,
error: [%s]",
- e.getPattern(),
- FN_NAME,
- e.getMessage()
- )
- );
- }
- }
}
diff --git
a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
index 3073cee447d..a2c8bdc3e9a 100644
---
a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
+++
b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
@@ -86,7 +86,7 @@ public class RegexpLikeExprMacro implements
ExprMacroTable.ExprMacro
final String patternString = (String) patternExpr.getLiteralValue();
this.arg = args.get(0);
- this.pattern =
Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(patternString));
+ this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME);
}
@Nonnull
diff --git
a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
index 835fdcd6c91..e8afa62ff0c 100644
---
a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
+++
b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
@@ -96,7 +96,7 @@ public class RegexpReplaceExprMacro implements
ExprMacroTable.ExprMacro
final String patternString = (String) patternExpr.getLiteralValue();
this.arg = args.get(0);
- this.pattern = patternString != null ? Pattern.compile(patternString) :
null;
+ this.pattern = patternString != null ?
RegexpExprUtils.compilePattern(patternString, FN_NAME) : null;
this.replacement = (String) replacementExpr.getLiteralValue();
}
diff --git
a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
index 8a0212f33d5..c88e3d77cd0 100644
---
a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
+++
b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
@@ -19,9 +19,12 @@
package org.apache.druid.query.expression;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
+import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Test;
@@ -46,6 +49,23 @@ public class RegexpExtractExprMacroTest extends MacroTestBase
eval("regexp_extract('a', 'b', 'c', 'd')", InputBindings.nilBindings());
}
+ @Test
+ public void testInvalidRegexpExtractPattern()
+ {
+ MatcherAssert.assertThat(
+ Assert.assertThrows(DruidException.class, () ->
+ eval(
+ "regexp_extract('pod-1234-node', '[ab-0-9]')",
+ InputBindings.forInputSupplier("a", ExpressionType.STRING, ()
-> "foo")
+ )
+ ),
+ DruidExceptionMatcher.invalidInput().expectMessageContains(
+ "An invalid pattern [[ab-0-9]] was provided for the
[regexp_extract] function,"
+ + " error: [Illegal character range near index 4"
+ )
+ );
+ }
+
@Test
public void testMatch()
{
diff --git
a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
index d6e31baad0b..75ea9b6a21e 100644
---
a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
+++
b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
@@ -19,9 +19,12 @@
package org.apache.druid.query.expression;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
+import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Test;
@@ -46,6 +49,20 @@ public class RegexpLikeExprMacroTest extends MacroTestBase
eval("regexp_like('a', 'b', 'c')", InputBindings.nilBindings());
}
+ @Test
+ public void testInvalidRegexpLikePattern()
+ {
+ MatcherAssert.assertThat(
+ Assert.assertThrows(
+ DruidException.class,
+ () -> eval("regexp_like('a', '[Ab-C]')",
InputBindings.nilBindings())),
+ DruidExceptionMatcher.invalidInput().expectMessageContains(
+ "An invalid pattern [[Ab-C]] was provided for the [regexp_like]
function,"
+ + " error: [Illegal character range near index 4"
+ )
+ );
+ }
+
@Test
public void testMatch()
{
diff --git
a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
index 870bd5c8db8..57b7c3c9a89 100644
---
a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
+++
b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
@@ -20,9 +20,12 @@
package org.apache.druid.query.expression;
import com.google.common.collect.ImmutableMap;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
+import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Test;
@@ -40,6 +43,20 @@ public class RegexpReplaceExprMacroTest extends MacroTestBase
eval("regexp_replace()", InputBindings.nilBindings());
}
+ @Test
+ public void testInvalidRegexpReplacePattern()
+ {
+ MatcherAssert.assertThat(
+ Assert.assertThrows(
+ DruidException.class,
+ () -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')",
InputBindings.nilBindings())),
+ DruidExceptionMatcher.invalidInput().expectMessageContains(
+ "An invalid pattern [[Ab-cd-0]] was provided for the
[regexp_replace] function,"
+ + " error: [Illegal character range near index 7"
+ )
+ );
+ }
+
@Test
public void testErrorFourArguments()
{
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index af9d827af03..14ea8ddb4db 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -8413,7 +8413,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
+ " REGEXP_EXTRACT(dim1, '^(.))', 1)\n"
+ "FROM foo",
DruidExceptionMatcher.invalidInput().expectMessageContains(
- "An invalid pattern [^(.))] was provided for the regexp_extract
function, " +
+ "An invalid pattern [^(.))] was provided for the [regexp_extract]
function, " +
"error: [Unmatched closing ')' near index 3\n^(.))\n ^]"
)
);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]