This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 561e471a86 Add support for regexpReplace scalar function (#9123)
561e471a86 is described below

commit 561e471a86278e0e486cd9e602f8499fc8f8517c
Author: Vivek Iyer Vaidyanathan <[email protected]>
AuthorDate: Tue Aug 2 16:29:16 2022 -0700

    Add support for regexpReplace scalar function (#9123)
    
    * Add support for regexpReplace scalar function
    
    * Address minor review comments.
---
 .../LiteralOnlyBrokerRequestTest.java              |  17 +++
 .../common/function/scalar/StringFunctions.java    | 107 ++++++++++++++++++
 .../tests/OfflineClusterIntegrationTest.java       | 119 +++++++++++++++++++++
 3 files changed, 243 insertions(+)

diff --git 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
index bd807818ce..516fe5fb65 100644
--- 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
+++ 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
@@ -122,6 +122,23 @@ public class LiteralOnlyBrokerRequestTest {
         CalciteSqlParser.compileToPinotQuery("SELECT count(*) from foo " + 
"where bar = fromBase64('aGVsbG8h')")));
     
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
         "SELECT count(*) from foo " + "where bar = 
fromUtf8(fromBase64('aGVsbG8h'))")));
+    
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+        .compileToPinotQuery("SELECT count(*) from myTable where 
regexpReplace(col1, \"b(..)\", \"X$1Y\")  = "
+            + "\"fooXarYXazY\"")));
+    
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+        .compileToPinotQuery("SELECT count(*) from myTable where 
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10)  = "
+            + "\"fooXarYXazY\"")));
+    
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+        .compileToPinotQuery("SELECT count(*) from myTable where 
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1)  = "
+            + "\"fooXarYXazY\"")));
+    
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+        .compileToPinotQuery("SELECT count(*) from myTable where 
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1, "
+            + "\"i\")  = "
+            + "\"fooXarYXazY\"")));
+    
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+        .compileToPinotQuery("SELECT count(*) from myTable where 
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1, "
+            + "\"m\")  = "
+            + "\"fooXarYXazY\"")));
   }
 
   @Test
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 08b9c5ac22..4277cec152 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -588,4 +588,111 @@ public class StringFunctions {
   public static byte[] fromBase64(String input) {
     return Base64.getDecoder().decode(input);
   }
+
+  /**
+   * Replace a regular expression pattern. If matchStr is not found, inputStr 
will be returned. By default, all
+   * occurences of match pattern in the input string will be replaced. Default 
matching pattern is case sensitive.
+   *
+   * @param inputStr Input string to apply the regexpReplace
+   * @param matchStr Regexp or string to match against inputStr
+   * @param replaceStr Regexp or string to replace if matchStr is found
+   * @param matchStartPos Index of inputStr from where matching should start. 
Default is 0.
+   * @param occurence Controls which occurence of the matched pattern must be 
replaced. Counting starts at 0. Default
+   *                  is -1
+   * @param flag Single character flag that controls how the regex finds 
matches in inputStr. If an incorrect flag is
+   *            specified, the function applies default case sensitive match. 
Only one flag can be specified. Supported
+   *             flags:
+   *             i -> Case insensitive
+   * @return replaced input string
+   */
+  @ScalarFunction
+  public static String regexpReplace(String inputStr, String matchStr, String 
replaceStr, int matchStartPos,
+      int occurence, String flag) {
+    Integer patternFlag;
+
+    // TODO: Support more flags like MULTILINE, COMMENTS, etc.
+    switch (flag) {
+      case "i":
+        patternFlag = Pattern.CASE_INSENSITIVE;
+        break;
+      default:
+        patternFlag = null;
+        break;
+    }
+
+    Pattern p;
+    if (patternFlag != null) {
+      p = Pattern.compile(matchStr, patternFlag);
+    } else {
+      p = Pattern.compile(matchStr);
+    }
+
+    Matcher matcher = p.matcher(inputStr).region(matchStartPos, 
inputStr.length());
+    StringBuffer sb;
+
+    if (occurence >= 0) {
+      sb = new StringBuffer(inputStr);
+      while (occurence >= 0 && matcher.find()) {
+        if (occurence == 0) {
+          sb.replace(matcher.start(), matcher.end(), replaceStr);
+          break;
+        }
+        occurence--;
+      }
+    } else {
+      sb = new StringBuffer();
+      while (matcher.find()) {
+        matcher.appendReplacement(sb, replaceStr);
+      }
+      matcher.appendTail(sb);
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * See #regexpReplace(String, String, String, int, int, String). Matches 
against entire inputStr and replaces all
+   * occurences. Match is performed in case-sensitive mode.
+   *
+   * @param inputStr Input string to apply the regexpReplace
+   * @param matchStr Regexp or string to match against inputStr
+   * @param replaceStr Regexp or string to replace if matchStr is found
+   * @return replaced input string
+   */
+  @ScalarFunction
+  public static String regexpReplace(String inputStr, String matchStr, String 
replaceStr) {
+    return regexpReplace(inputStr, matchStr, replaceStr, 0, -1, "");
+  }
+
+  /**
+   * See #regexpReplace(String, String, String, int, int, String). Matches 
against entire inputStr and replaces all
+   * occurences. Match is performed in case-sensitive mode.
+   *
+   * @param inputStr Input string to apply the regexpReplace
+   * @param matchStr Regexp or string to match against inputStr
+   * @param replaceStr Regexp or string to replace if matchStr is found
+   * @param matchStartPos Index of inputStr from where matching should start. 
Default is 0.
+   * @return replaced input string
+   */
+  @ScalarFunction
+  public static String regexpReplace(String inputStr, String matchStr, String 
replaceStr, int matchStartPos) {
+    return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, -1, 
"");
+  }
+
+  /**
+   * See #regexpReplace(String, String, String, int, int, String). Match is 
performed in case-sensitive mode.
+   *
+   * @param inputStr Input string to apply the regexpReplace
+   * @param matchStr Regexp or string to match against inputStr
+   * @param replaceStr Regexp or string to replace if matchStr is found
+   * @param matchStartPos Index of inputStr from where matching should start. 
Default is 0.
+   * @param occurence Controls which occurence of the matched pattern must be 
replaced. Counting starts
+   *                    at 0. Default is -1
+   * @return replaced input string
+   */
+  @ScalarFunction
+  public static String regexpReplace(String inputStr, String matchStr, String 
replaceStr, int matchStartPos,
+      int occurence) {
+    return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, 
occurence, "");
+  }
 }
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index c95e3d91e5..053af685ef 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -88,6 +88,7 @@ import static 
org.apache.pinot.common.function.scalar.StringFunctions.*;
 import static 
org.apache.pinot.controller.helix.core.PinotHelixResourceManager.EXTERNAL_VIEW_CHECK_INTERVAL_MS;
 import static 
org.apache.pinot.controller.helix.core.PinotHelixResourceManager.EXTERNAL_VIEW_ONLINE_SEGMENTS_MAX_WAIT_MS;
 import static org.testng.Assert.*;
+import static org.testng.Assert.assertEquals;
 
 
 /**
@@ -590,6 +591,124 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     assertEquals(oneHourAgoTodayStr, expectedOneHourAgoTodayStr);
   }
 
+  @Test
+  public void testRegexpReplace() throws Exception {
+    // Correctness tests of regexpReplace.
+
+    // Test replace all.
+    String sqlQuery = "SELECT regexpReplace('CA', 'C', 'TEST')";
+    JsonNode response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    String result = 
response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "TESTA");
+
+    sqlQuery = "SELECT regexpReplace('foobarbaz', 'b', 'X')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "fooXarXaz");
+
+    sqlQuery = "SELECT regexpReplace('foobarbaz', 'b', 'XY')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "fooXYarXYaz");
+
+    sqlQuery = "SELECT regexpReplace('Argentina', '(.)', '$1 ')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "A r g e n t i n a ");
+
+    sqlQuery = "SELECT regexpReplace('Pinot is  blazing  fast', '( ){2,}', ' 
')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "Pinot is blazing fast");
+
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, and wise','\\w+thy', 
'something')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "something, something, and wise");
+
+    sqlQuery = "SELECT 
regexpReplace('11234567898','(\\d)(\\d{3})(\\d{3})(\\d{4})', '$1-($2) $3-$4')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "1-(123) 456-7898");
+
+    // Test replace starting at index.
+
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+thy', 'something', 4)";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "healthy, something, something and wise");
+
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+thy', 'something', 1)";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "hsomething, something, something and wise");
+
+
+    // Test occurence
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+thy', 'something', 0, 2)";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "healthy, wealthy, something and wise");
+
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+thy', 'something', 0, 0)";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "something, wealthy, stealthy and wise");
+
+    // Test flags
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+tHy', 'something', 0, 0, 'i')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "something, wealthy, stealthy and wise");
+
+    // Negative test. Pattern match not found.
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+tHy', 'something')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "healthy, wealthy, stealthy and wise");
+
+    // Negative test. Pattern match not found.
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+tHy', 'something', 3, 21, 'i')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "healthy, wealthy, stealthy and wise");
+
+    // Negative test - incorrect flag
+    sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and 
wise','\\w+tHy', 'something', 3, 12, 'xyz')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    result = response.get("resultTable").get("rows").get(0).get(0).asText();
+    assertEquals(result, "healthy, wealthy, stealthy and wise");
+
+    // Test in select clause with column values
+    sqlQuery = "SELECT regexpReplace(DestCityName, ' ', '', 0, -1, 'i') from 
myTable where OriginState = 'CA'";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    JsonNode rows = response.get("resultTable").get("rows");
+    for (int i = 0; i < rows.size(); i++) {
+      JsonNode row = rows.get(i);
+      boolean containsSpace = row.get(0).asText().contains(" ");
+      assertEquals(containsSpace, false);
+    }
+
+    // Test in where clause
+    sqlQuery = "SELECT count(*) from myTable where regexpReplace(originState, 
'[VC]A', 'TEST') = 'TEST'";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    int count1 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+    sqlQuery = "SELECT count(*) from myTable where originState='CA' or 
originState='VA'";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    int count2 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+    assertEquals(count1, count2);
+
+    // Test nested transform
+    sqlQuery = "SELECT count(*) from myTable where 
contains(regexpReplace(originState, '(C)(A)', '$1TEST$2'), "
+        + "'CTESTA')";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    count1 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+    sqlQuery = "SELECT count(*) from myTable where originState='CA'";
+    response = postQuery(sqlQuery, _brokerBaseApiUrl);
+    count2 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+    assertEquals(count1, count2);
+  }
+
   @Test
   public void testUrlFunc()
       throws Exception {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to