This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new dbbf44ce15 Add splitPartWithLimit and splitPartFromEnd UDFs (#12437)
dbbf44ce15 is described below
commit dbbf44ce153bbe72fb3161f1977d0af1b403e06e
Author: deemoliu <[email protected]>
AuthorDate: Thu Apr 18 15:54:59 2024 -0700
Add splitPartWithLimit and splitPartFromEnd UDFs (#12437)
---
.../common/function/scalar/StringFunctions.java | 26 ++++++++++--
.../function/scalar/StringFunctionsTest.java | 48 ++++++++++++++++++++++
2 files changed, 71 insertions(+), 3 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 8ce77e8ccb..374917ec99 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -49,7 +49,6 @@ public class StringFunctions {
private final static Pattern LTRIM = Pattern.compile("^\\s+");
private final static Pattern RTRIM = Pattern.compile("\\s+$");
-
/**
* @see StringUtils#reverse(String)
* @param input
@@ -585,14 +584,35 @@ public class StringFunctions {
* TODO: Revisit if index should be one-based (both Presto and Postgres use
one-based index, which starts with 1)
* @param input
* @param delimiter
- * @param index
+ * @param index zero-based index of the part to return; a negative value
counts back from the end (-1 is the last part).
* @return splits string on specified delimiter and returns String at
specified index from the split.
*/
@ScalarFunction(names = {"splitPart", "split_part"})
public static String splitPart(String input, String delimiter, int index) {
String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter);
- if (index < splitString.length) {
+ if (index >= 0 && index < splitString.length) {
+ return splitString[index];
+ } else if (index < 0 && index >= -splitString.length) {
+ return splitString[splitString.length + index];
+ } else {
+ return "null";
+ }
+ }
+
+ /**
+ * @param input the input String to be split into parts.
+ * @param delimiter the specified delimiter to split the input string.
+ * @param limit the max count of parts that the input string can be split
into.
+ * @param index the specified index for the split parts to be returned.
+ * @return splits string on the delimiter with the limit count and returns
String at specified index from the split.
+ */
+ @ScalarFunction
+ public static String splitPart(String input, String delimiter, int limit,
int index) {
+ String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter,
limit);
+ if (index >= 0 && index < splitString.length) {
return splitString[index];
+ } else if (index < 0 && index >= -splitString.length) {
+ return splitString[splitString.length + index];
} else {
return "null";
}
diff --git
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
index 9129ccdc37..d75b8ada43 100644
---
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
+++
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
@@ -26,6 +26,47 @@ import static org.testng.Assert.assertEquals;
public class StringFunctionsTest {
+ @DataProvider(name = "splitPartTestCases")
+ public static Object[][] splitPartTestCases() {
+ return new Object[][]{
+ {"org.apache.pinot.common.function", ".", 0, 100, "org", "org"},
+ {"org.apache.pinot.common.function", ".", 10, 100, "null", "null"},
+ {"org.apache.pinot.common.function", ".", 1, 0, "apache", "apache"},
+ {"org.apache.pinot.common.function", ".", 1, 1, "apache", "null"},
+ {"org.apache.pinot.common.function", ".", 0, 1, "org",
"org.apache.pinot.common.function"},
+ {"org.apache.pinot.common.function", ".", 1, 2, "apache",
"apache.pinot.common.function"},
+ {"org.apache.pinot.common.function", ".", 2, 3, "pinot",
"pinot.common.function"},
+ {"org.apache.pinot.common.function", ".", 3, 4, "common",
"common.function"},
+ {"org.apache.pinot.common.function", ".", 4, 5, "function",
"function"},
+ {"org.apache.pinot.common.function", ".", 5, 6, "null", "null"},
+ {"org.apache.pinot.common.function", ".", 3, 3, "common", "null"},
+ {"+++++", "+", 0, 100, "", ""},
+ {"+++++", "+", 1, 100, "null", "null"},
+ // note that splitPart splits with the limit first, then looks up the
index from the START or the END.
+ {"org.apache.pinot.common.function", ".", -1, 100, "function",
"function"},
+ {"org.apache.pinot.common.function", ".", -10, 100, "null", "null"},
+ {"org.apache.pinot.common.function", ".", -2, 0, "common", "common"},
// Case: limit=0 is not taking effect.
+ {"org.apache.pinot.common.function", ".", -1, 1, "function",
"org.apache.pinot.common.function"},
+ {"org.apache.pinot.common.function", ".", -2, 1, "common", "null"},
+ {"org.apache.pinot.common.function", ".", -1, 2, "function",
"apache.pinot.common.function"},
+ {"org.apache.pinot.common.function", ".", -2, 2, "common", "org"},
+ {"org.apache.pinot.common.function", ".", -1, 3, "function",
"pinot.common.function"},
+ {"org.apache.pinot.common.function", ".", -3, 3, "pinot", "org"},
+ {"org.apache.pinot.common.function", ".", -4, 3, "apache", "null"},
+ {"org.apache.pinot.common.function", ".", -1, 4, "function",
"common.function"},
+ {"org.apache.pinot.common.function", ".", -3, 4, "pinot", "apache"},
+ {"org.apache.pinot.common.function", ".", -4, 4, "apache", "org"},
+ {"org.apache.pinot.common.function", ".", -1, 5, "function",
"function"},
+ {"org.apache.pinot.common.function", ".", -5, 5, "org", "org"},
+ {"org.apache.pinot.common.function", ".", -6, 5, "null", "null"},
+ {"org.apache.pinot.common.function", ".", -1, 6, "function",
"function"},
+ {"org.apache.pinot.common.function", ".", -5, 6, "org", "org"},
+ {"org.apache.pinot.common.function", ".", -6, 6, "null", "null"},
+ {"+++++", "+", -1, 100, "", ""},
+ {"+++++", "+", -2, 100, "null", "null"},
+ };
+ }
+
@DataProvider(name = "isJson")
public static Object[][] isJsonTestCases() {
return new Object[][]{
@@ -40,4 +81,11 @@ public class StringFunctionsTest {
public void testIsJson(String input, boolean expectedValue) {
assertEquals(StringFunctions.isJson(input), expectedValue);
}
+
+ @Test(dataProvider = "splitPartTestCases")
+ public void testSplitPart(String input, String delimiter, int index, int
limit, String expectedToken,
+ String expectedTokenWithLimitCounts) {
+ assertEquals(StringFunctions.splitPart(input, delimiter, index),
expectedToken);
+ assertEquals(StringFunctions.splitPart(input, delimiter, limit, index),
expectedTokenWithLimitCounts);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]