This is an automated email from the ASF dual-hosted git repository.
pvillard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new efb54cb39f NIFI-15628 - Add unique() expression language function
efb54cb39f is described below
commit efb54cb39ffca8347ca5d79076dcbfc62a0a93ad
Author: mcducks <[email protected]>
AuthorDate: Wed Feb 25 22:34:15 2026 +1100
NIFI-15628 - Add unique() expression language function
- Added UniqueEvaluator for removing duplicates from delimited strings
- Preserves order of first occurrence using LinkedHashSet
- Handles special regex characters with Pattern.quote()
- Empty strings treated as distinct values
- Added unit tests
- Updated ANTLR grammar (lexer and parser)
- Integrated into ExpressionCompiler with proper type coercion
- Added documentation to expression language guide
This closes #10923.
Signed-off-by: Pierre Villard <[email protected]>
---
.../language/antlr/AttributeExpressionLexer.g | 1 +
.../language/antlr/AttributeExpressionParser.g | 2 +-
.../language/compile/ExpressionCompiler.java | 8 ++
.../evaluation/functions/UniqueEvaluator.java | 95 ++++++++++++++++++++++
.../attribute/expression/language/TestQuery.java | 67 +++++++++++++++
.../main/asciidoc/expression-language-guide.adoc | 35 ++++++++
6 files changed, 207 insertions(+), 1 deletion(-)
diff --git
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
index 0905f4119a..98c498988d 100644
---
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
+++
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
@@ -219,6 +219,7 @@ REPEAT : 'repeat';
UUID3 : 'UUID3';
UUID5 : 'UUID5';
HASH : 'hash';
+UNIQUE : 'unique';
// 2 arg functions
SUBSTRING : 'substring';
diff --git
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
index 6c8226eda5..bb4942124b 100644
---
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
+++
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
@@ -77,7 +77,7 @@ tokens {
// functions that return Strings
zeroArgString : (TO_UPPER | TO_LOWER | TRIM | TO_STRING | URL_ENCODE |
URL_DECODE | BASE64_ENCODE | BASE64_DECODE | ESCAPE_JSON | ESCAPE_XML |
ESCAPE_CSV | ESCAPE_HTML3 | ESCAPE_HTML4 | UNESCAPE_JSON | UNESCAPE_XML |
UNESCAPE_CSV | UNESCAPE_HTML3 | UNESCAPE_HTML4 | EVALUATE_EL_STRING) LPAREN!
RPAREN!;
oneArgString : ((SUBSTRING_BEFORE | SUBSTRING_BEFORE_LAST | SUBSTRING_AFTER |
SUBSTRING_AFTER_LAST | REPLACE_NULL | REPLACE_EMPTY | REPLACE_BY_PATTERN |
- PREPEND | APPEND | STARTS_WITH | ENDS_WITH |
CONTAINS | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | UUID5 |
HASH) LPAREN! anyArg RPAREN!) |
+ PREPEND | APPEND | STARTS_WITH | ENDS_WITH |
CONTAINS | UNIQUE | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 |
UUID5 | HASH) LPAREN! anyArg RPAREN!) |
(TO_RADIX LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
twoArgString : ((REPLACE | REPLACE_FIRST | REPLACE_ALL | IF_ELSE |
JSON_PATH_SET | JSON_PATH_ADD) LPAREN! anyArg COMMA! anyArg RPAREN!) |
((SUBSTRING | FORMAT | FORMAT_INSTANT | PAD_LEFT |
PAD_RIGHT | REPEAT) LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
diff --git
a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
index b1b5e08dd4..4a7d85a9ad 100644
---
a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
+++
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
@@ -119,6 +119,7 @@ import
org.apache.nifi.attribute.expression.language.evaluation.functions.ToRadi
import
org.apache.nifi.attribute.expression.language.evaluation.functions.ToStringEvaluator;
import
org.apache.nifi.attribute.expression.language.evaluation.functions.ToUpperEvaluator;
import
org.apache.nifi.attribute.expression.language.evaluation.functions.TrimEvaluator;
+import
org.apache.nifi.attribute.expression.language.evaluation.functions.UniqueEvaluator;
import
org.apache.nifi.attribute.expression.language.evaluation.functions.UrlDecodeEvaluator;
import
org.apache.nifi.attribute.expression.language.evaluation.functions.UrlEncodeEvaluator;
import
org.apache.nifi.attribute.expression.language.evaluation.functions.Uuid3Evaluator;
@@ -263,6 +264,7 @@ import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpre
import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNESCAPE_HTML4;
import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNESCAPE_JSON;
import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNESCAPE_XML;
+import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNIQUE;
import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.URL_DECODE;
import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.URL_ENCODE;
import static
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UUID;
@@ -1085,6 +1087,12 @@ public class ExpressionCompiler {
toStringEvaluator(argEvaluators.get(0), "argument to
return if true"),
toStringEvaluator(argEvaluators.get(1), "argument to
return if false")), "ifElse");
}
+ case UNIQUE: {
+ verifyArgCount(argEvaluators, 1, "unique");
+ return addToken(new UniqueEvaluator(
+ toStringEvaluator(subjectEvaluator),
+ toStringEvaluator(argEvaluators.get(0), "first
argument to unique")), "unique");
+ }
default:
throw new
AttributeExpressionLanguageParsingException("Expected a Function-type
expression but got " + tree.toString());
}
diff --git
a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java
new file mode 100644
index 0000000000..8cde2bf7ba
--- /dev/null
+++
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.attribute.expression.language.evaluation.functions;
+
+import org.apache.nifi.attribute.expression.language.EvaluationContext;
+import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
+import org.apache.nifi.attribute.expression.language.evaluation.QueryResult;
+import
org.apache.nifi.attribute.expression.language.evaluation.StringEvaluator;
+import
org.apache.nifi.attribute.expression.language.evaluation.StringQueryResult;
+
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * Removes duplicate values from a delimited string while preserving the
+ * order of first occurrence.
+ *
+ * <p>The subject is split on the separator argument, duplicate values are
+ * dropped, and the remaining values are joined back together with the same
+ * separator.</p>
+ *
+ * <p>Examples:</p>
+ * <ul>
+ * <li>"red,blue,red,green" with separator "," returns "red,blue,green"</li>
+ * <li>"a::b::a::c" with separator "::" returns "a::b::c"</li>
+ * <li>"test" with separator "," returns "test" (no duplicates)</li>
+ * </ul>
+ *
+ * <p>Matching rules:</p>
+ * <ul>
+ * <li>The separator is matched literally (it is passed through
+ * Pattern.quote), so regex metacharacters such as "|" need no escaping</li>
+ * <li>Values are compared as exact strings: the comparison is
+ * case-sensitive and surrounding whitespace is not trimmed</li>
+ * </ul>
+ *
+ * <p>Special cases:</p>
+ * <ul>
+ * <li>If the subject is null, returns empty string</li>
+ * <li>If the separator is null or empty, returns the original subject
+ * unchanged</li>
+ * <li>An empty element is treated like any other value: its first
+ * occurrence is kept and later ones are dropped (e.g., "a,,b,," with
+ * separator "," returns "a,,b")</li>
+ * <li>Consequently a subject consisting only of separators collapses to
+ * an empty string (e.g., ",," with separator "," returns "")</li>
+ * </ul>
+ */
+public class UniqueEvaluator extends StringEvaluator {
+
+ private final Evaluator<String> subject;
+ private final Evaluator<String> separator;
+
+ /**
+ * Constructs a new UniqueEvaluator.
+ *
+ * @param subject the evaluator that provides the delimited string to process
+ * @param separator the evaluator that provides the delimiter used both to
+ * split the subject and to join the unique values
+ */
+ public UniqueEvaluator(final Evaluator<String> subject, final
Evaluator<String> separator) {
+ this.subject = subject;
+ this.separator = separator;
+ }
+
+ @Override
+ public QueryResult<String> evaluate(final EvaluationContext
evaluationContext) {
+ final String subjectValue =
subject.evaluate(evaluationContext).getValue();
+ if (subjectValue == null) {
+ return new StringQueryResult("");
+ }
+
+ final String separatorValue =
separator.evaluate(evaluationContext).getValue();
+ if (separatorValue == null || separatorValue.isEmpty()) {
+ return new StringQueryResult(subjectValue);
+ }
+
+ // Split the subject by the separator, using Pattern.quote to handle
special regex characters
+ final String[] parts =
subjectValue.split(Pattern.quote(separatorValue), -1);
+
+ // Use LinkedHashSet to maintain insertion order while removing
duplicates
+ final Set<String> uniqueValues = new
LinkedHashSet<>(Arrays.asList(parts));
+
+ // Join the unique values back together with the separator
+ final String result = String.join(separatorValue, uniqueValues);
+ return new StringQueryResult(result);
+ }
+
+ @Override
+ public Evaluator<?> getSubjectEvaluator() {
+ return subject;
+ }
+}
diff --git
a/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
b/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
index 77ade9117c..9e213e6456 100644
---
a/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
+++
b/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
@@ -2689,4 +2689,71 @@ public class TestQuery {
return 0;
}
}
+
+ @Test
+ public void testUnique() {
+ final Map<String, String> attributes = new HashMap<>();
+
+ // Test basic comma-separated list
+ attributes.put("list", "apple,banana,apple,orange,banana,grape");
+ verifyEquals("${list:unique(',')}", attributes,
"apple,banana,orange,grape");
+
+ // Test pipe-separated list
+ attributes.put("pipe_list", "red|blue|red|green|blue|yellow");
+ verifyEquals("${pipe_list:unique('|')}", attributes,
"red|blue|green|yellow");
+
+ // Test with spaces
+ attributes.put("space_list", "one two one three two four");
+ verifyEquals("${space_list:unique(' ')}", attributes, "one two three
four");
+
+ // Test with empty values in list: the first empty value is kept, the repeated one is dropped
+ attributes.put("empty_values_in_list", "a,b,,c,,d,b");
+ verifyEquals("${empty_values_in_list:unique(',')}", attributes,
"a,b,,c,d");
+
+ // Test with single value
+ attributes.put("single", "only_one");
+ verifyEquals("${single:unique(',')}", attributes, "only_one");
+
+ // Test with no duplicates
+ attributes.put("no_dups", "x,y,z");
+ verifyEquals("${no_dups:unique(',')}", attributes, "x,y,z");
+
+ // Test with all duplicates
+ attributes.put("all_dups", "same,same,same,same");
+ verifyEquals("${all_dups:unique(',')}", attributes, "same");
+
+ // Test with multi-character separator
+ attributes.put("multi_sep", "one::two::one::three::two");
+ verifyEquals("${multi_sep:unique('::')}", attributes,
"one::two::three");
+
+ // Test that a regex metacharacter ('|') is treated as a literal separator
+ attributes.put("special_sep", "a|b|a|c|b");
+ verifyEquals("${special_sep:unique('|')}", attributes, "a|b|c");
+
+ // Test with empty string separator (should return original)
+ attributes.put("test_list", "abc");
+ verifyEquals("${test_list:unique('')}", attributes, "abc");
+
+ // Test with empty string subject
+ attributes.put("empty_attr", "");
+ verifyEquals("${empty_attr:unique(',')}", attributes, "");
+
+ // Test with missing (null) attribute: the result is the empty string
+ verifyEquals("${missing_attr:unique(',')}", attributes, "");
+
+ attributes.put("ordered", "3,1,4,1,5,9,2,6,5,3,5"); // order of first occurrence must be preserved
+ verifyEquals("${ordered:unique(',')}", attributes, "3,1,4,5,9,2,6");
+
+ // Test with URLs
+ attributes.put("urls",
"http://example.com,http://test.com,http://example.com");
+ verifyEquals("${urls:unique(',')}", attributes,
"http://example.com,http://test.com");
+
+ // Test with file paths (Windows-style)
+ attributes.put("paths",
"C:\\Users\\test;D:\\Data;C:\\Users\\test;E:\\Backup");
+ verifyEquals("${paths:unique(';')}", attributes,
"C:\\Users\\test;D:\\Data;E:\\Backup");
+
+ // Test with dash separator
+ attributes.put("dash_separated", "field1-field2-field1-field3");
+ verifyEquals("${dash_separated:unique('-')}", attributes,
"field1-field2-field3");
+ }
}
diff --git a/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
b/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
index 595b7be996..63f48e393c 100644
--- a/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
+++ b/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
@@ -880,6 +880,41 @@ then the following Expressions will result in the
following values:
+[.function]
+=== unique
+
+*Description*: [.description]#Returns the unique values from a delimited
string while preserving the order of first occurrence. The function takes a
delimiter as an argument and returns the deduplicated values joined by the same
delimiter.#
+
+*Subject Type*: [.subject]#String#
+
+*Arguments*:
+
+- [.argName]#_delimiter_# : [.argDesc]#The delimiter used to split and join
the subject string. It is matched literally, not as a regular expression#
+
+*Return Type*: [.returnType]#String#
+
+*Examples*: If the "tags" attribute has the value
"red,blue,red,green,blue,yellow", then the following
+Expressions will result in the following values:
+
+.Unique Examples
+|===================================================================
+| Expression | Value
+| `${tags:unique(',')}` | `red,blue,green,yellow`
+| `${tags:unique(';')}` | `red,blue,red,green,blue,yellow`
+|===================================================================
+
+If the "paths" attribute has the value
"C:\Users\test;D:\Data;C:\Users\test;E:\Backup", then:
+
+.Path Deduplication
+|===================================================================
+| Expression | Value
+| `${paths:unique(';')}` | `C:\Users\test;D:\Data;E:\Backup`
+|===================================================================
+
+*Special Cases*: If the subject is null, returns an empty string. If the
delimiter is null or empty, returns the original subject unchanged. An empty
value is treated like any other value: its first occurrence is kept and later
occurrences are removed (e.g., "a,,b,,c" becomes "a,,b,c").
+
+
+
[.function]
=== append