This is an automated email from the ASF dual-hosted git repository.

pvillard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new efb54cb39f NIFI-15628 - Add unique() expression language function
efb54cb39f is described below

commit efb54cb39ffca8347ca5d79076dcbfc62a0a93ad
Author: mcducks <[email protected]>
AuthorDate: Wed Feb 25 22:34:15 2026 +1100

    NIFI-15628 - Add unique() expression language function
    
    - Added UniqueEvaluator for removing duplicates from delimited strings
    - Preserves order of first occurrence using LinkedHashSet
    - Handles special regex characters with Pattern.quote()
    - Empty strings treated as distinct values
    - Added unit tests
    - Updated ANTLR grammar (lexer and parser)
    - Integrated into ExpressionCompiler with proper type coercion
    - Added documentation to expression language guide
    
    This closes #10923.
    
    Signed-off-by: Pierre Villard <[email protected]>
---
 .../language/antlr/AttributeExpressionLexer.g      |  1 +
 .../language/antlr/AttributeExpressionParser.g     |  2 +-
 .../language/compile/ExpressionCompiler.java       |  8 ++
 .../evaluation/functions/UniqueEvaluator.java      | 95 ++++++++++++++++++++++
 .../attribute/expression/language/TestQuery.java   | 67 +++++++++++++++
 .../main/asciidoc/expression-language-guide.adoc   | 35 ++++++++
 6 files changed, 207 insertions(+), 1 deletion(-)

diff --git 
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
 
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
index 0905f4119a..98c498988d 100644
--- 
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
+++ 
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g
@@ -219,6 +219,7 @@ REPEAT : 'repeat';
 UUID3 : 'UUID3';
 UUID5 : 'UUID5';
 HASH : 'hash';
+UNIQUE : 'unique';
 
 // 2 arg functions
 SUBSTRING      : 'substring';
diff --git 
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
 
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
index 6c8226eda5..bb4942124b 100644
--- 
a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
+++ 
b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionParser.g
@@ -77,7 +77,7 @@ tokens {
 // functions that return Strings
 zeroArgString : (TO_UPPER | TO_LOWER | TRIM | TO_STRING | URL_ENCODE | 
URL_DECODE | BASE64_ENCODE | BASE64_DECODE | ESCAPE_JSON | ESCAPE_XML | 
ESCAPE_CSV | ESCAPE_HTML3 | ESCAPE_HTML4 | UNESCAPE_JSON | UNESCAPE_XML | 
UNESCAPE_CSV | UNESCAPE_HTML3 | UNESCAPE_HTML4 | EVALUATE_EL_STRING) LPAREN! 
RPAREN!;
 oneArgString : ((SUBSTRING_BEFORE | SUBSTRING_BEFORE_LAST | SUBSTRING_AFTER | 
SUBSTRING_AFTER_LAST | REPLACE_NULL | REPLACE_EMPTY | REPLACE_BY_PATTERN |
-                               PREPEND | APPEND | STARTS_WITH | ENDS_WITH | 
CONTAINS | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | UUID5 | 
HASH) LPAREN! anyArg RPAREN!) |
+                               PREPEND | APPEND | STARTS_WITH | ENDS_WITH | 
CONTAINS | UNIQUE | JOIN | JSON_PATH | JSON_PATH_DELETE | FROM_RADIX | UUID3 | 
UUID5 | HASH) LPAREN! anyArg RPAREN!) |
                           (TO_RADIX LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
 twoArgString : ((REPLACE | REPLACE_FIRST | REPLACE_ALL | IF_ELSE | 
JSON_PATH_SET | JSON_PATH_ADD) LPAREN! anyArg COMMA! anyArg RPAREN!) |
                           ((SUBSTRING | FORMAT | FORMAT_INSTANT | PAD_LEFT | 
PAD_RIGHT | REPEAT) LPAREN! anyArg (COMMA! anyArg)? RPAREN!);
diff --git 
a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
 
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
index b1b5e08dd4..4a7d85a9ad 100644
--- 
a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
+++ 
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/compile/ExpressionCompiler.java
@@ -119,6 +119,7 @@ import 
org.apache.nifi.attribute.expression.language.evaluation.functions.ToRadi
 import 
org.apache.nifi.attribute.expression.language.evaluation.functions.ToStringEvaluator;
 import 
org.apache.nifi.attribute.expression.language.evaluation.functions.ToUpperEvaluator;
 import 
org.apache.nifi.attribute.expression.language.evaluation.functions.TrimEvaluator;
+import 
org.apache.nifi.attribute.expression.language.evaluation.functions.UniqueEvaluator;
 import 
org.apache.nifi.attribute.expression.language.evaluation.functions.UrlDecodeEvaluator;
 import 
org.apache.nifi.attribute.expression.language.evaluation.functions.UrlEncodeEvaluator;
 import 
org.apache.nifi.attribute.expression.language.evaluation.functions.Uuid3Evaluator;
@@ -263,6 +264,7 @@ import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpre
 import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNESCAPE_HTML4;
 import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNESCAPE_JSON;
 import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNESCAPE_XML;
+import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UNIQUE;
 import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.URL_DECODE;
 import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.URL_ENCODE;
 import static 
org.apache.nifi.attribute.expression.language.antlr.AttributeExpressionParser.UUID;
@@ -1085,6 +1087,12 @@ public class ExpressionCompiler {
                     toStringEvaluator(argEvaluators.get(0), "argument to 
return if true"),
                     toStringEvaluator(argEvaluators.get(1), "argument to 
return if false")), "ifElse");
             }
+            case UNIQUE: {
+                verifyArgCount(argEvaluators, 1, "unique");
+                return addToken(new UniqueEvaluator(
+                        toStringEvaluator(subjectEvaluator),
+                        toStringEvaluator(argEvaluators.get(0), "first 
argument to unique")), "unique");
+            }
             default:
                 throw new 
AttributeExpressionLanguageParsingException("Expected a Function-type 
expression but got " + tree.toString());
         }
diff --git 
a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java
 
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java
new file mode 100644
index 0000000000..8cde2bf7ba
--- /dev/null
+++ 
b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/functions/UniqueEvaluator.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.attribute.expression.language.evaluation.functions;
+
+import org.apache.nifi.attribute.expression.language.EvaluationContext;
+import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
+import org.apache.nifi.attribute.expression.language.evaluation.QueryResult;
+import 
org.apache.nifi.attribute.expression.language.evaluation.StringEvaluator;
+import 
org.apache.nifi.attribute.expression.language.evaluation.StringQueryResult;
+
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * UniqueEvaluator removes duplicate values from a delimited string while 
preserving
+ * the order of first occurrence.
+ *
+ * <p>This evaluator takes a separator as an argument and returns the unique 
values
+ * from the subject string, keeping each value at the position of its first 
appearance.
+ * The separator is matched literally, not as a regular expression.</p>
+ *
+ * <p>Examples:</p>
+ * <ul>
+ *   <li>"red,blue,red,green" with separator "," returns "red,blue,green"</li>
+ *   <li>"a::b::a::c" with separator "::" returns "a::b::c"</li>
+ *   <li>"test" with separator "," returns "test" (no duplicates)</li>
+ * </ul>
+ *
+ * <p>Special cases:</p>
+ * <ul>
+ *   <li>If the subject is null, returns an empty string</li>
+ *   <li>If the separator is null or empty, returns the original subject 
unchanged</li>
+ *   <li>An empty element is treated like any other value: only its first 
occurrence is kept (e.g., "a,,b,," with separator "," returns "a,,b")</li>
+ * </ul>
+ */
+public class UniqueEvaluator extends StringEvaluator {
+
+    private final Evaluator<String> subject;
+    private final Evaluator<String> separator;
+
+    /**
+     * Constructs a new UniqueEvaluator.
+     *
+     * @param subject the evaluator that provides the delimited string to 
process
+     * @param separator the evaluator that provides the delimiter to use for 
splitting and joining
+     */
+    public UniqueEvaluator(final Evaluator<String> subject, final 
Evaluator<String> separator) {
+        this.subject = subject;
+        this.separator = separator;
+    }
+
+    @Override
+    public QueryResult<String> evaluate(final EvaluationContext 
evaluationContext) {
+        final String subjectValue = 
subject.evaluate(evaluationContext).getValue();
+        if (subjectValue == null) {
+            return new StringQueryResult("");
+        }
+
+        final String separatorValue = 
separator.evaluate(evaluationContext).getValue();
+        if (separatorValue == null || separatorValue.isEmpty()) {
+            return new StringQueryResult(subjectValue);
+        }
+
+        // Split on the literal separator; the -1 limit keeps trailing empty parts, and Pattern.quote escapes 
special regex characters
+        final String[] parts = 
subjectValue.split(Pattern.quote(separatorValue), -1);
+
+        // Use LinkedHashSet to maintain insertion order while removing 
duplicates
+        final Set<String> uniqueValues = new 
LinkedHashSet<>(Arrays.asList(parts));
+
+        // Join the unique values back together with the separator
+        final String result = String.join(separatorValue, uniqueValues);
+        return new StringQueryResult(result);
+    }
+
+    @Override
+    public Evaluator<?> getSubjectEvaluator() {
+        return subject;
+    }
+}
diff --git 
a/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
 
b/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
index 77ade9117c..9e213e6456 100644
--- 
a/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
+++ 
b/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java
@@ -2689,4 +2689,71 @@ public class TestQuery {
             return 0;
         }
     }
+
+    @Test
+    public void testUnique() {
+        final Map<String, String> attributes = new HashMap<>();
+
+        // Test basic comma-separated list
+        attributes.put("list", "apple,banana,apple,orange,banana,grape");
+        verifyEquals("${list:unique(',')}", attributes, 
"apple,banana,orange,grape");
+
+        // Test pipe-separated list
+        attributes.put("pipe_list", "red|blue|red|green|blue|yellow");
+        verifyEquals("${pipe_list:unique('|')}", attributes, 
"red|blue|green|yellow");
+
+        // Test with spaces
+        attributes.put("space_list", "one two one three two four");
+        verifyEquals("${space_list:unique(' ')}", attributes, "one two three 
four");
+
+        // Test with empty values in list
+        attributes.put("empty_values_in_list", "a,b,,c,,d,b");
+        verifyEquals("${empty_values_in_list:unique(',')}", attributes, 
"a,b,,c,d");
+
+        // Test with single value
+        attributes.put("single", "only_one");
+        verifyEquals("${single:unique(',')}", attributes, "only_one");
+
+        // Test with no duplicates
+        attributes.put("no_dups", "x,y,z");
+        verifyEquals("${no_dups:unique(',')}", attributes, "x,y,z");
+
+        // Test with all duplicates
+        attributes.put("all_dups", "same,same,same,same");
+        verifyEquals("${all_dups:unique(',')}", attributes, "same");
+
+        // Test with multi-character separator
+        attributes.put("multi_sep", "one::two::one::three::two");
+        verifyEquals("${multi_sep:unique('::')}", attributes, 
"one::two::three");
+
+        // Test with a regex metacharacter as separator (must be matched literally)
+        attributes.put("special_sep", "a|b|a|c|b");
+        verifyEquals("${special_sep:unique('|')}", attributes, "a|b|c");
+
+        // Test with empty string separator (should return original)
+        attributes.put("test_list", "abc");
+        verifyEquals("${test_list:unique('')}", attributes, "abc");
+
+        // Test with empty string
+        attributes.put("empty_attr", "");
+        verifyEquals("${empty_attr:unique(',')}", attributes, "");
+
+        // Test with missing (null) attribute
+        verifyEquals("${missing_attr:unique(',')}", attributes, "");
+
+        attributes.put("ordered", "3,1,4,1,5,9,2,6,5,3,5");
+        verifyEquals("${ordered:unique(',')}", attributes, "3,1,4,5,9,2,6");
+
+        // Test with URLs
+        attributes.put("urls", 
"http://example.com,http://test.com,http://example.com");
+        verifyEquals("${urls:unique(',')}", attributes, 
"http://example.com,http://test.com");
+
+        // Test with file paths (Windows-style)
+        attributes.put("paths", 
"C:\\Users\\test;D:\\Data;C:\\Users\\test;E:\\Backup");
+        verifyEquals("${paths:unique(';')}", attributes, 
"C:\\Users\\test;D:\\Data;E:\\Backup");
+
+        // Test with dash separator
+        attributes.put("dash_separated", "field1-field2-field1-field3");
+        verifyEquals("${dash_separated:unique('-')}", attributes, 
"field1-field2-field3");
+    }
 }
diff --git a/nifi-docs/src/main/asciidoc/expression-language-guide.adoc 
b/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
index 595b7be996..63f48e393c 100644
--- a/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
+++ b/nifi-docs/src/main/asciidoc/expression-language-guide.adoc
@@ -880,6 +880,41 @@ then the following Expressions will result in the 
following values:
 
 
 
+[.function]
+=== unique
+
+*Description*: [.description]#Returns the unique values from a delimited 
string while preserving the order of first occurrence. The function takes a 
delimiter as an argument and returns the deduplicated values joined by the same 
delimiter.#
+
+*Subject Type*: [.subject]#String#
+
+*Arguments*:
+
+- [.argName]#_delimiter_# : [.argDesc]#The delimiter used to split and join 
the subject string#
+
+*Return Type*: [.returnType]#String#
+
+*Examples*: If the "tags" attribute has the value 
"red,blue,red,green,blue,yellow", then the following
+Expressions will result in the following values:
+
+.Unique Examples
+|===================================================================
+| Expression | Value
+| `${tags:unique(',')}` | `red,blue,green,yellow`
+| `${tags:unique(';')}` | `red,blue,red,green,blue,yellow`
+|===================================================================
+
+If the "paths" attribute has the value 
"C:\Users\test;D:\Data;C:\Users\test;E:\Backup", then:
+
+.Path Deduplication
+|===================================================================
+| Expression | Value
+| `${paths:unique(';')}` | `C:\Users\test;D:\Data;E:\Backup`
+|===================================================================
+
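+*Special Cases*: If the subject is null, returns an empty string. If the 
delimiter is null or empty, returns the original subject unchanged. The 
delimiter is matched literally, not as a regular expression. An empty value is 
treated like any other value: its first occurrence is kept and later 
occurrences are removed (e.g., "a,,b,,c" becomes "a,,b,c").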
+
+
+
 [.function]
 === append
 

Reply via email to