This is an automated email from the ASF dual-hosted git repository. chriss pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push: new c28d040bca NIFI-12118: refactored RemoveRecordPath member variable that was caching values, and improve performance with Pattern.matcher().find() instead of .match(). c28d040bca is described below commit c28d040bcabb9cc8f716d8e8701fad63b1a3166f Author: Mark Payne <marka...@hotmail.com> AuthorDate: Fri Sep 22 17:25:14 2023 -0400 NIFI-12118: refactored RemoveRecordPath member variable that was caching values, and improve performance with Pattern.matcher().find() instead of .match(). This closes #7783 Signed-off-by: Chris Sampson <chris.sampso...@gmail.com> --- .../nifi/record/path/RecordFieldRemover.java | 37 +++++++++++----------- .../processors/standard/RemoveRecordField.java | 33 ++++++++----------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/RecordFieldRemover.java b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/RecordFieldRemover.java index 579e14c01a..02d9f522ac 100644 --- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/RecordFieldRemover.java +++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/RecordFieldRemover.java @@ -17,15 +17,14 @@ package org.apache.nifi.record.path; -import org.apache.nifi.record.path.util.RecordPathCache; -import org.apache.nifi.serialization.record.Record; -import org.apache.nifi.serialization.record.RecordFieldRemovalPath; -import org.apache.nifi.serialization.record.RecordSchema; - import java.util.List; import java.util.Optional; import java.util.regex.Pattern; import java.util.stream.Collectors; +import org.apache.nifi.record.path.util.RecordPathCache; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordFieldRemovalPath; +import org.apache.nifi.serialization.record.RecordSchema; public class RecordFieldRemover { private final RecordPathCache recordPathCache; @@ -50,20 +49,22 @@ public class RecordFieldRemover { final RecordPathResult recordPathResult = recordPath.evaluate(record); final List<FieldValue> selectedFields = recordPathResult.getSelectedFields().collect(Collectors.toList()); - if (!selectedFields.isEmpty()) { - if (recordPathRemovalProperties.isAppliedToAllElementsInCollection()) { - // all elements have the same parent, so navigate up from the first element in the collection - selectedFields.get(0).getParent().ifPresent(FieldValue::removeContent); - } else { - selectedFields.forEach(FieldValue::remove); - } + if (selectedFields.isEmpty()) { + return; + } - if (recordPathRemovalProperties.isRemovingFieldsNotJustElementsFromWithinCollection()) { - removeFieldsFromSchema(selectedFields); - } + if (recordPathRemovalProperties.isAppliedToAllElementsInCollection()) { + // all elements have the same parent, so navigate up from the first element in the collection + selectedFields.get(0).getParent().ifPresent(FieldValue::removeContent); + } else { + selectedFields.forEach(FieldValue::remove); + } - fieldsChanged = true; + if (recordPathRemovalProperties.isRemovingFieldsNotJustElementsFromWithinCollection()) { + removeFieldsFromSchema(selectedFields); } + + fieldsChanged = true; } private void removeFieldsFromSchema(final List<FieldValue> selectedFields) { @@ -92,7 +93,7 @@ public class RecordFieldRemover { } public static class RecordPathRemovalProperties { - private static final Pattern ALL_ELEMENTS_REGEX = Pattern.compile(".*\\[\\s*(?:\\*|0\\s*\\.\\.\\s*-1)\\s*]$"); + private static final Pattern ALL_ELEMENTS_REGEX = Pattern.compile("\\[\\s*(?:\\*|0\\s*\\.\\.\\s*-1)\\s*]$"); private static final Pattern ARRAY_ELEMENTS_REGEX = Pattern.compile("\\[\\s*-?\\d+(?:\\s*,\\s*-?\\d+)*+\\s*]"); private static final Pattern MAP_ELEMENTS_REGEX = Pattern.compile("\\[\\s*'[^']+'(?:\\s*,\\s*'[^']+')*+\\s*]"); @@ -106,7 +107,7 @@ public class RecordFieldRemover { this.recordPath = recordPath; // ends with [*] or [0..-1] - this.appliedToAllElementsInCollection = ALL_ELEMENTS_REGEX.matcher(recordPath).matches(); + this.appliedToAllElementsInCollection = ALL_ELEMENTS_REGEX.matcher(recordPath).find(); // contains an array reference [] with one or more element references, e.g. [1], [ 1, -1] this.appliedToIndividualArrayElements = ARRAY_ELEMENTS_REGEX.matcher(recordPath).find(); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/RemoveRecordField.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/RemoveRecordField.java index 7a5cccec71..c1d38c4326 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/RemoveRecordField.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/RemoveRecordField.java @@ -17,6 +17,10 @@ package org.apache.nifi.processors.standard; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; @@ -40,11 +44,6 @@ import org.apache.nifi.record.path.util.RecordPathCache; import org.apache.nifi.record.path.validation.RecordPathValidator; import org.apache.nifi.serialization.record.Record; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - @SideEffectFree @SupportsBatching @@ -65,7 +64,6 @@ import java.util.List; @SeeAlso({UpdateRecord.class}) public class RemoveRecordField extends AbstractRecordProcessor { private volatile RecordPathCache recordPathCache; - private volatile List<RecordFieldRemover.RecordPathRemovalProperties> recordPathsToRemove; private static final String ROOT_PATH = "/"; @@ -111,24 +109,21 @@ public class RemoveRecordField extends AbstractRecordProcessor { @OnScheduled public void collectRecordPaths(final ProcessContext context) { recordPathCache = new RecordPathCache(context.getProperties().size() * 2); - - recordPathsToRemove = null; } @Override protected Record process(final Record record, final FlowFile flowFile, final ProcessContext context, final long count) { - if (recordPathsToRemove == null) { - recordPathsToRemove = new ArrayList<>(context.getProperties().size()); - context.getProperties().keySet().forEach(property -> { - if (property.isDynamic()) { - // validate RecordPath from Expression Language (if applicable) - final String recordPath = context.getProperty(property).evaluateAttributeExpressions(flowFile).getValue(); - if (ROOT_PATH.equals(recordPath)) { - throw new ProcessException(String.format("The root Record Path %s cannot be removed for %s", ROOT_PATH, property.getDisplayName())); - } - recordPathsToRemove.add(new RecordFieldRemover.RecordPathRemovalProperties(recordPath)); + final List<RecordFieldRemover.RecordPathRemovalProperties> recordPathsToRemove = new ArrayList<>(); + for (final PropertyDescriptor property : context.getProperties().keySet()) { + if (property.isDynamic()) { + // validate RecordPath from Expression Language (if applicable) + final String recordPath = context.getProperty(property).evaluateAttributeExpressions(flowFile).getValue(); + if (ROOT_PATH.equals(recordPath)) { + throw new ProcessException(String.format("The root Record Path %s cannot be removed for %s", ROOT_PATH, property.getDisplayName())); } - }); + + recordPathsToRemove.add(new RecordFieldRemover.RecordPathRemovalProperties(recordPath)); + } } final RecordFieldRemover recordFieldRemover = new RecordFieldRemover(record, recordPathCache);