[ https://issues.apache.org/jira/browse/NIFI-2747?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15949096#comment-15949096 ]
ASF GitHub Bot commented on NIFI-2747: -------------------------------------- Github user trixpan commented on a diff in the pull request: https://github.com/apache/nifi/pull/1619#discussion_r108931643 --- Diff: nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java --- @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.cybersecurity; + +import com.idealista.tlsh.TLSH; +import com.idealista.tlsh.exceptions.InsufficientComplexityException; +import info.debatty.java.spamsum.SpamSum; + +import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.SideEffectFree; +import org.apache.nifi.annotation.behavior.SupportsBatching; +import org.apache.nifi.annotation.behavior.ReadsAttribute; +import org.apache.nifi.annotation.behavior.ReadsAttributes; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; + +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.processors.standard.HashContent; + +import org.apache.nifi.stream.io.StreamUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + + +@EventDriven 
+@SideEffectFree +@SupportsBatching +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) +@Tags({"hashing", "fuzzy-hashing", "cyber-security"}) +@CapabilityDescription("Calculates a fuzzy/locality-sensitive hash value for the Content of a FlowFile and puts that " + + "hash value on the FlowFile as an attribute whose name is determined by the <Hash Attribute Name> property. " + + "Note: this processor only offers non-cryptographic hash algorithms, and it should not be " + + "seen as a replacement for the HashContent processor") + +@SeeAlso({HashContent.class}) +@ReadsAttributes({@ReadsAttribute(attribute="", description="")}) +@WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", description = "This Processor adds an attribute whose value is the result of Hashing the " + + "existing FlowFile content. The name of this attribute is specified by the <Hash Attribute Name> property")}) + +public class FuzzyHashContent extends AbstractProcessor { + + public static final AllowableValue allowableValueSSDEEP = new AllowableValue( + "ssdeep", + "ssdeep", + "Uses ssdeep / SpamSum 'context triggered piecewise hash'."); + public static final AllowableValue allowableValueTLSH = new AllowableValue( + "tlsh", + "tlsh", + "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long"); + + public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder() + .name("ATTRIBUTE_NAME") + .displayName("Hash Attribute Name") + .description("The name of the FlowFile Attribute into which the Hash Value should be written. 
" + + "If the value already exists, it will be overwritten") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .defaultValue("fuzzyhash.value") + .build(); + + public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder() + .name("HASH_ALGORITHM") + .displayName("Hashing Algorithm") + .description("The hashing algorithm utilised") + .allowableValues(allowableValueSSDEEP, allowableValueTLSH) + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("Success") + .description("Any FlowFile that is successfully hashed will be sent to this Relationship.") + .build(); + + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("Failure") + .description("Any FlowFile that is successfully hashed will be sent to this Relationship.") + .build(); + + private List<PropertyDescriptor> descriptors; + + private Set<Relationship> relationships; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>(); + descriptors.add(ATTRIBUTE_NAME); + descriptors.add(HASH_ALGORITHM); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<Relationship>(); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + this.relationships = Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @OnScheduled + public void onScheduled(final ProcessContext context) { + + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + FlowFile flowFile = 
session.get(); + if (flowFile == null) { + return; + } + + final ComponentLog logger = getLogger(); + final String algorithm = context.getProperty(HASH_ALGORITHM).getValue(); + + + final AtomicReference<String> hashValueHolder = new AtomicReference<>(null); + + try { + session.read(flowFile, new InputStreamCallback() { + @Override + public void process(final InputStream in) throws IOException { + try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) { + StreamUtils.copy(in,holder); + + if (algorithm.equals(allowableValueSSDEEP.getValue())) { --- End diff -- just as a note. I did change the code to equals(allowableValueSSDEEP) but it failed to behave as expected, so I rolled back to a similar approach but without the declaration of the algorithm string. > Add Processor support SSDeep (SpamSum) hashes (and possibly others e.g. TLSH) > ----------------------------------------------------------------------------- > > Key: NIFI-2747 > URL: https://issues.apache.org/jira/browse/NIFI-2747 > Project: Apache NiFi > Issue Type: Improvement > Affects Versions: 1.0.0 > Reporter: Andre F de Miranda > Assignee: Andre F de Miranda > > It would be great if we had a processor to support fuzzy hashing algorithms > such as SpamSum/SSDeep -- This message was sent by Atlassian JIRA (v6.3.15#6346)