[ https://issues.apache.org/jira/browse/NIFI-2747?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15949109#comment-15949109 ]
ASF GitHub Bot commented on NIFI-2747: -------------------------------------- Github user apiri commented on a diff in the pull request: https://github.com/apache/nifi/pull/1619#discussion_r108933894 --- Diff: nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java --- @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.cybersecurity; + +import com.idealista.tlsh.TLSH; +import com.idealista.tlsh.exceptions.InsufficientComplexityException; +import info.debatty.java.spamsum.SpamSum; + +import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.SideEffectFree; +import org.apache.nifi.annotation.behavior.SupportsBatching; +import org.apache.nifi.annotation.behavior.ReadsAttribute; +import org.apache.nifi.annotation.behavior.ReadsAttributes; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; + +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.processors.standard.HashContent; + +import org.apache.nifi.stream.io.StreamUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + + +@EventDriven 
+@SideEffectFree +@SupportsBatching +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) +@Tags({"hashing", "fuzzy-hashing", "cyber-security"}) +@CapabilityDescription("Calculates a fuzzy/locality-sensitive hash value for the Content of a FlowFile and puts that " + + "hash value on the FlowFile as an attribute whose name is determined by the <Hash Attribute Name> property." + + "Note: this processor only offers non-criptographic hash algorithms. And it should be not be " + + "seen as a replacement to the HashContent processor") + +@SeeAlso({HashContent.class}) +@ReadsAttributes({@ReadsAttribute(attribute="", description="")}) +@WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", description = "This Processor adds an attribute whose value is the result of Hashing the " + + "existing FlowFile content. The name of this attribute is specified by the <Hash Attribute Name> property")}) + +public class FuzzyHashContent extends AbstractProcessor { + + public static final AllowableValue allowableValueSSDEEP = new AllowableValue( + "ssdeep", + "ssdeep", + "Uses ssdeep / SpamSum 'context triggered piecewise hash'."); + public static final AllowableValue allowableValueTLSH = new AllowableValue( + "tlsh", + "tlsh", + "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile Content must be at least 512 characters long"); + + public static final PropertyDescriptor ATTRIBUTE_NAME = new PropertyDescriptor.Builder() + .name("ATTRIBUTE_NAME") + .displayName("Hash Attribute Name") + .description("The name of the FlowFile Attribute into which the Hash Value should be written. " + + "If the value already exists, it will be overwritten") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .defaultValue("fuzzyhash.value") + .build(); + + public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder() --- End diff -- sounds fair. Not sure how old HashContent is, may predate EL. 
More of a comment as to whether or not this would be useful, but it has no bearing on the overall flow and how it would be used. > Add Processor support SSDeep (SpamSum) hashes (and possibly others e.g. TLSH) > ----------------------------------------------------------------------------- > > Key: NIFI-2747 > URL: https://issues.apache.org/jira/browse/NIFI-2747 > Project: Apache NiFi > Issue Type: Improvement > Affects Versions: 1.0.0 > Reporter: Andre F de Miranda > Assignee: Andre F de Miranda > > It would be great if we had a processor to support fuzzy hashing algorithms > such as SpamSum/SSDeep -- This message was sent by Atlassian JIRA (v6.3.15#6346)