[ 
https://issues.apache.org/jira/browse/NIFI-2747?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15949096#comment-15949096
 ] 

ASF GitHub Bot commented on NIFI-2747:
--------------------------------------

Github user trixpan commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/1619#discussion_r108931643
  
    --- Diff: 
nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java
 ---
    @@ -0,0 +1,192 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.nifi.processors.cybersecurity;
    +
    +import com.idealista.tlsh.TLSH;
    +import com.idealista.tlsh.exceptions.InsufficientComplexityException;
    +import info.debatty.java.spamsum.SpamSum;
    +
    +import org.apache.nifi.annotation.behavior.EventDriven;
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.behavior.SideEffectFree;
    +import org.apache.nifi.annotation.behavior.SupportsBatching;
    +import org.apache.nifi.annotation.behavior.ReadsAttribute;
    +import org.apache.nifi.annotation.behavior.ReadsAttributes;
    +import org.apache.nifi.annotation.behavior.WritesAttribute;
    +import org.apache.nifi.annotation.behavior.WritesAttributes;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.SeeAlso;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.annotation.lifecycle.OnScheduled;
    +
    +import org.apache.nifi.components.AllowableValue;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.logging.ComponentLog;
    +import org.apache.nifi.processor.exception.ProcessException;
    +import org.apache.nifi.processor.AbstractProcessor;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.ProcessorInitializationContext;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.io.InputStreamCallback;
    +import org.apache.nifi.processor.util.StandardValidators;
    +import org.apache.nifi.processors.standard.HashContent;
    +
    +import org.apache.nifi.stream.io.StreamUtils;
    +
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.util.ArrayList;
    +import java.util.Collections;
    +import java.util.HashSet;
    +import java.util.List;
    +import java.util.Set;
    +import java.util.concurrent.atomic.AtomicReference;
    +
    +
    +@EventDriven
    +@SideEffectFree
    +@SupportsBatching
    +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
    +@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
    +@CapabilityDescription("Calculates a fuzzy/locality-sensitive hash value 
for the Content of a FlowFile and puts that " +
    +        "hash value on the FlowFile as an attribute whose name is 
determined by the <Hash Attribute Name> property. " +
    +        "Note: this processor only offers non-cryptographic hash 
algorithms, and it should not be " +
    +        "seen as a replacement for the HashContent processor")
    +
    +@SeeAlso({HashContent.class})
    +@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
    +@WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", 
description = "This Processor adds an attribute whose value is the result of 
Hashing the "
    +        + "existing FlowFile content. The name of this attribute is 
specified by the <Hash Attribute Name> property")})
    +
    +public class FuzzyHashContent extends AbstractProcessor {
    +
    +    public static final AllowableValue allowableValueSSDEEP = new 
AllowableValue(
    +            "ssdeep",
    +            "ssdeep",
    +            "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
    +    public static final AllowableValue allowableValueTLSH = new 
AllowableValue(
    +            "tlsh",
    +            "tlsh",
    +            "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile 
Content must be at least 512 characters long");
    +
    +    public static final PropertyDescriptor ATTRIBUTE_NAME = new 
PropertyDescriptor.Builder()
    +            .name("ATTRIBUTE_NAME")
    +            .displayName("Hash Attribute Name")
    +            .description("The name of the FlowFile Attribute into which 
the Hash Value should be written. " +
    +                    "If the value already exists, it will be overwritten")
    +            .required(true)
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .defaultValue("fuzzyhash.value")
    +            .build();
    +
    +    public static final PropertyDescriptor HASH_ALGORITHM = new 
PropertyDescriptor.Builder()
    +            .name("HASH_ALGORITHM")
    +            .displayName("Hashing Algorithm")
    +            .description("The hashing algorithm utilised")
    +            .allowableValues(allowableValueSSDEEP, allowableValueTLSH)
    +            .required(true)
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final Relationship REL_SUCCESS = new 
Relationship.Builder()
    +            .name("Success")
    +            .description("Any FlowFile that is successfully hashed will be 
sent to this Relationship.")
    +            .build();
    +
    +    public static final Relationship REL_FAILURE = new 
Relationship.Builder()
    +            .name("Failure")
    +            .description("Any FlowFile that fails to be hashed will be 
sent to this Relationship.")
    +            .build();
    +
    +    private List<PropertyDescriptor> descriptors;
    +
    +    private Set<Relationship> relationships;
    +
    +    @Override
    +    protected void init(final ProcessorInitializationContext context) {
    +        final List<PropertyDescriptor> descriptors = new 
ArrayList<PropertyDescriptor>();
    +        descriptors.add(ATTRIBUTE_NAME);
    +        descriptors.add(HASH_ALGORITHM);
    +        this.descriptors = Collections.unmodifiableList(descriptors);
    +
    +        final Set<Relationship> relationships = new 
HashSet<Relationship>();
    +        relationships.add(REL_SUCCESS);
    +        relationships.add(REL_FAILURE);
    +        this.relationships = Collections.unmodifiableSet(relationships);
    +    }
    +
    +    @Override
    +    public Set<Relationship> getRelationships() {
    +        return this.relationships;
    +    }
    +
    +    @Override
    +    public final List<PropertyDescriptor> 
getSupportedPropertyDescriptors() {
    +        return descriptors;
    +    }
    +
    +    @OnScheduled
    +    public void onScheduled(final ProcessContext context) {
    +
    +    }
    +
    +    @Override
    +    public void onTrigger(final ProcessContext context, final 
ProcessSession session) throws ProcessException {
    +        FlowFile flowFile = session.get();
    +        if (flowFile == null) {
    +            return;
    +        }
    +
    +        final ComponentLog logger = getLogger();
    +        final String algorithm = 
context.getProperty(HASH_ALGORITHM).getValue();
    +
    +
    +        final AtomicReference<String> hashValueHolder = new 
AtomicReference<>(null);
    +
    +        try {
    +            session.read(flowFile, new InputStreamCallback() {
    +                @Override
    +                public void process(final InputStream in) throws 
IOException {
    +                    try (ByteArrayOutputStream holder = new 
ByteArrayOutputStream()) {
    +                        StreamUtils.copy(in,holder);
    +
    +                        if 
(algorithm.equals(allowableValueSSDEEP.getValue())) {
    --- End diff --
    
    Just as a note: I did change the code to equals(allowableValueSSDEEP), but 
it failed to behave as expected, so I rolled back to a similar approach, only 
without the separate declaration of the algorithm string.


> Add Processor support SSDeep (SpamSum) hashes (and possibly others e.g. TLSH)
> -----------------------------------------------------------------------------
>
>                 Key: NIFI-2747
>                 URL: https://issues.apache.org/jira/browse/NIFI-2747
>             Project: Apache NiFi
>          Issue Type: Improvement
>    Affects Versions: 1.0.0
>            Reporter: Andre F de Miranda
>            Assignee: Andre F de Miranda
>
> It would be great if we had a processor to support fuzzy hashing algorithms 
> such as SpamSum/SSDeep



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to