[ https://issues.apache.org/jira/browse/MINIFICPP-681?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16694988#comment-16694988 ]
ASF GitHub Bot commented on MINIFICPP-681: ------------------------------------------ Github user phrocker commented on a diff in the pull request: https://github.com/apache/nifi-minifi-cpp/pull/445#discussion_r235474428 --- Diff: libminifi/include/processors/ContentHash.h --- @@ -0,0 +1,186 @@ +/** + * @file ContentHash.h + * ContentHash class declaration + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef NIFI_MINIFI_CPP_CONTENTHASH_H +#define NIFI_MINIFI_CPP_CONTENTHASH_H + +#ifdef OPENSSL_SUPPORT + +#include <iomanip> +#include <map> +#include <memory> +#include <string> +#include <sstream> +#include <utility> +#include <stdint.h> + +#include <openssl/md5.h> +#include <openssl/sha.h> + +#include "FlowFileRecord.h" +#include "core/Processor.h" +#include "core/ProcessSession.h" +#include "core/Resource.h" +#include "io/BaseStream.h" + +using HashReturnType = std::pair<std::string, int64_t>; + +namespace { +#define HASH_BUFFER_SIZE 16384 + + std::string digestToString(const unsigned char * const digest, size_t size) { + std::stringstream ss; + for(int i = 0; i < size; i++) + { + ss << std::uppercase << std::hex << std::setw(2) << std::setfill('0') << (int)digest[i]; + } + return ss.str(); + } + + HashReturnType MD5Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) { + HashReturnType ret_val; + ret_val.second = 0; + uint8_t buffer[HASH_BUFFER_SIZE]; + MD5_CTX context; + MD5_Init(&context); + + size_t ret = 0; + do { + ret = stream->readData(buffer, HASH_BUFFER_SIZE); + if(ret > 0) { + MD5_Update(&context, buffer, ret); + ret_val.second += ret; + } + } while(ret > 0); + unsigned char digest[MD5_DIGEST_LENGTH]; + MD5_Final(digest, &context); + + ret_val.first = digestToString(digest, MD5_DIGEST_LENGTH); + return ret_val; + } + + HashReturnType SHA1Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) { + HashReturnType ret_val; + ret_val.second = 0; + uint8_t buffer[HASH_BUFFER_SIZE]; + SHA_CTX context; + SHA1_Init(&context); + + size_t ret = 0; + do { + ret = stream->readData(buffer, HASH_BUFFER_SIZE); + if(ret > 0) { + SHA1_Update(&context, buffer, ret); + ret_val.second += ret; + } + } while(ret > 0); + unsigned char digest[SHA_DIGEST_LENGTH]; + SHA1_Final(digest, &context); + + ret_val.first = digestToString(digest, SHA_DIGEST_LENGTH); + return ret_val; + } + + HashReturnType 
SHA256Hash(std::shared_ptr<org::apache::nifi::minifi::io::BaseStream> stream) { + HashReturnType ret_val; + ret_val.second = 0; + uint8_t buffer[HASH_BUFFER_SIZE]; + SHA256_CTX context; + SHA256_Init(&context); + + size_t ret ; + do { + ret = stream->readData(buffer, HASH_BUFFER_SIZE); + if(ret > 0) { + SHA256_Update(&context, buffer, ret); + ret_val.second += ret; + } + } while(ret > 0); + unsigned char digest[SHA256_DIGEST_LENGTH]; + SHA256_Final(digest, &context); + + ret_val.first = digestToString(digest, SHA256_DIGEST_LENGTH); + return ret_val; + } +} + + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace processors { + +static const std::map<std::string, const std::function<HashReturnType(std::shared_ptr<io::BaseStream>)>> HashAlgos = --- End diff -- SHA-2 is a suite of digests (a set of functions). It's not tantamount to SHA-256, as it includes others. We should probably avoid using that name, as it's not a ubiquitous understanding. > Add content hash processor > -------------------------- > > Key: MINIFICPP-681 > URL: https://issues.apache.org/jira/browse/MINIFICPP-681 > Project: NiFi MiNiFi C++ > Issue Type: Improvement > Reporter: Arpad Boda > Assignee: Arpad Boda > Priority: Major > Fix For: 0.6.0 > > > Add a new processor that supports hashing content and add the checksum to the > flowfile as an attribute. -- This message was sent by Atlassian JIRA (v7.6.3#76005)