Github user ottobackwards commented on a diff in the pull request:
https://github.com/apache/metron/pull/781#discussion_r141919312
--- Diff:
metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/hashing/TLSHHasher.java
---
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.metron.stellar.common.utils.hashing;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.trendmicro.tlsh.BucketOption;
+import com.trendmicro.tlsh.ChecksumOption;
+import com.trendmicro.tlsh.Tlsh;
+import com.trendmicro.tlsh.TlshCreator;
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.metron.stellar.common.utils.ConversionUtils;
+import org.apache.metron.stellar.common.utils.JSONUtils;
+import org.apache.metron.stellar.common.utils.SerDeUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.security.NoSuchAlgorithmException;
+import java.util.*;
+import java.util.function.Function;
+
+public class TLSHHasher implements Hasher {
+ public static final String TLSH_KEY = "tlsh";
+ public static final String TLSH_BIN_KEY = "tlsh_bin";
+ /**
+ * Named configuration keys declared for {@code TLSHHasher}. Each constant carries the
+ * string key it is addressed by.
+ */
+ public enum Config implements EnumConfigurable {
+ BUCKET_SIZE("bucketSize"),
+ CHECKSUM("checksumBytes"),
+ HASHES("hashes"),
+ FORCE("force");
+
+ /** The string key associated with this option. */
+ public final String key;
+
+ Config(String configKey) {
+ key = configKey;
+ }
+
+ /**
+ * @return the string key associated with this option
+ */
+ @Override
+ public String getKey() {
+ return this.key;
+ }
+ }
+
+ BucketOption bucketOption = BucketOption.BUCKETS_128; // defaults to BUCKETS_128; passed to the TlshCreator built in getHash
+ ChecksumOption checksumOption = ChecksumOption.CHECKSUM_1B; // defaults to CHECKSUM_1B; passed to the TlshCreator built in getHash
+ Boolean force = true; // defaults to true; NOTE(review): presumably driven by Config.FORCE - confirm against the rest of the class
+ List<Integer> hashes = new ArrayList<>(); // empty by default; NOTE(review): presumably driven by Config.HASHES - confirm against the rest of the class
+
+ /**
+ * Returns an encoded string representation of the hash value of the input. It is expected that
+ * this implementation does throw exceptions when the input is null.
+ *
+ * @param o The value to hash.
+ * @return A hash of {@code o} that has been encoded.
+ * @throws EncoderException If unable to encode the hash then this exception occurs.
+ * @throws NoSuchAlgorithmException If the supplied algorithm is not known.
+ */
+ @Override
+ public Object getHash(Object o) throws EncoderException,
NoSuchAlgorithmException {
+ TlshCreator creator = new TlshCreator(bucketOption, checksumOption);
--- End diff --
Can these `TlshCreator` instances be cached and reused, similar to how we cache regex patterns?
---