garydgregory commented on code in PR #687: URL: https://github.com/apache/commons-text/pull/687#discussion_r2293598194
########## src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java: ########## @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +/** + * An algorithm for measuring the difference between two character sequences using the + * <a href="https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance">Damerau-Levenshtein Distance</a>. + * + * <p> + * This is the number of changes needed to change one sequence into another, where each change is a single character + * modification (deletion, insertion, substitution, or transposition of two adjacent characters). + * </p> + * + * @see <a href="https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance">Damerau-Levenshtein Distance on Wikipedia</a> + * @since 1.15.0 + */ +public class DamerauLevenshteinDistance implements EditDistance<Integer> { + + /** + * The singleton instance. + */ + private static final DamerauLevenshteinDistance INSTANCE = new DamerauLevenshteinDistance(); + + /** + * Gets the default instance. + * + * @return The default instance. + */ + public static DamerauLevenshteinDistance getDefaultInstance() { + return INSTANCE; + } + + /** + * Utility function to ensure distance is valid according to threshold + * + * @param distance The distance value + * @param threshold The threshold value + * @return The distance value, or {@code -1} if distance is greater than threshold + */ + private static int clampDistance(final int distance, final int threshold) { + return distance > threshold ? -1 : distance; + } + + /** + * Finds the Damerau-Levenshtein distance between two CharSequences if it's less than or equal to a given threshold. + * + * @param left the first SimilarityInput, must not be null. + * @param right the second SimilarityInput, must not be null. + * @param threshold the target threshold, must not be negative. + * @return result distance, or -1 if distance exceeds threshold + */ + private static <E> int limitedCompare(SimilarityInput<E> left, SimilarityInput<E> right, final int threshold) { + if (left == null || right == null) { + throw new IllegalArgumentException("Left/right inputs must not be null"); + } + + if (threshold < 0) { + throw new IllegalArgumentException("Threshold can not be negative"); + } + + /* Review Comment: Use // inline comments inside methods. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@commons.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org