Repository: commons-text Updated Branches: refs/heads/master 376c7c4d8 -> a5ac07106
Fix checkstyle errors Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a5ac0710 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a5ac0710 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a5ac0710 Branch: refs/heads/master Commit: a5ac07106f25c0e359bb5782cf8eb16ce15aa0b8 Parents: 376c7c4 Author: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Authored: Mon Apr 13 00:17:25 2015 +1200 Committer: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Committed: Mon Apr 13 00:17:25 2015 +1200 ---------------------------------------------------------------------- .../commons/text/diff/CommandVisitor.java | 1 + .../apache/commons/text/diff/DeleteCommand.java | 1 + .../apache/commons/text/diff/EditCommand.java | 1 + .../apache/commons/text/diff/EditScript.java | 1 + .../apache/commons/text/diff/InsertCommand.java | 1 + .../apache/commons/text/diff/KeepCommand.java | 1 + .../commons/text/diff/ReplacementsFinder.java | 10 +++++ .../commons/text/diff/ReplacementsHandler.java | 1 + .../commons/text/diff/StringsComparator.java | 32 +++++++------- .../commons/text/similarity/FuzzyScore.java | 8 ++++ .../text/similarity/JaroWrinklerDistance.java | 17 +++++--- .../text/similarity/LevenshteinDistance.java | 44 +++++++++++++------- 12 files changed, 85 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/CommandVisitor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java index d73dde8..7e5f40f 100644 --- a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java +++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java @@ -117,6 +117,7 @@ package 
org.apache.commons.text.diff; * } * </pre> * + * @param <T> object type * @since 1.0 */ public interface CommandVisitor<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/DeleteCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java index 7494002..8173718 100644 --- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java +++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java @@ -29,6 +29,7 @@ package org.apache.commons.text.diff; * @see StringsComparator * @see EditScript * + * @param <T> object type * @since 1.0 */ public class DeleteCommand<T> extends EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/EditCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java index 972cebb..7920206 100644 --- a/src/main/java/org/apache/commons/text/diff/EditCommand.java +++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java @@ -48,6 +48,7 @@ package org.apache.commons.text.diff; * @see StringsComparator * @see EditScript * + * @param <T> object type * @since 1.0 */ public abstract class EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/EditScript.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java index 641d60b..8891044 100644 --- a/src/main/java/org/apache/commons/text/diff/EditScript.java +++ b/src/main/java/org/apache/commons/text/diff/EditScript.java @@ 
-40,6 +40,7 @@ import java.util.List; * @see CommandVisitor * @see ReplacementsHandler * + * @param <T> object type * @since 1.0 */ public class EditScript<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/InsertCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java index 9a365d0..f0337dc 100644 --- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java +++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java @@ -29,6 +29,7 @@ package org.apache.commons.text.diff; * @see StringsComparator * @see EditScript * + * @param <T> object type * @since 1.0 */ public class InsertCommand<T> extends EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/KeepCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java index 687f7e7..34c6fe7 100644 --- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java +++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java @@ -29,6 +29,7 @@ package org.apache.commons.text.diff; * @see StringsComparator * @see EditScript * + * @param <T> object type * @since 1.0 */ public class KeepCommand<T> extends EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java index 52e4112..669585a 100644 --- 
a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java +++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java @@ -47,12 +47,22 @@ import java.util.List; * @see EditScript * @see StringsComparator * + * @param <T> object type * @since 1.0 */ public class ReplacementsFinder<T> implements CommandVisitor<T> { + /** + * List of pending insertions. + */ private final List<T> pendingInsertions; + /** + * List of pending deletions. + */ private final List<T> pendingDeletions; + /** + * Count of elements skipped. + */ private int skipped; /** Handler to call when synchronized sequences are found. */ http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java index d5d61a4..3beb716 100644 --- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java +++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java @@ -21,6 +21,7 @@ import java.util.List; /** * This interface is devoted to handle synchronized replacement sequences. * + * @param <T> object type * @see ReplacementsFinder * @since 1.0 */ http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/diff/StringsComparator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java index b2940fa..4215931 100644 --- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java +++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java @@ -61,9 +61,13 @@ public class StringsComparator { * Second character sequence. 
*/ private final String right; - - /** Temporary variables. */ + /** + * Temporary array. + */ private final int[] vDown; + /** + * Temporary array. + */ private final int[] vUp; /** @@ -189,19 +193,19 @@ public class StringsComparator { final int delta = m - n; final int sum = n + m; final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2; - vDown[1+offset] = start1; - vUp[1+offset] = end1 + 1; + vDown[1 + offset] = start1; + vUp[1 + offset] = end1 + 1; - for (int d = 0; d <= offset ; ++d) { + for (int d = 0; d <= offset; ++d) { // Down for (int k = -d; k <= d; k += 2) { // First step final int i = k + offset; - if (k == -d || k != d && vDown[i-1] < vDown[i+1]) { - vDown[i] = vDown[i+1]; + if (k == -d || k != d && vDown[i - 1] < vDown[i + 1]) { + vDown[i] = vDown[i + 1]; } else { - vDown[i] = vDown[i-1] + 1; + vDown[i] = vDown[i - 1] + 1; } int x = vDown[i]; @@ -213,8 +217,8 @@ public class StringsComparator { } // Second step if (delta % 2 != 0 && delta - d <= k && k <= delta + d) { - if (vUp[i-delta] <= vDown[i]) { - return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2); + if (vUp[i - delta] <= vDown[i]) { + return buildSnake(vUp[i - delta], k + start1 - start2, end1, end2); } } } @@ -224,10 +228,10 @@ public class StringsComparator { // First step final int i = k + offset - delta; if (k == delta - d - || k != delta + d && vUp[i+1] <= vUp[i-1]) { - vUp[i] = vUp[i+1] - 1; + || k != delta + d && vUp[i + 1] <= vUp[i - 1]) { + vUp[i] = vUp[i + 1] - 1; } else { - vUp[i] = vUp[i-1]; + vUp[i] = vUp[i - 1]; } int x = vUp[i] - 1; @@ -238,7 +242,7 @@ public class StringsComparator { y--; } // Second step - if (delta % 2 == 0 && -d <= k && k <= d ) { + if (delta % 2 == 0 && -d <= k && k <= d) { if (vUp[i] <= vDown[i + delta]) { return buildSnake(vUp[i], k + start1 - start2, end1, end2); } http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java 
---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java index e7e5b09..73b282a 100644 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java @@ -33,6 +33,9 @@ import java.util.Locale; */ public class FuzzyScore implements StringMetric<Integer> { + /** + * Locale used to change the case of text. + */ private final Locale locale; @@ -128,6 +131,11 @@ public class FuzzyScore implements StringMetric<Integer> { return score; } + /** + * Gets the locale. + * + * @return the locale + */ public Locale getLocale() { return locale; } http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java index 6955c3d..b96b83b 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java @@ -38,6 +38,10 @@ package org.apache.commons.text.similarity; public class JaroWrinklerDistance implements StringMetric<Double> { /** + * The default prefix length limit set to four. + */ + private static final int PREFIX_LENGTH_LIMIT = 4; + /** * Represents a failed index search. 
*/ public static final int INDEX_NOT_FOUND = -1; @@ -70,7 +74,8 @@ public class JaroWrinklerDistance implements StringMetric<Double> { */ @Override public Double apply(CharSequence left, CharSequence right) { - final double DEFAULT_SCALING_FACTOR = 0.1; + final double defaultScalingFactor = 0.1; + final double percentageRoundValue = 100.0; if (left == null || right == null) { throw new IllegalArgumentException("Strings must not be null"); @@ -78,8 +83,8 @@ public class JaroWrinklerDistance implements StringMetric<Double> { final double jaro = score(left, right); final int cl = commonPrefixLength(left, right); - final double matchScore = Math.round((jaro + (DEFAULT_SCALING_FACTOR - * cl * (1.0 - jaro))) *100.0)/100.0; + final double matchScore = Math.round((jaro + (defaultScalingFactor + * cl * (1.0 - jaro))) * percentageRoundValue) / percentageRoundValue; return matchScore; } @@ -98,7 +103,7 @@ public class JaroWrinklerDistance implements StringMetric<Double> { .length(); // Limit the result to 4. - return result > 4 ? 4 : result; + return result > PREFIX_LENGTH_LIMIT ? PREFIX_LENGTH_LIMIT : result; } /** @@ -204,10 +209,12 @@ public class JaroWrinklerDistance implements StringMetric<Double> { // of common characters. final int transpositions = transpositions(m1, m2); + final double defaultDenominator = 3.0; + // Calculate the distance. 
final double dist = (m1.length() / ((double) shorter.length()) + m2.length() / ((double) longer.length()) + (m1.length() - transpositions) - / ((double) m1.length())) / 3.0; + / ((double) m1.length())) / defaultDenominator; return dist; } http://git-wip-us.apache.org/repos/asf/commons-text/blob/a5ac0710/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java index c75e12e..f776cce 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java @@ -33,8 +33,14 @@ import java.util.Arrays; */ public class LevenshteinDistance implements StringMetric<Integer> { + /** + * Default instance. + */ private static final LevenshteinDistance DEFAULT_INSTANCE = new LevenshteinDistance(); + /** + * Threshold. + */ private final Integer threshold; /** @@ -106,10 +112,20 @@ public class LevenshteinDistance implements StringMetric<Integer> { } } + /** + * Gets the default instance. + * + * @return the default instance + */ public static LevenshteinDistance getDefaultInstance() { return DEFAULT_INSTANCE; } + /** + * Gets the distance threshold. + * + * @return the distance threshold + */ public Integer getThreshold() { return threshold; } @@ -158,7 +174,7 @@ public class LevenshteinDistance implements StringMetric<Integer> { * equal to the threshold value, returning -1 if it's greater. The * advantage is performance: unbounded distance is O(nm), but a bound of * k allows us to reduce it to O(km) time by only computing a diagonal - * stripe of width 2k + 1 of the cost table. It is also possible to use* this to compute the unbounded Levenshtein distance by starting the + * stripe of width 2k + 1 of the cost table. 
It is also possible to use * this to compute the unbounded Levenshtein distance by starting the * threshold at 1 and doubling each time until the distance is found; * this is O(dm), where d is the distance. @@ -225,7 +241,7 @@ public class LevenshteinDistance implements StringMetric<Integer> { int[] p = new int[n + 1]; // 'previous' cost array, horizontally int[] d = new int[n + 1]; // cost array, horizontally - int[] _d; // placeholder to assist in swapping p and d + int[] tempD; // placeholder to assist in swapping p and d // fill in starting table values final int boundary = Math.min(n, threshold) + 1; @@ -239,7 +255,7 @@ public class LevenshteinDistance implements StringMetric<Integer> { // iterates through t for (int j = 1; j <= m; j++) { - final char right_j = right.charAt(j - 1); // jth character of right + final char rightJ = right.charAt(j - 1); // jth character of right d[0] = j; // compute stripe indices, constrain to array size @@ -260,7 +276,7 @@ public class LevenshteinDistance implements StringMetric<Integer> { // iterates through [min, max] in s for (int i = min; i <= max; i++) { - if (left.charAt(i - 1) == right_j) { + if (left.charAt(i - 1) == rightJ) { // diagonally left and up d[i] = p[i - 1]; } else { @@ -271,9 +287,9 @@ public class LevenshteinDistance implements StringMetric<Integer> { } // copy current distance counts to 'previous row' distance counts - _d = p; + tempD = p; p = d; - d = _d; + d = tempD; } // if p[n] is greater than the threshold, there's no guarantee on it @@ -357,15 +373,15 @@ public class LevenshteinDistance implements StringMetric<Integer> { m = right.length(); } - int p[] = new int[n + 1]; //'previous' cost array, horizontally - int d[] = new int[n + 1]; // cost array, horizontally - int _d[]; //placeholder to assist in swapping p and d + int[] p = new int[n + 1]; //'previous' cost array, horizontally + int[] d = new int[n + 1]; // cost array, horizontally + int[] tempD; //placeholder to assist in swapping p and d // 
indexes into strings left and right int i; // iterates through left int j; // iterates through right - char right_j; // jth character of right + char rightJ; // jth character of right int cost; // cost @@ -374,19 +390,19 @@ public class LevenshteinDistance implements StringMetric<Integer> { } for (j = 1; j <= m; j++) { - right_j = right.charAt(j - 1); + rightJ = right.charAt(j - 1); d[0] = j; for (i = 1; i <= n; i++) { - cost = left.charAt(i - 1) == right_j ? 0 : 1; + cost = left.charAt(i - 1) == rightJ ? 0 : 1; // minimum of cell to the left+1, to the top+1, diagonally left and up +cost d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); } // copy current distance counts to 'previous row' distance counts - _d = p; + tempD = p; p = d; - d = _d; + d = tempD; } // our last action in the above loop was to switch d and p, so p now