Repository: commons-text Updated Branches: refs/heads/master 87b789fbe -> 7570eb016
SANDBOX-483 Add changes and fix old Javadocs from [lang] that remained after the code porting Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/7570eb01 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/7570eb01 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/7570eb01 Branch: refs/heads/master Commit: 7570eb0163cab027b444ca55e6d4c9768fcd0d34 Parents: 87b789f Author: Bruno P. Kinoshita <ki...@apache.org> Authored: Sat Dec 13 01:21:11 2014 -0200 Committer: Bruno P. Kinoshita <ki...@apache.org> Committed: Sat Dec 13 01:21:11 2014 -0200 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../commons/text/similarity/FuzzyDistance.java | 20 ++-- .../text/similarity/JaroWrinklerDistance.java | 103 +++++++++---------- .../text/similarity/LevenshteinDistance.java | 44 ++++---- 4 files changed, 83 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index d8c3fdf..f890519 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -23,6 +23,7 @@ <release version="1.0" date="tba" description="tba"> <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming distance</action> + <action issue="SANDBOX-483" type="add" dev="kinow" due-to="britter">Incorporate String algorithms from Commons Lang</action> </release> </body> http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java index 8e9228a..f4299ea 100644 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java @@ -26,6 +26,10 @@ import java.util.Locale; * indicates a higher similarity. * </p> * + * <p> + * This code has been adapted from Apache Commons Lang 3.3. + * </p> + * * @since 1.0 */ public class FuzzyDistance implements StringMetric<Integer> { @@ -54,14 +58,14 @@ public class FuzzyDistance implements StringMetric<Integer> { * </p> * * <pre> - * StringUtils.getFuzzyDistance(null, null, null) = IllegalArgumentException - * StringUtils.getFuzzyDistance("", "", Locale.ENGLISH) = 0 - * StringUtils.getFuzzyDistance("Workshop", "b", Locale.ENGLISH) = 0 - * StringUtils.getFuzzyDistance("Room", "o", Locale.ENGLISH) = 1 - * StringUtils.getFuzzyDistance("Workshop", "w", Locale.ENGLISH) = 1 - * StringUtils.getFuzzyDistance("Workshop", "ws", Locale.ENGLISH) = 2 - * StringUtils.getFuzzyDistance("Workshop", "wo", Locale.ENGLISH) = 4 - * StringUtils.getFuzzyDistance("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 + * distance.getFuzzyDistance(null, null, null) = IllegalArgumentException + * distance.getFuzzyDistance("", "", Locale.ENGLISH) = 0 + * distance.getFuzzyDistance("Workshop", "b", Locale.ENGLISH) = 0 + * distance.getFuzzyDistance("Room", "o", Locale.ENGLISH) = 1 + * distance.getFuzzyDistance("Workshop", "w", Locale.ENGLISH) = 1 + * distance.getFuzzyDistance("Workshop", "ws", Locale.ENGLISH) = 2 + * distance.getFuzzyDistance("Workshop", "wo", Locale.ENGLISH) = 4 + * distance.getFuzzyDistance("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 * </pre> * * @param term a full term that should be matched against, must not be null http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java index 3a94969..67aa2b8 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java @@ -49,20 +49,20 @@ public class JaroWrinklerDistance implements StringMetric<Double> { * </p> * * <pre> - * StringUtils.getJaroWinklerDistance(null, null) = IllegalArgumentException - * StringUtils.getJaroWinklerDistance("","") = 0.0 - * StringUtils.getJaroWinklerDistance("","a") = 0.0 - * StringUtils.getJaroWinklerDistance("aaapppp", "") = 0.0 - * StringUtils.getJaroWinklerDistance("frog", "fog") = 0.93 - * StringUtils.getJaroWinklerDistance("fly", "ant") = 0.0 - * StringUtils.getJaroWinklerDistance("elephant", "hippo") = 0.44 - * StringUtils.getJaroWinklerDistance("hippo", "elephant") = 0.44 - * StringUtils.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0 - * StringUtils.getJaroWinklerDistance("hello", "hallo") = 0.88 - * StringUtils.getJaroWinklerDistance("ABC Corporation", "ABC Corp") = 0.91 - * StringUtils.getJaroWinklerDistance("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.93 - * StringUtils.getJaroWinklerDistance("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94 - * StringUtils.getJaroWinklerDistance("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9 + * distance.getJaroWinklerDistance(null, null) = IllegalArgumentException + * distance.getJaroWinklerDistance("","") = 0.0 + * distance.getJaroWinklerDistance("","a") = 0.0 + * distance.getJaroWinklerDistance("aaapppp", "") = 0.0 + * distance.getJaroWinklerDistance("frog", "fog") = 0.93 + * distance.getJaroWinklerDistance("fly", "ant") = 0.0 + * distance.getJaroWinklerDistance("elephant", "hippo") = 0.44 + * distance.getJaroWinklerDistance("hippo", "elephant") = 0.44 + * distance.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0 + * distance.getJaroWinklerDistance("hello", "hallo") = 0.88 + * distance.getJaroWinklerDistance("ABC Corporation", "ABC Corp") = 0.91 + * distance.getJaroWinklerDistance("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.93 + * distance.getJaroWinklerDistance("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94 + * distance.getJaroWinklerDistance("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9 * </pre> * * @param left the first String, must not be null @@ -86,9 +86,6 @@ public class JaroWrinklerDistance implements StringMetric<Double> { return matchScore; } - // TODO: we can move these methods to a Util class, keep them here, - // create a common abstract class, shade lang-3.3... - /** * Calculates the number of characters from the beginning of the strings * that match exactly one-to-one, up to a maximum of four (4) characters. @@ -118,30 +115,29 @@ public class JaroWrinklerDistance implements StringMetric<Double> { * </p> * * <pre> - * StringUtils.getCommonPrefix(null) = "" - * StringUtils.getCommonPrefix(new String[] {}) = "" - * StringUtils.getCommonPrefix(new String[] {"abc"}) = "abc" - * StringUtils.getCommonPrefix(new String[] {null, null}) = "" - * StringUtils.getCommonPrefix(new String[] {"", ""}) = "" - * StringUtils.getCommonPrefix(new String[] {"", null}) = "" - * StringUtils.getCommonPrefix(new String[] {"abc", null, null}) = "" - * StringUtils.getCommonPrefix(new String[] {null, null, "abc"}) = "" - * StringUtils.getCommonPrefix(new String[] {"", "abc"}) = "" - * StringUtils.getCommonPrefix(new String[] {"abc", ""}) = "" - * StringUtils.getCommonPrefix(new String[] {"abc", "abc"}) = "abc" - * StringUtils.getCommonPrefix(new String[] {"abc", "a"}) = "a" - * StringUtils.getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab" - * StringUtils.getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab" - * StringUtils.getCommonPrefix(new String[] {"abcde", "xyz"}) = "" - * StringUtils.getCommonPrefix(new String[] {"xyz", "abcde"}) = "" - * StringUtils.getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a " + * getCommonPrefix(null) = "" + * getCommonPrefix(new String[] {}) = "" + * getCommonPrefix(new String[] {"abc"}) = "abc" + * getCommonPrefix(new String[] {null, null}) = "" + * getCommonPrefix(new String[] {"", ""}) = "" + * getCommonPrefix(new String[] {"", null}) = "" + * getCommonPrefix(new String[] {"abc", null, null}) = "" + * getCommonPrefix(new String[] {null, null, "abc"}) = "" + * getCommonPrefix(new String[] {"", "abc"}) = "" + * getCommonPrefix(new String[] {"abc", ""}) = "" + * getCommonPrefix(new String[] {"abc", "abc"}) = "abc" + * getCommonPrefix(new String[] {"abc", "a"}) = "a" + * getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab" + * getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab" + * getCommonPrefix(new String[] {"abcde", "xyz"}) = "" + * getCommonPrefix(new String[] {"xyz", "abcde"}) = "" + * getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a " * </pre> * * @param strs array of String objects, entries may be null * @return the initial sequence of characters that are common to all Strings * in the array; empty String if the array is null, the elements are * all null or if there is no common prefix. - * @since 2.4 */ public static String getCommonPrefix(final String... strs) { if (strs == null || strs.length == 0) { @@ -249,31 +245,28 @@ public class JaroWrinklerDistance implements StringMetric<Double> { * </p> * * <pre> - * StringUtils.indexOfDifference(null) = -1 - * StringUtils.indexOfDifference(new String[] {}) = -1 - * StringUtils.indexOfDifference(new String[] {"abc"}) = -1 - * StringUtils.indexOfDifference(new String[] {null, null}) = -1 - * StringUtils.indexOfDifference(new String[] {"", ""}) = -1 - * StringUtils.indexOfDifference(new String[] {"", null}) = 0 - * StringUtils.indexOfDifference(new String[] {"abc", null, null}) = 0 - * StringUtils.indexOfDifference(new String[] {null, null, "abc"}) = 0 - * StringUtils.indexOfDifference(new String[] {"", "abc"}) = 0 - * StringUtils.indexOfDifference(new String[] {"abc", ""}) = 0 - * StringUtils.indexOfDifference(new String[] {"abc", "abc"}) = -1 - * StringUtils.indexOfDifference(new String[] {"abc", "a"}) = 1 - * StringUtils.indexOfDifference(new String[] {"ab", "abxyz"}) = 2 - * StringUtils.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2 - * StringUtils.indexOfDifference(new String[] {"abcde", "xyz"}) = 0 - * StringUtils.indexOfDifference(new String[] {"xyz", "abcde"}) = 0 - * StringUtils.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7 + * distance.indexOfDifference(null) = -1 + * distance.indexOfDifference(new String[] {}) = -1 + * distance.indexOfDifference(new String[] {"abc"}) = -1 + * distance.indexOfDifference(new String[] {null, null}) = -1 + * distance.indexOfDifference(new String[] {"", ""}) = -1 + * distance.indexOfDifference(new String[] {"", null}) = 0 + * distance.indexOfDifference(new String[] {"abc", null, null}) = 0 + * distance.indexOfDifference(new String[] {null, null, "abc"}) = 0 + * distance.indexOfDifference(new String[] {"", "abc"}) = 0 + * distance.indexOfDifference(new String[] {"abc", ""}) = 0 + * distance.indexOfDifference(new String[] {"abc", "abc"}) = -1 + * distance.indexOfDifference(new String[] {"abc", "a"}) = 1 + * distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2 + * distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2 + * distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0 + * distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0 + * distance.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7 * </pre> * * @param css array of CharSequences, entries may be null * @return the index where the strings begin to differ; -1 if they are all * equal - * @since 2.4 - * @since 3.0 Changed signature from indexOfDifference(String...) to - * indexOfDifference(CharSequence...) */ protected static int indexOfDifference(final CharSequence... css) { if (css == null || css.length <= 1) { http://git-wip-us.apache.org/repos/asf/commons-text/blob/7570eb01/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java index 1793f1e..cca3dc1 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java @@ -59,17 +59,17 @@ public class LevenshteinDistance implements StringMetric<Integer> { * </p> * * <pre> - * StringUtils.getLevenshteinDistance(null, *) = IllegalArgumentException - * StringUtils.getLevenshteinDistance(*, null) = IllegalArgumentException - * StringUtils.getLevenshteinDistance("","") = 0 - * StringUtils.getLevenshteinDistance("","a") = 1 - * StringUtils.getLevenshteinDistance("aaapppp", "") = 7 - * StringUtils.getLevenshteinDistance("frog", "fog") = 1 - * StringUtils.getLevenshteinDistance("fly", "ant") = 3 - * StringUtils.getLevenshteinDistance("elephant", "hippo") = 7 - * StringUtils.getLevenshteinDistance("hippo", "elephant") = 7 - * StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 - * StringUtils.getLevenshteinDistance("hello", "hallo") = 1 + * distance.getLevenshteinDistance(null, *) = IllegalArgumentException + * distance.getLevenshteinDistance(*, null) = IllegalArgumentException + * distance.getLevenshteinDistance("","") = 0 + * distance.getLevenshteinDistance("","a") = 1 + * distance.getLevenshteinDistance("aaapppp", "") = 7 + * distance.getLevenshteinDistance("frog", "fog") = 1 + * distance.getLevenshteinDistance("fly", "ant") = 3 + * distance.getLevenshteinDistance("elephant", "hippo") = 7 + * distance.getLevenshteinDistance("hippo", "elephant") = 7 + * distance.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 + * distance.getLevenshteinDistance("hello", "hallo") = 1 * </pre> * * @param left the first string, must not be null @@ -103,17 +103,17 @@ public class LevenshteinDistance implements StringMetric<Integer> { * </p> * * <pre> - * StringUtils.getLevenshteinDistance(null, *, *) = IllegalArgumentException - * StringUtils.getLevenshteinDistance(*, null, *) = IllegalArgumentException - * StringUtils.getLevenshteinDistance(*, *, -1) = IllegalArgumentException - * StringUtils.getLevenshteinDistance("","", 0) = 0 - * StringUtils.getLevenshteinDistance("aaapppp", "", 8) = 7 - * StringUtils.getLevenshteinDistance("aaapppp", "", 7) = 7 - * StringUtils.getLevenshteinDistance("aaapppp", "", 6)) = -1 - * StringUtils.getLevenshteinDistance("elephant", "hippo", 7) = 7 - * StringUtils.getLevenshteinDistance("elephant", "hippo", 6) = -1 - * StringUtils.getLevenshteinDistance("hippo", "elephant", 7) = 7 - * StringUtils.getLevenshteinDistance("hippo", "elephant", 6) = -1 + * distance.getLevenshteinDistance(null, *, *) = IllegalArgumentException + * distance.getLevenshteinDistance(*, null, *) = IllegalArgumentException + * distance.getLevenshteinDistance(*, *, -1) = IllegalArgumentException + * distance.getLevenshteinDistance("","", 0) = 0 + * distance.getLevenshteinDistance("aaapppp", "", 8) = 7 + * distance.getLevenshteinDistance("aaapppp", "", 7) = 7 + * distance.getLevenshteinDistance("aaapppp", "", 6)) = -1 + * distance.getLevenshteinDistance("elephant", "hippo", 7) = 7 + * distance.getLevenshteinDistance("elephant", "hippo", 6) = -1 + * distance.getLevenshteinDistance("hippo", "elephant", 7) = 7 + * distance.getLevenshteinDistance("hippo", "elephant", 6) = -1 * </pre> * * @param left the first string, must not be null