Repository: commons-text Updated Branches: refs/heads/master 413aeeb1a -> 87b789fbe
SANDBOX-485 Add Hamming distance Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/87b789fb Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/87b789fb Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/87b789fb Branch: refs/heads/master Commit: 87b789fbec0360a7e2c54f7cb45225e238530ee8 Parents: 413aeeb Author: Bruno P. Kinoshita <ki...@apache.org> Authored: Sat Dec 13 01:09:40 2014 -0200 Committer: Bruno P. Kinoshita <ki...@apache.org> Committed: Sat Dec 13 01:12:01 2014 -0200 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../text/similarity/HammingDistance.java | 77 ++++++++++++++++++++ .../text/similarity/HammingDistanceTest.java | 58 +++++++++++++++ 3 files changed, 136 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 416f579..d8c3fdf 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -22,6 +22,7 @@ <body> <release version="1.0" date="tba" description="tba"> + <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming distance</action> </release> </body> http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/main/java/org/apache/commons/text/similarity/HammingDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java new file mode 100644 index 0000000..13621b4 --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +/** + * <p> + * The hamming distance between two strings of equal length is the number of + * positions at which the corresponding symbols are different. + * </p> + * + * <p> + * For further explanation about the hamming distance, take a look at its + * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance. + * </p> + * + * @since 1.0 + */ +public class HammingDistance implements StringMetric<Integer> { + + /** + * <p>Find the hamming distance between two strings with the same + * length.</p> + * + * <p>The distance starts with zero, and for each occurrence of a + * different character in either String, it increments the distance + * by 1, and finally return its value.</p> + * + * <pre> + * distance.compare("", "") = 0 + * distance.compare("pappa", "pappa") = 0 + * distance.compare("1011101", "1011111") = 1 + * distance.compare("ATCG", "ACCC") = 2 + * distance.compare("karolin", "kerstin" = 3 + * </pre> + * + * @param left the first string, must not be null + * @param right the second string, must not be null + * @return distance + * @throws IllegalArgumentException if either String input {@code null} or + * if they do not have the same length + */ + @Override + public Integer compare(CharSequence left, CharSequence right) { + if (left == null || right == null) { + throw new IllegalArgumentException("Strings must not be null"); + } + + if (left.length() != right.length()) { + throw new IllegalArgumentException("Strings must have the same length"); + } + + int distance = 0; + + for (int i = 0; i < left.length(); i++) { + if (left.charAt(i) != right.charAt(i)) { + distance++; + } + } + + return distance; + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/87b789fb/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java new file mode 100644 index 0000000..aac2fa3 --- /dev/null +++ b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +import static org.junit.Assert.assertEquals; + +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for {@link org.apache.commons.text.similarity.HammingDistance}. + */ +public class HammingDistanceTest { + + private static HammingDistance distance; + + @BeforeClass + public static void setUp() { + distance = new HammingDistance(); + } + + @Test + public void testHammingDistance() { + assertEquals(Integer.valueOf(0), distance.compare("", "")); + assertEquals(Integer.valueOf(0), distance.compare("pappa", "pappa")); + assertEquals(Integer.valueOf(1), distance.compare("papaa", "pappa")); + assertEquals(Integer.valueOf(3), distance.compare("karolin", "kathrin")); + assertEquals(Integer.valueOf(3), distance.compare("karolin", "kerstin")); + assertEquals(Integer.valueOf(2), distance.compare("1011101", "1001001")); + assertEquals(Integer.valueOf(3), distance.compare("2173896", "2233796")); + assertEquals(Integer.valueOf(2), distance.compare("ATCG", "ACCC")); + } + + @Test(expected=IllegalArgumentException.class) + public void testHammingDistance_nullLeftValue() { + distance.compare(null, ""); + } + + @Test(expected=IllegalArgumentException.class) + public void testHammingDistance_nullRightValue() { + distance.compare("", null); + } + +}