Author: ggregory
Date: Fri Aug 5 15:33:28 2011
New Revision: 1154269

URL: http://svn.apache.org/viewvc?rev=1154269&view=rev
Log:
[CODEC-125] Implement a Beider-Morse phonetic matching codec. Apply Matthew's patch https://issues.apache.org/jira/secure/attachment/12489480/handleH.patch. Also: Reduce speed test loop boundary.

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1154269&r1=1154268&r2=1154269&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Fri Aug 5 15:33:28 2011 @@ -143,13 +143,13 @@ public class Rule { Languages ls = Languages.instance(s); for (String l : ls.getLanguages()) { try { - rs.put(l, parseRules(createScanner(s, rt, l))); + rs.put(l, parseRules(createScanner(s, rt, l), createResourceName(s, rt, l))); } catch (IllegalStateException e) { throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); } } if (!rt.equals(RuleType.RULES)) { - rs.put("common", parseRules(createScanner(s, rt, "common"))); + rs.put("common", parseRules(createScanner(s, rt, "common"), createResourceName(s, rt, "common"))); } rts.put(rt, Collections.unmodifiableMap(rs)); @@ -262,7 +262,7 @@ public class Rule { } } - private static List<Rule> parseRules(Scanner scanner) { + private static List<Rule> parseRules(final Scanner scanner, final String location) { List<Rule> lines = new ArrayList<Rule>(); int currentLine = 0; @@ -300,7 +300,7 @@ public class Rule { if (incl.contains(" ")) { System.err.println("Warining: malformed import statement: " + rawLine); } else { - lines.addAll(parseRules(createScanner(incl))); + lines.addAll(parseRules(createScanner(incl), location + "->" + incl)); } } else { // 
rule @@ -313,7 +313,21 @@ public class Rule { String lCon = stripQuotes(parts[1]); String rCon = stripQuotes(parts[2]); PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); - Rule r = new Rule(pat, lCon, rCon, ph); + final int cLine = currentLine; + Rule r = new Rule(pat, lCon, rCon, ph) { + private final int line = cLine; + private final String loc = location; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("Rule"); + sb.append("{line=").append(line); + sb.append(", loc='").append(loc).append('\''); + sb.append('}'); + return sb.toString(); + } + }; lines.add(r); } catch (IllegalArgumentException e) { throw new IllegalStateException("Problem parsing line " + currentLine, e); Modified: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt?rev=1154269&r1=1154268&r2=1154269&view=diff ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt (original) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt Fri Aug 5 15:33:28 2011 @@ -123,8 +123,8 @@ "gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" "ouh" "" "[aioe]" "(v[french]|uh)" -"uh" "" "[aioe]" "(v|uh)" -"h" "" "$" "" +"uh" "" "[aioe]" "(v|uh)" +"h" "." 
"$" "" // match h at the end of words, but not as a single letter "h" "[aeiouyäöü]" "" "" // german "h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])" Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1154269&r1=1154268&r2=1154269&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (original) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Fri Aug 5 15:33:28 2011 @@ -25,7 +25,6 @@ import org.apache.commons.codec.EncoderE import org.apache.commons.codec.StringEncoder; import org.apache.commons.codec.StringEncoderAbstractTest; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; /** @@ -44,7 +43,7 @@ public class BeiderMorseEncoderTest exte return new BeiderMorseEncoder(); } - @Ignore + // @Ignore @Test public void testAsciiEncodeNotEmpty1Letter() throws EncoderException { BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); @@ -113,7 +112,6 @@ public class BeiderMorseEncoderTest exte Languages.instance("thereIsNoSuchLanguage"); } - // @Ignore @Test(timeout = 10000L) public void testLongestEnglishSurname() throws EncoderException { BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); @@ -165,7 +163,7 @@ public class BeiderMorseEncoderTest exte Random rand = new Random(); stringBuffer.append(chars[rand.nextInt(chars.length)]); long start; - for (int i = 0; i < 40; i++) { + for (int i = 0; i < 30; i++) { start = System.currentTimeMillis(); // System.out.println(i + " String to encode:" + stringBuffer.toString()); bmpm.encode(stringBuffer.toString());