Author: ggregory
Date: Fri Aug  5 15:33:28 2011
New Revision: 1154269

URL: http://svn.apache.org/viewvc?rev=1154269&view=rev
Log:
[CODEC-125] Implement a Beider-Morse phonetic matching codec. Apply Matthew's 
patch https://issues.apache.org/jira/secure/attachment/12489480/handleH.patch. 
Also: Reduce speed test loop boundary.

Modified:
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
    commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt
    commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1154269&r1=1154268&r2=1154269&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Fri Aug  5 15:33:28 2011
@@ -143,13 +143,13 @@ public class Rule {
                 Languages ls = Languages.instance(s);
                 for (String l : ls.getLanguages()) {
                     try {
-                        rs.put(l, parseRules(createScanner(s, rt, l)));
+                        rs.put(l, parseRules(createScanner(s, rt, l), 
createResourceName(s, rt, l)));
                     } catch (IllegalStateException e) {
                         throw new IllegalStateException("Problem processing " 
+ createResourceName(s, rt, l), e);
                     }
                 }
                 if (!rt.equals(RuleType.RULES)) {
-                    rs.put("common", parseRules(createScanner(s, rt, 
"common")));
+                    rs.put("common", parseRules(createScanner(s, rt, 
"common"), createResourceName(s, rt, "common")));
                 }
 
                 rts.put(rt, Collections.unmodifiableMap(rs));
@@ -262,7 +262,7 @@ public class Rule {
         }
     }
 
-    private static List<Rule> parseRules(Scanner scanner) {
+    private static List<Rule> parseRules(final Scanner scanner, final String 
location) {
         List<Rule> lines = new ArrayList<Rule>();
         int currentLine = 0;
 
@@ -300,7 +300,7 @@ public class Rule {
                         if (incl.contains(" ")) {
                             System.err.println("Warining: malformed import 
statement: " + rawLine);
                         } else {
-                            lines.addAll(parseRules(createScanner(incl)));
+                            lines.addAll(parseRules(createScanner(incl), 
location + "->" + incl));
                         }
                     } else {
                         // rule
@@ -313,7 +313,21 @@ public class Rule {
                                 String lCon = stripQuotes(parts[1]);
                                 String rCon = stripQuotes(parts[2]);
                                 PhonemeExpr ph = 
parsePhonemeExpr(stripQuotes(parts[3]));
-                                Rule r = new Rule(pat, lCon, rCon, ph);
+                                final int cLine = currentLine;
+                                Rule r = new Rule(pat, lCon, rCon, ph) {
+                                    private final int line = cLine;
+                                    private final String loc = location;
+
+                                    @Override
+                                    public String toString() {
+                                        final StringBuilder sb = new 
StringBuilder();
+                                        sb.append("Rule");
+                                        sb.append("{line=").append(line);
+                                        sb.append(", 
loc='").append(loc).append('\'');
+                                        sb.append('}');
+                                        return sb.toString();
+                                    }
+                                };
                                 lines.add(r);
                             } catch (IllegalArgumentException e) {
                                 throw new IllegalStateException("Problem 
parsing line " + currentLine, e);

Modified: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt?rev=1154269&r1=1154268&r2=1154269&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt (original)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt Fri Aug  5 15:33:28 2011
@@ -123,8 +123,8 @@
 "gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" 
           
 "ouh" "" "[aioe]" "(v[french]|uh)"
-"uh" "" "[aioe]" "(v|uh)" 
-"h" "" "$" "" 
+"uh" "" "[aioe]" "(v|uh)"
+"h" "." "$" "" // match h at the end of words, but not as a single letter
 "h" "[aeiouyäöü]" "" ""  // german
 "h" "^" "" 
"(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])"
 
          

Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1154269&r1=1154268&r2=1154269&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (original)
+++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Fri Aug  5 15:33:28 2011
@@ -25,7 +25,6 @@ import org.apache.commons.codec.EncoderE
 import org.apache.commons.codec.StringEncoder;
 import org.apache.commons.codec.StringEncoderAbstractTest;
 import org.junit.Assert;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -44,7 +43,7 @@ public class BeiderMorseEncoderTest exte
         return new BeiderMorseEncoder();
     }
 
-    @Ignore
+    // @Ignore
     @Test
     public void testAsciiEncodeNotEmpty1Letter() throws EncoderException {
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
@@ -113,7 +112,6 @@ public class BeiderMorseEncoderTest exte
         Languages.instance("thereIsNoSuchLanguage");
     }
 
-    // @Ignore
     @Test(timeout = 10000L)
     public void testLongestEnglishSurname() throws EncoderException {
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
@@ -165,7 +163,7 @@ public class BeiderMorseEncoderTest exte
         Random rand = new Random();
         stringBuffer.append(chars[rand.nextInt(chars.length)]);
         long start;
-        for (int i = 0; i < 40; i++) {
+        for (int i = 0; i < 30; i++) {
             start = System.currentTimeMillis();
             // System.out.println(i + " String to encode:" + 
stringBuffer.toString());
             bmpm.encode(stringBuffer.toString());


Reply via email to