Author: ggregory
Date: Sun Jul 31 21:25:45 2011
New Revision: 1152642

URL: http://svn.apache.org/viewvc?rev=1152642&view=rev
Log:
Apply patch based on 
https://issues.apache.org/jira/secure/attachment/12488362/acz.patch for 
[CODEC-125]

Modified:
    
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java
    
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
    
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
    
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
    
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
    
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java
    
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
    
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java

Modified: 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java
 (original)
+++ 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java
 Sun Jul 31 21:25:45 2011
@@ -234,6 +234,11 @@ public class Lang {
             }
         }
 
-        return Languages.LanguageSet.from(langs);
+        Languages.LanguageSet ls = Languages.LanguageSet.from(langs);
+        if (ls.equals(Languages.NO_LANGUAGES)) {
+            return Languages.ANY_LANGUAGE;
+        } else {
+            return ls;
+        }
     }
 }

Modified: 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
 (original)
+++ 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
 Sun Jul 31 21:25:45 2011
@@ -54,6 +54,84 @@ import java.util.Set;
  */
 public class Languages {
 
+    /**
+     * A set of languages.
+     */
+    public static abstract class LanguageSet {
+        public static LanguageSet from(Set<String> langs) {
+            if (langs.isEmpty()) {
+                return NO_LANGUAGES;
+            } else {
+                return new SomeLanguages(langs);
+            }
+        }
+
+        public abstract boolean contains(String language);
+
+        public abstract String getAny();
+
+        public abstract boolean isEmpty();
+
+        public abstract boolean isSingleton();
+
+        public abstract LanguageSet restrictTo(LanguageSet other);
+    }
+
+    /**
+     * Some languages, explicitly enumerated.
+     */
+    public static class SomeLanguages extends LanguageSet {
+        private final Set<String> languages;
+
+        private SomeLanguages(Set<String> languages) {
+            this.languages = Collections.unmodifiableSet(languages);
+        }
+
+        @Override
+        public boolean contains(String language) {
+            return this.languages.contains(language);
+        }
+
+        @Override
+        public String getAny() {
+            return this.languages.iterator().next();
+        }
+
+        public Set<String> getLanguages() {
+            return this.languages;
+        }
+
+        @Override
+        public boolean isEmpty() {
+            return this.languages.isEmpty();
+        }
+
+        @Override
+        public boolean isSingleton() {
+            return this.languages.size() == 1;
+        }
+
+        @Override
+        public LanguageSet restrictTo(LanguageSet other) {
+            if (other == NO_LANGUAGES) {
+                return other;
+            } else if (other == ANY_LANGUAGE) {
+                return this;
+            } else {
+                SomeLanguages sl = (SomeLanguages) other;
+                Set<String> ls = new HashSet<String>(this.languages);
+                ls.retainAll(sl.languages);
+                return from(ls);
+            }
+        }
+
+        @Override
+        public String toString() {
+            return "Languages(" + languages.toString() + ")";
+        }
+
+    }
+
     public static final String ANY = "any";
 
     private static final Map<NameType, Languages> LANGUAGES = new 
EnumMap<NameType, Languages>(NameType.class);
@@ -107,49 +185,23 @@ public class Languages {
 
     private final Set<String> languages;
 
-    private Languages(Set<String> languages) {
-        this.languages = languages;
-    }
-
-    public Set<String> getLanguages() {
-        return this.languages;
-    }
-
-    /**
-     * A set of languages.
-     */
-    public static abstract class LanguageSet {
-        public abstract LanguageSet restrictTo(LanguageSet other);
-
-        public static LanguageSet from(Set<String> langs) {
-            if (langs.isEmpty()) {
-                return NO_LANGUAGES;
-            } else {
-                return new SomeLanguages(langs);
-            }
-        }
-
-        public abstract boolean contains(String language);
-
-        public abstract boolean isSingleton();
-
-        public abstract String getAny();
-
-        public abstract boolean isEmpty();
-    }
-
     /**
      * No languages at all.
      */
     public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
         @Override
-        public LanguageSet restrictTo(LanguageSet other) {
-            return this;
+        public boolean contains(String language) {
+            return false;
         }
 
         @Override
-        public boolean contains(String language) {
-            return false;
+        public String getAny() {
+            throw new NoSuchElementException("Can't fetch any language from 
the empty language set.");
+        }
+
+        @Override
+        public boolean isEmpty() {
+            return true;
         }
 
         @Override
@@ -158,13 +210,13 @@ public class Languages {
         }
 
         @Override
-        public String getAny() {
-            throw new NoSuchElementException("Can't fetch any language from 
the empty language set.");
+        public LanguageSet restrictTo(LanguageSet other) {
+            return this;
         }
 
         @Override
-        public boolean isEmpty() {
-            return true;
+        public String toString() {
+            return "NO_LANGUAGES";
         }
     };
 
@@ -173,21 +225,11 @@ public class Languages {
      */
     public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
         @Override
-        public LanguageSet restrictTo(LanguageSet other) {
-            return other;
-        }
-
-        @Override
         public boolean contains(String language) {
             return true;
         }
 
         @Override
-        public boolean isSingleton() {
-            return false;
-        }
-
-        @Override
         public String getAny() {
             throw new NoSuchElementException("Can't fetch any language from 
the any language set.");
         }
@@ -196,54 +238,28 @@ public class Languages {
         public boolean isEmpty() {
             return false;
         }
-    };
-
-    /**
-     * Some languages, explicitly enumerated.
-     */
-    public static class SomeLanguages extends LanguageSet {
-        private final Set<String> languages;
-
-        private SomeLanguages(Set<String> languages) {
-            this.languages = Collections.unmodifiableSet(languages);
-        }
-
-        public Set<String> getLanguages() {
-            return this.languages;
-        }
 
         @Override
-        public LanguageSet restrictTo(LanguageSet other) {
-            if (other == NO_LANGUAGES) {
-                return other;
-            } else if (other == ANY_LANGUAGE) {
-                return this;
-            } else {
-                SomeLanguages sl = (SomeLanguages) other;
-                Set<String> ls = new HashSet<String>(this.languages);
-                ls.retainAll(sl.languages);
-                return from(ls);
-            }
+        public boolean isSingleton() {
+            return false;
         }
 
         @Override
-        public boolean contains(String language) {
-            return this.languages.contains(language);
+        public LanguageSet restrictTo(LanguageSet other) {
+            return other;
         }
 
         @Override
-        public boolean isSingleton() {
-            return this.languages.size() == 1;
+        public String toString() {
+            return "ANY_LANGUAGE";
         }
+    };
 
-        @Override
-        public String getAny() {
-            return this.languages.iterator().next();
-        }
+    private Languages(Set<String> languages) {
+        this.languages = languages;
+    }
 
-        @Override
-        public boolean isEmpty() {
-            return this.languages.isEmpty();
-        }
+    public Set<String> getLanguages() {
+        return this.languages;
     }
 }

Modified: 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
 (original)
+++ 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
 Sun Jul 31 21:25:45 2011
@@ -49,6 +49,124 @@ import java.util.Set;
  * @since 2.0
  */
 public class PhoneticEngine {
+    static class PhonemeBuilder {
+
+        public static PhonemeBuilder empty(Languages.LanguageSet languages) {
+            return new PhonemeBuilder(Collections.singleton(new 
Rule.Phoneme("", languages)));
+        }
+
+        private final Set<Rule.Phoneme> phonemes;
+
+        private PhonemeBuilder(Set<Rule.Phoneme> phonemes) {
+            this.phonemes = phonemes;
+        }
+
+        public PhonemeBuilder append(String str) {
+            Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>();
+
+            for (Rule.Phoneme ph : this.phonemes) {
+                newPhonemes.add(ph.append(str));
+            }
+
+            return new PhonemeBuilder(newPhonemes);
+        }
+
+        public PhonemeBuilder apply(Rule.PhonemeExpr phonemeExpr) {
+            Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>();
+
+            for (Rule.Phoneme left : this.phonemes) {
+                for (Rule.Phoneme right : phonemeExpr.getPhonemes()) {
+                    Rule.Phoneme join = left.join(right);
+                    if (!join.getLanguages().isEmpty()) {
+                        newPhonemes.add(join);
+                    }
+                }
+            }
+
+            return new PhonemeBuilder(newPhonemes);
+        }
+
+        public Set<Rule.Phoneme> getPhonemes() {
+            return this.phonemes;
+        }
+
+        public String makeString() {
+            List<String> sorted = new ArrayList<String>();
+
+            for (Rule.Phoneme ph : this.phonemes) {
+                sorted.add(ph.getPhonemeText());
+            }
+
+            Collections.sort(sorted);
+            StringBuilder sb = new StringBuilder();
+
+            for (String ph : sorted) {
+                if (sb.length() > 0)
+                    sb.append("|");
+                sb.append(ph);
+            }
+
+            return sb.toString();
+        }
+    }
+
+    private static class RulesApplication {
+        private final List<Rule> finalRules;
+        private final String input;
+
+        private PhonemeBuilder phonemeBuilder;
+        private int i;
+        private boolean found;
+
+        public RulesApplication(List<Rule> finalRules, String input, 
PhonemeBuilder phonemeBuilder, int i) {
+            if (finalRules == null) {
+                throw new NullPointerException("The finalRules argument must 
not be null");
+            }
+            this.finalRules = finalRules;
+            this.phonemeBuilder = phonemeBuilder;
+            this.input = input;
+            this.i = i;
+        }
+
+        public int getI() {
+            return this.i;
+        }
+
+        public PhonemeBuilder getPhonemeBuilder() {
+            return this.phonemeBuilder;
+        }
+
+        public RulesApplication invoke() {
+            this.found = false;
+            int patternLength = 0;
+            RULES: for (Rule rule : this.finalRules) {
+                String pattern = rule.getPattern();
+                patternLength = pattern.length();
+                // log("trying pattern: " + pattern);
+
+                if (!rule.patternAndContextMatches(this.input, this.i)) {
+                    // log("no match");
+                    continue RULES;
+                }
+
+                this.phonemeBuilder = 
this.phonemeBuilder.apply(rule.getPhoneme());
+                this.found = true;
+                break RULES;
+            }
+
+            if (!this.found) {
+                patternLength = 1;
+            }
+
+            this.i += patternLength;
+            return this;
+        }
+
+        public boolean isFound() {
+            return this.found;
+        }
+    }
+
     private static final Map<NameType, Set<String>> NAME_PREFIXES = new 
EnumMap<NameType, Set<String>>(NameType.class);
 
     static {
@@ -60,6 +178,19 @@ public class PhoneticEngine {
                 "de la", "della", "des", "di", "do", "dos", "du", "van", 
"von"))));
     }
 
+    private static String join(Iterable<String> strings, String sep) {
+        StringBuilder sb = new StringBuilder();
+        Iterator<String> si = strings.iterator();
+        if (si.hasNext()) {
+            sb.append(si.next());
+        }
+        while (si.hasNext()) {
+            sb.append(sep).append(si.next());
+        }
+
+        return sb.toString();
+    }
+
     private final Lang lang;
 
     private final NameType nameType;
@@ -88,6 +219,57 @@ public class PhoneticEngine {
         this.lang = Lang.instance(nameType);
     }
 
+    private PhonemeBuilder applyFinalRules(PhonemeBuilder phonemeBuilder, 
List<Rule> finalRules, Languages.LanguageSet languageSet,
+            boolean strip) {
+        if (finalRules == null) {
+            throw new NullPointerException("finalRules can not be null");
+        }
+        if (finalRules.isEmpty()) {
+            return phonemeBuilder;
+        }
+
+        Set<Rule.Phoneme> phonemes = new HashSet<Rule.Phoneme>();
+
+        for (Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) {
+            PhonemeBuilder subBuilder = 
PhonemeBuilder.empty(phoneme.getLanguages());
+            String phonemeText = phoneme.getPhonemeText();
+            // System.err.println("Expanding: " + phonemeText);
+
+            for (int i = 0; i < phonemeText.length();) {
+                RulesApplication rulesApplication = new 
RulesApplication(finalRules, phonemeText, subBuilder, i).invoke();
+                boolean found = rulesApplication.isFound();
+                subBuilder = rulesApplication.getPhonemeBuilder();
+
+                if (!found) {
+                    // System.err.println("Not found. Appending as-is");
+                    subBuilder = subBuilder.append(phonemeText.substring(i, i 
+ 1));
+                }
+
+                i = rulesApplication.getI();
+
+                // System.err.println(phonemeText + " " + i + ": " + 
subBuilder.makeString());
+            }
+
+            // System.err.println("Expanded to: " + subBuilder.makeString());
+
+            phonemes.addAll(subBuilder.getPhonemes());
+        }
+
+        return new PhonemeBuilder(phonemes);
+    }
+
+    /**
+     * Encodes a string to its phonetic representation.
+     * 
+     * @param input
+     *            the String to encode
+     * @return the encoding of the input
+     */
+    public String encode(String input) {
+        Languages.LanguageSet languageSet = this.lang.guessLanguages(input);
+        return phoneticUtf8(input, languageSet);
+    }
+
     /**
      * Gets the Lang language guessing rules being used.
      * 
@@ -125,18 +307,6 @@ public class PhoneticEngine {
     }
 
     /**
-     * Encodes a string to its phonetic representation.
-     * 
-     * @param input
-     *            the String to encode
-     * @return the encoding of the input
-     */
-    public String encode(String input) {
-        Languages.LanguageSet languageSet = this.lang.guessLanguages(input);
-        return phoneticUtf8(input, languageSet);
-    }
-
-    /**
      * Encodes an input string into an output phonetic representation, given a 
set of possible origin languages.
      * 
      * @param input
@@ -233,174 +403,4 @@ public class PhoneticEngine {
 
         return phonemeBuilder.makeString();
     }
-
-    private PhonemeBuilder applyFinalRules(PhonemeBuilder phonemeBuilder, 
List<Rule> finalRules, Languages.LanguageSet languageSet,
-            boolean strip) {
-        if (finalRules == null) {
-            throw new NullPointerException("finalRules can not be null");
-        }
-        if (finalRules.isEmpty()) {
-            return phonemeBuilder;
-        }
-
-        Set<Rule.Phoneme> phonemes = new HashSet<Rule.Phoneme>();
-
-        for (Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) {
-            PhonemeBuilder subBuilder = 
PhonemeBuilder.empty(phoneme.getLanguages());
-            String phonemeText = phoneme.getPhonemeText();
-            // System.err.println("Expanding: " + phonemeText);
-
-            for (int i = 0; i < phonemeText.length();) {
-                RulesApplication rulesApplication = new 
RulesApplication(finalRules, phonemeText, subBuilder, i).invoke();
-                boolean found = rulesApplication.isFound();
-                subBuilder = rulesApplication.getPhonemeBuilder();
-
-                if (!found) {
-                    // System.err.println("Not found. Appending as-is");
-                    subBuilder = subBuilder.append(phonemeText.substring(i, i 
+ 1));
-                }
-
-                i = rulesApplication.getI();
-
-                // System.err.println(phonemeText + " " + i + ": " + 
subBuilder.makeString());
-            }
-
-            // System.err.println("Expanded to: " + subBuilder.makeString());
-
-            phonemes.addAll(subBuilder.getPhonemes());
-        }
-
-        return new PhonemeBuilder(phonemes);
-    }
-
-    private static String join(Iterable<String> strings, String sep) {
-        StringBuilder sb = new StringBuilder();
-        Iterator<String> si = strings.iterator();
-        if (si.hasNext()) {
-            sb.append(si.next());
-        }
-        while (si.hasNext()) {
-            sb.append(sep).append(si.next());
-        }
-
-        return sb.toString();
-    }
-
-    private static class RulesApplication {
-        private final List<Rule> finalRules;
-        private final String input;
-
-        private PhonemeBuilder phonemeBuilder;
-        private int i;
-        private boolean found;
-
-        public RulesApplication(List<Rule> finalRules, String input, 
PhonemeBuilder phonemeBuilder, int i) {
-            if (finalRules == null) {
-                throw new NullPointerException("The finalRules argument must 
not be null");
-            }
-            this.finalRules = finalRules;
-            this.phonemeBuilder = phonemeBuilder;
-            this.input = input;
-            this.i = i;
-        }
-
-        public PhonemeBuilder getPhonemeBuilder() {
-            return this.phonemeBuilder;
-        }
-
-        public int getI() {
-            return this.i;
-        }
-
-        public boolean isFound() {
-            return this.found;
-        }
-
-        public RulesApplication invoke() {
-            this.found = false;
-            int patternLength = 0;
-            RULES: for (Rule rule : this.finalRules) {
-                String pattern = rule.getPattern();
-                patternLength = pattern.length();
-                // log("trying pattern: " + pattern);
-
-                if (!rule.patternAndContextMatches(this.input, this.i)) {
-                    // log("no match");
-                    continue RULES;
-                }
-
-                this.phonemeBuilder = 
this.phonemeBuilder.apply(rule.getPhoneme());
-                this.found = true;
-                break RULES;
-            }
-
-            if (!this.found) {
-                patternLength = 1;
-            }
-
-            this.i += patternLength;
-            return this;
-        }
-    }
-
-    static class PhonemeBuilder {
-
-        public static PhonemeBuilder empty(Languages.LanguageSet languages) {
-            return new PhonemeBuilder(Collections.singleton(new 
Rule.Phoneme("", languages)));
-        }
-
-        private final Set<Rule.Phoneme> phonemes;
-
-        private PhonemeBuilder(Set<Rule.Phoneme> phonemes) {
-            this.phonemes = phonemes;
-        }
-
-        public Set<Rule.Phoneme> getPhonemes() {
-            return this.phonemes;
-        }
-
-        public PhonemeBuilder apply(Rule.PhonemeExpr phonemeExpr) {
-            Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>();
-
-            for (Rule.Phoneme left : this.phonemes) {
-                for (Rule.Phoneme right : phonemeExpr.getPhonemes()) {
-                    Rule.Phoneme join = left.join(right);
-                    if (!join.getLanguages().isEmpty()) {
-                        newPhonemes.add(join);
-                    }
-                }
-            }
-
-            return new PhonemeBuilder(newPhonemes);
-        }
-
-        public String makeString() {
-            List<String> sorted = new ArrayList<String>();
-
-            for (Rule.Phoneme ph : this.phonemes) {
-                sorted.add(ph.getPhonemeText());
-            }
-
-            Collections.sort(sorted);
-            StringBuilder sb = new StringBuilder();
-
-            for (String ph : sorted) {
-                if (sb.length() > 0)
-                    sb.append("|");
-                sb.append(ph);
-            }
-
-            return sb.toString();
-        }
-
-        public PhonemeBuilder append(String str) {
-            Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>();
-
-            for (Rule.Phoneme ph : this.phonemes) {
-                newPhonemes.add(ph.append(str));
-            }
-
-            return new PhonemeBuilder(newPhonemes);
-        }
-    }
 }

Modified: 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
 (original)
+++ 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
 Sun Jul 31 21:25:45 2011
@@ -78,6 +78,52 @@ import java.util.regex.Pattern;
  * @since 2.0
  */
 public class Rule {
+    public static class Phoneme implements PhonemeExpr {
+        private final String phonemeText;
+        private final Languages.LanguageSet languages;
+
+        public Phoneme(String phonemeText, Languages.LanguageSet languages) {
+            this.phonemeText = phonemeText;
+            this.languages = languages;
+        }
+
+        public Phoneme append(String str) {
+            return new Phoneme(this.phonemeText + str, this.languages);
+        }
+
+        public Languages.LanguageSet getLanguages() {
+            return this.languages;
+        }
+
+        public Iterable<Phoneme> getPhonemes() {
+            return Collections.singleton(this);
+        }
+
+        public String getPhonemeText() {
+            return this.phonemeText;
+        }
+
+        public Phoneme join(Phoneme right) {
+            return new Phoneme(this.phonemeText + right.phonemeText, 
this.languages.restrictTo(right.languages));
+        }
+    }
+
+    public interface PhonemeExpr {
+        Iterable<Phoneme> getPhonemes();
+    }
+
+    public static class PhonemeList implements PhonemeExpr {
+        private final List<Phoneme> phonemes;
+
+        public PhonemeList(List<Phoneme> phonemes) {
+            this.phonemes = phonemes;
+        }
+
+        public List<Phoneme> getPhonemes() {
+            return this.phonemes;
+        }
+    }
+
     public static final String ALL = "ALL";
 
     private static final String DOUBLE_QUOTE = "\"";
@@ -179,6 +225,43 @@ public class Rule {
         return rules;
     }
 
+    private static Phoneme parsePhoneme(String ph) {
+        int open = ph.indexOf("[");
+        if (open >= 0) {
+            if (!ph.endsWith("]")) {
+                throw new IllegalArgumentException("Phoneme expression 
contains a '[' but does not end in ']'");
+            }
+            String before = ph.substring(0, open);
+            String in = ph.substring(open + 1, ph.length() - 1);
+            Set<String> langs = new 
HashSet<String>(Arrays.asList(in.split("[+]")));
+
+            return new Phoneme(before, Languages.LanguageSet.from(langs));
+        } else {
+            return new Phoneme(ph, Languages.ANY_LANGUAGE);
+        }
+    }
+
+    private static PhonemeExpr parsePhonemeExpr(String ph) {
+        if (ph.startsWith("(")) { // we have a bracketed list of options
+            if (!ph.endsWith(")")) {
+                throw new IllegalArgumentException("Phoneme starts with '(' so 
must end with ')'");
+            }
+
+            List<Phoneme> phs = new ArrayList<Phoneme>();
+            String body = ph.substring(1, ph.length() - 1);
+            for (String part : body.split("[|]")) {
+                phs.add(parsePhoneme(part));
+            }
+            if (body.startsWith("|") || body.endsWith("|")) {
+                phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
+            }
+
+            return new PhonemeList(phs);
+        } else {
+            return parsePhoneme(ph);
+        }
+    }
+
     private static List<Rule> parseRules(Scanner scanner) {
         List<Rule> lines = new ArrayList<Rule>();
         int currentLine = 0;
@@ -256,43 +339,6 @@ public class Rule {
         return str;
     }
 
-    private static PhonemeExpr parsePhonemeExpr(String ph) {
-        if (ph.startsWith("(")) { // we have a bracketed list of options
-            if (!ph.endsWith(")")) {
-                throw new IllegalArgumentException("Phoneme starts with '(' so 
must end with ')'");
-            }
-
-            List<Phoneme> phs = new ArrayList<Phoneme>();
-            String body = ph.substring(1, ph.length() - 1);
-            for (String part : body.split("[|]")) {
-                phs.add(parsePhoneme(part));
-            }
-            if (body.startsWith("|") || body.endsWith("|")) {
-                phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
-            }
-
-            return new PhonemeList(phs);
-        } else {
-            return parsePhoneme(ph);
-        }
-    }
-
-    private static Phoneme parsePhoneme(String ph) {
-        int open = ph.indexOf("[");
-        if (open >= 0) {
-            if (!ph.endsWith("]")) {
-                throw new IllegalArgumentException("Phoneme expression 
contains a '[' but does not end in ']'");
-            }
-            String before = ph.substring(0, open);
-            String in = ph.substring(open + 1, ph.length() - 1);
-            Set<String> langs = new 
HashSet<String>(Arrays.asList(in.split("[+]")));
-
-            return new Phoneme(before, Languages.LanguageSet.from(langs));
-        } else {
-            return new Phoneme(ph, Languages.ANY_LANGUAGE);
-        }
-    }
-
     private final Pattern lContext;
 
     private final String pattern;
@@ -329,6 +375,27 @@ public class Rule {
         return this.lContext;
     }
 
+    // /**
+    // * Decides if the language restriction for this rule applies.
+    // *
+    // * @param languageArg
+    // * a Set of Strings giving the names of the languages in scope
+    // * @return true if these satistfy the language and logical restrictions 
on this rule, false otherwise
+    // */
+    // public boolean languageMatches(Set<String> languageArg) {
+    // if (!languageArg.contains(Languages.ANY) && !this.languages.isEmpty()) {
+    // if (ALL.equals(this.logical) && 
!languageArg.containsAll(this.languages)) {
+    // return false;
+    // } else {
+    // Set<String> isect = new HashSet<String>(languageArg);
+    // isect.retainAll(this.languages);
+    // return !isect.isEmpty();
+    // }
+    // } else {
+    // return true;
+    // }
+    // }
+
     /**
      * Gets the pattern. This is a string-literal that must exactly match.
      * 
@@ -356,27 +423,6 @@ public class Rule {
         return this.rContext;
     }
 
-    // /**
-    // * Decides if the language restriction for this rule applies.
-    // *
-    // * @param languageArg
-    // * a Set of Strings giving the names of the languages in scope
-    // * @return true if these satistfy the language and logical restrictions 
on this rule, false otherwise
-    // */
-    // public boolean languageMatches(Set<String> languageArg) {
-    // if (!languageArg.contains(Languages.ANY) && !this.languages.isEmpty()) {
-    // if (ALL.equals(this.logical) && 
!languageArg.containsAll(this.languages)) {
-    // return false;
-    // } else {
-    // Set<String> isect = new HashSet<String>(languageArg);
-    // isect.retainAll(this.languages);
-    // return !isect.isEmpty();
-    // }
-    // } else {
-    // return true;
-    // }
-    // }
-
     /**
      * Decides if the pattern and context match the input starting at a 
position.
      * 
@@ -404,50 +450,4 @@ public class Rule {
 
         return patternMatches && rContextMatches && lContextMatches;
     }
-
-    public interface PhonemeExpr {
-        Iterable<Phoneme> getPhonemes();
-    }
-
-    public static class Phoneme implements PhonemeExpr {
-        private final String phonemeText;
-        private final Languages.LanguageSet languages;
-
-        public Phoneme(String phonemeText, Languages.LanguageSet languages) {
-            this.phonemeText = phonemeText;
-            this.languages = languages;
-        }
-
-        public String getPhonemeText() {
-            return this.phonemeText;
-        }
-
-        public Languages.LanguageSet getLanguages() {
-            return this.languages;
-        }
-
-        public Iterable<Phoneme> getPhonemes() {
-            return Collections.singleton(this);
-        }
-
-        public Phoneme join(Phoneme right) {
-            return new Phoneme(this.phonemeText + right.phonemeText, 
this.languages.restrictTo(right.languages));
-        }
-
-        public Phoneme append(String str) {
-            return new Phoneme(this.phonemeText + str, this.languages);
-        }
-    }
-
-    public static class PhonemeList implements PhonemeExpr {
-        private final List<Phoneme> phonemes;
-
-        public PhonemeList(List<Phoneme> phonemes) {
-            this.phonemes = phonemes;
-        }
-
-        public List<Phoneme> getPhonemes() {
-            return this.phonemes;
-        }
-    }
 }

Modified: 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
 (original)
+++ 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
 Sun Jul 31 21:25:45 2011
@@ -25,7 +25,6 @@ import org.apache.commons.codec.EncoderE
 import org.apache.commons.codec.StringEncoder;
 import org.apache.commons.codec.StringEncoderAbstractTest;
 import org.junit.Assert;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -40,14 +39,16 @@ public class BeiderMorseEncoderTest exte
         return new BeiderMorseEncoder();
     }
 
-    @Ignore
     @Test
     public void testEncodeAtz() throws EncoderException {
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
         bmpm.setNameType(NameType.GENERIC);
         bmpm.setRuleType(RuleType.APPROX);
-        Assert.assertFalse(bmpm.encode("ácz").equals(""));
-        Assert.assertFalse(bmpm.encode("átz").equals(""));
+        String[] names = { "ácz", "átz" };
+        for (String name : names) {
+            Assert.assertFalse(bmpm.encode(name).equals(""));
+
+        }
     }
 
     /**
@@ -79,6 +80,7 @@ public class BeiderMorseEncoderTest exte
         Languages.instance("thereIsNoSuchLanguage");
     }
 
+    // @Ignore
     @Test(timeout = 10000L)
     public void testLongestEnglishSurname() throws EncoderException {
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
@@ -120,7 +122,7 @@ public class BeiderMorseEncoderTest exte
         bmpm.setRuleType(RuleType.RULES);
     }
 
-    @Test(timeout = 10000L)
+    @Test(timeout = 20000L)
     public void testSpeedCheck() throws EncoderException {
         char[] chars = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 
'o', 'u' };
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();

Modified: 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java
 (original)
+++ 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java
 Sun Jul 31 21:25:45 2011
@@ -48,7 +48,7 @@ public class LanguageGuessingTest {
                 { "Sjneijder", "dutch", EXACT }, { "Klausewitz", "german", 
EXACT }, { "Küçük", "turkish", EXACT },
                 { "Giacometti", "italian", EXACT }, { "Nagy", "hungarian", 
EXACT }, { "Ceauşescu", "romanian", EXACT },
                 { "Angelopoulos", "greeklatin", EXACT }, { 
"Αγγελόπουλος", "greek", EXACT }, { "Пушкин", "cyrillic", 
EXACT },
-                { "כהן", "hebrew", EXACT } });
+                { "כהן", "hebrew", EXACT }, { "ácz", "any", EXACT }, { 
"átz", "any", EXACT } });
     }
 
     private final String exactness;

Modified: 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
 (original)
+++ 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
 Sun Jul 31 21:25:45 2011
@@ -37,18 +37,14 @@ public class PhoneticEngineTest {
 
     @Parameterized.Parameters
     public static List<Object[]> data() {
-        return Arrays
-                .asList(new Object[] { "Renault", 
"rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, 
RuleType.APPROX,
-                        true },
-                        new Object[] { "Renault", 
"rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, 
RuleType.APPROX, true },
-                        new Object[] { "Renault", "rinDlt", 
NameType.SEPHARDIC, RuleType.APPROX, true },
-                        new Object[] { "SntJohn-Smith", "sntjonsmit", 
NameType.GENERIC, RuleType.EXACT, true },
-                        new Object[] { "d'ortley", 
"ortlaj|ortlaj|ortlej|ortlej-dortlaj|dortlaj|dortlej|dortlej", NameType.GENERIC,
-                                RuleType.EXACT, true },
-                        new Object[] {
-                                "van helsing",
-                                
"elSink|elsink|helSink|helsink|helzink|xelsink-banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink",
-                                NameType.GENERIC, RuleType.EXACT, false });
+        return Arrays.asList(new Object[] { "Renault", 
"rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC,
+                RuleType.APPROX, true }, new Object[] { "Renault", 
"rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI,
+                RuleType.APPROX, true }, new Object[] { "Renault", "rinDlt", 
NameType.SEPHARDIC, RuleType.APPROX, true }, new Object[] {
+                "SntJohn-Smith", "sntjonsmit", NameType.GENERIC, 
RuleType.EXACT, true }, new Object[] { "d'ortley",
+                "ortlaj|ortlaj|ortlej|ortlej-dortlaj|dortlaj|dortlej|dortlej", 
NameType.GENERIC, RuleType.EXACT, true }, new Object[] {
+                "van helsing",
+                
"elSink|elsink|helSink|helsink|helzink|xelsink-banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink",
+                NameType.GENERIC, RuleType.EXACT, false });
     }
 
     private final boolean concat;
@@ -72,7 +68,7 @@ public class PhoneticEngineTest {
         String phoneticActual = engine.encode(this.name);
 
         System.err.println("expecting: " + this.phoneticExpected);
-        System.err.println("actual: " + phoneticActual);
+        System.err.println("actual:    " + phoneticActual);
         assertEquals("phoneme incorrect", this.phoneticExpected, 
phoneticActual);
     }
 }

Modified: 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java
 (original)
+++ 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java
 Sun Jul 31 21:25:45 2011
@@ -30,20 +30,20 @@ import org.junit.runners.Parameterized;
  * @author Apache Software Foundation
  * @since 2.0
  */
-//@RunWith(Parameterized.class)
+// @RunWith(Parameterized.class)
 public class RuleTest {
 
     @Parameterized.Parameters
     public static List<Object[]> data() {
         return Arrays.asList(
-                new Object[] { "matching language sets with ALL",
-                        new Rule("e", "", "", new Rule.Phoneme("o", 
Languages.LanguageSet.from(
-                                new HashSet<String>(Arrays.asList("english", 
"french"))))),
-                        new HashSet<String>(Arrays.asList("english", 
"french")), true },
-                new Object[] { "non-matching language sets with ALL",
-                        new Rule("e", "", "", new Rule.Phoneme("o", 
Languages.LanguageSet.from(
-                                new HashSet<String>(Arrays.asList("english", 
"french"))))),
-                        new HashSet<String>(Arrays.asList("english")), false 
});
+                new Object[] {
+                        "matching language sets with ALL",
+                        new Rule("e", "", "", new Rule.Phoneme("o", 
Languages.LanguageSet.from(new HashSet<String>(Arrays.asList("english",
+                                "french"))))), new 
HashSet<String>(Arrays.asList("english", "french")), true },
+                new Object[] {
+                        "non-matching language sets with ALL",
+                        new Rule("e", "", "", new Rule.Phoneme("o", 
Languages.LanguageSet.from(new HashSet<String>(Arrays.asList("english",
+                                "french"))))), new 
HashSet<String>(Arrays.asList("english")), false });
     }
 
     private final String caseName;
@@ -58,9 +58,9 @@ public class RuleTest {
         this.expected = expected;
     }
 
-//    @Test
-//    public void testRuleLanguageMatches() {
-//        assertEquals(this.caseName, this.expected, 
this.rule.languageMatches(this.langs));
-//    }
+    // @Test
+    // public void testRuleLanguageMatches() {
+    // assertEquals(this.caseName, this.expected, 
this.rule.languageMatches(this.langs));
+    // }
 
 }


Reply via email to