Author: ggregory Date: Sun Jul 31 21:25:45 2011 New Revision: 1152642 URL: http://svn.apache.org/viewvc?rev=1152642&view=rev Log: Apply patch based on https://issues.apache.org/jira/secure/attachment/12488362/acz.patch for [CODEC-125]
Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java Sun Jul 31 21:25:45 2011 @@ -234,6 +234,11 @@ public class Lang { } } - return Languages.LanguageSet.from(langs); + Languages.LanguageSet ls = Languages.LanguageSet.from(langs); + if (ls.equals(Languages.NO_LANGUAGES)) { + return Languages.ANY_LANGUAGE; + } else { + return ls; + } } } Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java (original) +++ 
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java Sun Jul 31 21:25:45 2011 @@ -54,6 +54,84 @@ import java.util.Set; */ public class Languages { + /** + * A set of languages. + */ + public static abstract class LanguageSet { + public static LanguageSet from(Set<String> langs) { + if (langs.isEmpty()) { + return NO_LANGUAGES; + } else { + return new SomeLanguages(langs); + } + } + + public abstract boolean contains(String language); + + public abstract String getAny(); + + public abstract boolean isEmpty(); + + public abstract boolean isSingleton(); + + public abstract LanguageSet restrictTo(LanguageSet other); + } + + /** + * Some languages, explicitly enumerated. + */ + public static class SomeLanguages extends LanguageSet { + private final Set<String> languages; + + private SomeLanguages(Set<String> languages) { + this.languages = Collections.unmodifiableSet(languages); + } + + @Override + public boolean contains(String language) { + return this.languages.contains(language); + } + + @Override + public String getAny() { + return this.languages.iterator().next(); + } + + public Set<String> getLanguages() { + return this.languages; + } + + @Override + public boolean isEmpty() { + return this.languages.isEmpty(); + } + + @Override + public boolean isSingleton() { + return this.languages.size() == 1; + } + + @Override + public LanguageSet restrictTo(LanguageSet other) { + if (other == NO_LANGUAGES) { + return other; + } else if (other == ANY_LANGUAGE) { + return this; + } else { + SomeLanguages sl = (SomeLanguages) other; + Set<String> ls = new HashSet<String>(this.languages); + ls.retainAll(sl.languages); + return from(ls); + } + } + + @Override + public String toString() { + return "Languages(" + languages.toString() + ")"; + } + + } + public static final String ANY = "any"; private static final Map<NameType, Languages> LANGUAGES = new EnumMap<NameType, Languages>(NameType.class); @@ -107,49 +185,23 @@ public class Languages { 
private final Set<String> languages; - private Languages(Set<String> languages) { - this.languages = languages; - } - - public Set<String> getLanguages() { - return this.languages; - } - - /** - * A set of languages. - */ - public static abstract class LanguageSet { - public abstract LanguageSet restrictTo(LanguageSet other); - - public static LanguageSet from(Set<String> langs) { - if (langs.isEmpty()) { - return NO_LANGUAGES; - } else { - return new SomeLanguages(langs); - } - } - - public abstract boolean contains(String language); - - public abstract boolean isSingleton(); - - public abstract String getAny(); - - public abstract boolean isEmpty(); - } - /** * No languages at all. */ public static final LanguageSet NO_LANGUAGES = new LanguageSet() { @Override - public LanguageSet restrictTo(LanguageSet other) { - return this; + public boolean contains(String language) { + return false; } @Override - public boolean contains(String language) { - return false; + public String getAny() { + throw new NoSuchElementException("Can't fetch any language from the empty language set."); + } + + @Override + public boolean isEmpty() { + return true; } @Override @@ -158,13 +210,13 @@ public class Languages { } @Override - public String getAny() { - throw new NoSuchElementException("Can't fetch any language from the empty language set."); + public LanguageSet restrictTo(LanguageSet other) { + return this; } @Override - public boolean isEmpty() { - return true; + public String toString() { + return "NO_LANGUAGES"; } }; @@ -173,21 +225,11 @@ public class Languages { */ public static final LanguageSet ANY_LANGUAGE = new LanguageSet() { @Override - public LanguageSet restrictTo(LanguageSet other) { - return other; - } - - @Override public boolean contains(String language) { return true; } @Override - public boolean isSingleton() { - return false; - } - - @Override public String getAny() { throw new NoSuchElementException("Can't fetch any language from the any language set."); } @@ 
-196,54 +238,28 @@ public class Languages { public boolean isEmpty() { return false; } - }; - - /** - * Some languages, explicitly enumerated. - */ - public static class SomeLanguages extends LanguageSet { - private final Set<String> languages; - - private SomeLanguages(Set<String> languages) { - this.languages = Collections.unmodifiableSet(languages); - } - - public Set<String> getLanguages() { - return this.languages; - } @Override - public LanguageSet restrictTo(LanguageSet other) { - if (other == NO_LANGUAGES) { - return other; - } else if (other == ANY_LANGUAGE) { - return this; - } else { - SomeLanguages sl = (SomeLanguages) other; - Set<String> ls = new HashSet<String>(this.languages); - ls.retainAll(sl.languages); - return from(ls); - } + public boolean isSingleton() { + return false; } @Override - public boolean contains(String language) { - return this.languages.contains(language); + public LanguageSet restrictTo(LanguageSet other) { + return other; } @Override - public boolean isSingleton() { - return this.languages.size() == 1; + public String toString() { + return "ANY_LANGUAGE"; } + }; - @Override - public String getAny() { - return this.languages.iterator().next(); - } + private Languages(Set<String> languages) { + this.languages = languages; + } - @Override - public boolean isEmpty() { - return this.languages.isEmpty(); - } + public Set<String> getLanguages() { + return this.languages; } } Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java Sun Jul 31 
21:25:45 2011 @@ -49,6 +49,124 @@ import java.util.Set; * @since 2.0 */ public class PhoneticEngine { + static class PhonemeBuilder { + + public static PhonemeBuilder empty(Languages.LanguageSet languages) { + return new PhonemeBuilder(Collections.singleton(new Rule.Phoneme("", languages))); + } + + private final Set<Rule.Phoneme> phonemes; + + private PhonemeBuilder(Set<Rule.Phoneme> phonemes) { + this.phonemes = phonemes; + } + + public PhonemeBuilder append(String str) { + Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>(); + + for (Rule.Phoneme ph : this.phonemes) { + newPhonemes.add(ph.append(str)); + } + + return new PhonemeBuilder(newPhonemes); + } + + public PhonemeBuilder apply(Rule.PhonemeExpr phonemeExpr) { + Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>(); + + for (Rule.Phoneme left : this.phonemes) { + for (Rule.Phoneme right : phonemeExpr.getPhonemes()) { + Rule.Phoneme join = left.join(right); + if (!join.getLanguages().isEmpty()) { + newPhonemes.add(join); + } + } + } + + return new PhonemeBuilder(newPhonemes); + } + + public Set<Rule.Phoneme> getPhonemes() { + return this.phonemes; + } + + public String makeString() { + List<String> sorted = new ArrayList<String>(); + + for (Rule.Phoneme ph : this.phonemes) { + sorted.add(ph.getPhonemeText()); + } + + Collections.sort(sorted); + StringBuilder sb = new StringBuilder(); + + for (String ph : sorted) { + if (sb.length() > 0) + sb.append("|"); + sb.append(ph); + } + + return sb.toString(); + } + } + + private static class RulesApplication { + private final List<Rule> finalRules; + private final String input; + + private PhonemeBuilder phonemeBuilder; + private int i; + private boolean found; + + public RulesApplication(List<Rule> finalRules, String input, PhonemeBuilder phonemeBuilder, int i) { + if (finalRules == null) { + throw new NullPointerException("The finalRules argument must not be null"); + } + this.finalRules = finalRules; + this.phonemeBuilder = phonemeBuilder; + 
this.input = input; + this.i = i; + } + + public int getI() { + return this.i; + } + + public PhonemeBuilder getPhonemeBuilder() { + return this.phonemeBuilder; + } + + public RulesApplication invoke() { + this.found = false; + int patternLength = 0; + RULES: for (Rule rule : this.finalRules) { + String pattern = rule.getPattern(); + patternLength = pattern.length(); + // log("trying pattern: " + pattern); + + if (!rule.patternAndContextMatches(this.input, this.i)) { + // log("no match"); + continue RULES; + } + + this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme()); + this.found = true; + break RULES; + } + + if (!this.found) { + patternLength = 1; + } + + this.i += patternLength; + return this; + } + + public boolean isFound() { + return this.found; + } + } + private static final Map<NameType, Set<String>> NAME_PREFIXES = new EnumMap<NameType, Set<String>>(NameType.class); static { @@ -60,6 +178,19 @@ public class PhoneticEngine { "de la", "della", "des", "di", "do", "dos", "du", "van", "von")))); } + private static String join(Iterable<String> strings, String sep) { + StringBuilder sb = new StringBuilder(); + Iterator<String> si = strings.iterator(); + if (si.hasNext()) { + sb.append(si.next()); + } + while (si.hasNext()) { + sb.append(sep).append(si.next()); + } + + return sb.toString(); + } + private final Lang lang; private final NameType nameType; @@ -88,6 +219,57 @@ public class PhoneticEngine { this.lang = Lang.instance(nameType); } + private PhonemeBuilder applyFinalRules(PhonemeBuilder phonemeBuilder, List<Rule> finalRules, Languages.LanguageSet languageSet, + boolean strip) { + if (finalRules == null) { + throw new NullPointerException("finalRules can not be null"); + } + if (finalRules.isEmpty()) { + return phonemeBuilder; + } + + Set<Rule.Phoneme> phonemes = new HashSet<Rule.Phoneme>(); + + for (Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) { + PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages()); + String 
phonemeText = phoneme.getPhonemeText(); + // System.err.println("Expanding: " + phonemeText); + + for (int i = 0; i < phonemeText.length();) { + RulesApplication rulesApplication = new RulesApplication(finalRules, phonemeText, subBuilder, i).invoke(); + boolean found = rulesApplication.isFound(); + subBuilder = rulesApplication.getPhonemeBuilder(); + + if (!found) { + // System.err.println("Not found. Appending as-is"); + subBuilder = subBuilder.append(phonemeText.substring(i, i + 1)); + } + + i = rulesApplication.getI(); + + // System.err.println(phonemeText + " " + i + ": " + subBuilder.makeString()); + } + + // System.err.println("Expanded to: " + subBuilder.makeString()); + + phonemes.addAll(subBuilder.getPhonemes()); + } + + return new PhonemeBuilder(phonemes); + } + + /** + * Encodes a string to its phonetic representation. + * + * @param input + * the String to encode + * @return the encoding of the input + */ + public String encode(String input) { + Languages.LanguageSet languageSet = this.lang.guessLanguages(input); + return phoneticUtf8(input, languageSet); + } + /** * Gets the Lang language guessing rules being used. * @@ -125,18 +307,6 @@ public class PhoneticEngine { } /** - * Encodes a string to its phonetic representation. - * - * @param input - * the String to encode - * @return the encoding of the input - */ - public String encode(String input) { - Languages.LanguageSet languageSet = this.lang.guessLanguages(input); - return phoneticUtf8(input, languageSet); - } - - /** * Encodes an input string into an output phonetic representation, given a set of possible origin languages. 
* * @param input @@ -233,174 +403,4 @@ public class PhoneticEngine { return phonemeBuilder.makeString(); } - - private PhonemeBuilder applyFinalRules(PhonemeBuilder phonemeBuilder, List<Rule> finalRules, Languages.LanguageSet languageSet, - boolean strip) { - if (finalRules == null) { - throw new NullPointerException("finalRules can not be null"); - } - if (finalRules.isEmpty()) { - return phonemeBuilder; - } - - Set<Rule.Phoneme> phonemes = new HashSet<Rule.Phoneme>(); - - for (Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) { - PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages()); - String phonemeText = phoneme.getPhonemeText(); - // System.err.println("Expanding: " + phonemeText); - - for (int i = 0; i < phonemeText.length();) { - RulesApplication rulesApplication = new RulesApplication(finalRules, phonemeText, subBuilder, i).invoke(); - boolean found = rulesApplication.isFound(); - subBuilder = rulesApplication.getPhonemeBuilder(); - - if (!found) { - // System.err.println("Not found. 
Appending as-is"); - subBuilder = subBuilder.append(phonemeText.substring(i, i + 1)); - } - - i = rulesApplication.getI(); - - // System.err.println(phonemeText + " " + i + ": " + subBuilder.makeString()); - } - - // System.err.println("Expanded to: " + subBuilder.makeString()); - - phonemes.addAll(subBuilder.getPhonemes()); - } - - return new PhonemeBuilder(phonemes); - } - - private static String join(Iterable<String> strings, String sep) { - StringBuilder sb = new StringBuilder(); - Iterator<String> si = strings.iterator(); - if (si.hasNext()) { - sb.append(si.next()); - } - while (si.hasNext()) { - sb.append(sep).append(si.next()); - } - - return sb.toString(); - } - - private static class RulesApplication { - private final List<Rule> finalRules; - private final String input; - - private PhonemeBuilder phonemeBuilder; - private int i; - private boolean found; - - public RulesApplication(List<Rule> finalRules, String input, PhonemeBuilder phonemeBuilder, int i) { - if (finalRules == null) { - throw new NullPointerException("The finalRules argument must not be null"); - } - this.finalRules = finalRules; - this.phonemeBuilder = phonemeBuilder; - this.input = input; - this.i = i; - } - - public PhonemeBuilder getPhonemeBuilder() { - return this.phonemeBuilder; - } - - public int getI() { - return this.i; - } - - public boolean isFound() { - return this.found; - } - - public RulesApplication invoke() { - this.found = false; - int patternLength = 0; - RULES: for (Rule rule : this.finalRules) { - String pattern = rule.getPattern(); - patternLength = pattern.length(); - // log("trying pattern: " + pattern); - - if (!rule.patternAndContextMatches(this.input, this.i)) { - // log("no match"); - continue RULES; - } - - this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme()); - this.found = true; - break RULES; - } - - if (!this.found) { - patternLength = 1; - } - - this.i += patternLength; - return this; - } - } - - static class PhonemeBuilder { - - public 
static PhonemeBuilder empty(Languages.LanguageSet languages) { - return new PhonemeBuilder(Collections.singleton(new Rule.Phoneme("", languages))); - } - - private final Set<Rule.Phoneme> phonemes; - - private PhonemeBuilder(Set<Rule.Phoneme> phonemes) { - this.phonemes = phonemes; - } - - public Set<Rule.Phoneme> getPhonemes() { - return this.phonemes; - } - - public PhonemeBuilder apply(Rule.PhonemeExpr phonemeExpr) { - Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>(); - - for (Rule.Phoneme left : this.phonemes) { - for (Rule.Phoneme right : phonemeExpr.getPhonemes()) { - Rule.Phoneme join = left.join(right); - if (!join.getLanguages().isEmpty()) { - newPhonemes.add(join); - } - } - } - - return new PhonemeBuilder(newPhonemes); - } - - public String makeString() { - List<String> sorted = new ArrayList<String>(); - - for (Rule.Phoneme ph : this.phonemes) { - sorted.add(ph.getPhonemeText()); - } - - Collections.sort(sorted); - StringBuilder sb = new StringBuilder(); - - for (String ph : sorted) { - if (sb.length() > 0) - sb.append("|"); - sb.append(ph); - } - - return sb.toString(); - } - - public PhonemeBuilder append(String str) { - Set<Rule.Phoneme> newPhonemes = new HashSet<Rule.Phoneme>(); - - for (Rule.Phoneme ph : this.phonemes) { - newPhonemes.add(ph.append(str)); - } - - return new PhonemeBuilder(newPhonemes); - } - } } Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Sun Jul 31 21:25:45 2011 @@ -78,6 +78,52 @@ import java.util.regex.Pattern; * @since 2.0 */ public class Rule { + 
public static class Phoneme implements PhonemeExpr { + private final String phonemeText; + private final Languages.LanguageSet languages; + + public Phoneme(String phonemeText, Languages.LanguageSet languages) { + this.phonemeText = phonemeText; + this.languages = languages; + } + + public Phoneme append(String str) { + return new Phoneme(this.phonemeText + str, this.languages); + } + + public Languages.LanguageSet getLanguages() { + return this.languages; + } + + public Iterable<Phoneme> getPhonemes() { + return Collections.singleton(this); + } + + public String getPhonemeText() { + return this.phonemeText; + } + + public Phoneme join(Phoneme right) { + return new Phoneme(this.phonemeText + right.phonemeText, this.languages.restrictTo(right.languages)); + } + } + + public interface PhonemeExpr { + Iterable<Phoneme> getPhonemes(); + } + + public static class PhonemeList implements PhonemeExpr { + private final List<Phoneme> phonemes; + + public PhonemeList(List<Phoneme> phonemes) { + this.phonemes = phonemes; + } + + public List<Phoneme> getPhonemes() { + return this.phonemes; + } + } + public static final String ALL = "ALL"; private static final String DOUBLE_QUOTE = "\""; @@ -179,6 +225,43 @@ public class Rule { return rules; } + private static Phoneme parsePhoneme(String ph) { + int open = ph.indexOf("["); + if (open >= 0) { + if (!ph.endsWith("]")) { + throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); + } + String before = ph.substring(0, open); + String in = ph.substring(open + 1, ph.length() - 1); + Set<String> langs = new HashSet<String>(Arrays.asList(in.split("[+]"))); + + return new Phoneme(before, Languages.LanguageSet.from(langs)); + } else { + return new Phoneme(ph, Languages.ANY_LANGUAGE); + } + } + + private static PhonemeExpr parsePhonemeExpr(String ph) { + if (ph.startsWith("(")) { // we have a bracketed list of options + if (!ph.endsWith(")")) { + throw new IllegalArgumentException("Phoneme starts with 
'(' so must end with ')'"); + } + + List<Phoneme> phs = new ArrayList<Phoneme>(); + String body = ph.substring(1, ph.length() - 1); + for (String part : body.split("[|]")) { + phs.add(parsePhoneme(part)); + } + if (body.startsWith("|") || body.endsWith("|")) { + phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); + } + + return new PhonemeList(phs); + } else { + return parsePhoneme(ph); + } + } + private static List<Rule> parseRules(Scanner scanner) { List<Rule> lines = new ArrayList<Rule>(); int currentLine = 0; @@ -256,43 +339,6 @@ public class Rule { return str; } - private static PhonemeExpr parsePhonemeExpr(String ph) { - if (ph.startsWith("(")) { // we have a bracketed list of options - if (!ph.endsWith(")")) { - throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); - } - - List<Phoneme> phs = new ArrayList<Phoneme>(); - String body = ph.substring(1, ph.length() - 1); - for (String part : body.split("[|]")) { - phs.add(parsePhoneme(part)); - } - if (body.startsWith("|") || body.endsWith("|")) { - phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); - } - - return new PhonemeList(phs); - } else { - return parsePhoneme(ph); - } - } - - private static Phoneme parsePhoneme(String ph) { - int open = ph.indexOf("["); - if (open >= 0) { - if (!ph.endsWith("]")) { - throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); - } - String before = ph.substring(0, open); - String in = ph.substring(open + 1, ph.length() - 1); - Set<String> langs = new HashSet<String>(Arrays.asList(in.split("[+]"))); - - return new Phoneme(before, Languages.LanguageSet.from(langs)); - } else { - return new Phoneme(ph, Languages.ANY_LANGUAGE); - } - } - private final Pattern lContext; private final String pattern; @@ -329,6 +375,27 @@ public class Rule { return this.lContext; } + // /** + // * Decides if the language restriction for this rule applies. 
+ // * + // * @param languageArg + // * a Set of Strings giving the names of the languages in scope + // * @return true if these satistfy the language and logical restrictions on this rule, false otherwise + // */ + // public boolean languageMatches(Set<String> languageArg) { + // if (!languageArg.contains(Languages.ANY) && !this.languages.isEmpty()) { + // if (ALL.equals(this.logical) && !languageArg.containsAll(this.languages)) { + // return false; + // } else { + // Set<String> isect = new HashSet<String>(languageArg); + // isect.retainAll(this.languages); + // return !isect.isEmpty(); + // } + // } else { + // return true; + // } + // } + /** * Gets the pattern. This is a string-literal that must exactly match. * @@ -356,27 +423,6 @@ public class Rule { return this.rContext; } - // /** - // * Decides if the language restriction for this rule applies. - // * - // * @param languageArg - // * a Set of Strings giving the names of the languages in scope - // * @return true if these satistfy the language and logical restrictions on this rule, false otherwise - // */ - // public boolean languageMatches(Set<String> languageArg) { - // if (!languageArg.contains(Languages.ANY) && !this.languages.isEmpty()) { - // if (ALL.equals(this.logical) && !languageArg.containsAll(this.languages)) { - // return false; - // } else { - // Set<String> isect = new HashSet<String>(languageArg); - // isect.retainAll(this.languages); - // return !isect.isEmpty(); - // } - // } else { - // return true; - // } - // } - /** * Decides if the pattern and context match the input starting at a position. 
* @@ -404,50 +450,4 @@ public class Rule { return patternMatches && rContextMatches && lContextMatches; } - - public interface PhonemeExpr { - Iterable<Phoneme> getPhonemes(); - } - - public static class Phoneme implements PhonemeExpr { - private final String phonemeText; - private final Languages.LanguageSet languages; - - public Phoneme(String phonemeText, Languages.LanguageSet languages) { - this.phonemeText = phonemeText; - this.languages = languages; - } - - public String getPhonemeText() { - return this.phonemeText; - } - - public Languages.LanguageSet getLanguages() { - return this.languages; - } - - public Iterable<Phoneme> getPhonemes() { - return Collections.singleton(this); - } - - public Phoneme join(Phoneme right) { - return new Phoneme(this.phonemeText + right.phonemeText, this.languages.restrictTo(right.languages)); - } - - public Phoneme append(String str) { - return new Phoneme(this.phonemeText + str, this.languages); - } - } - - public static class PhonemeList implements PhonemeExpr { - private final List<Phoneme> phonemes; - - public PhonemeList(List<Phoneme> phonemes) { - this.phonemes = phonemes; - } - - public List<Phoneme> getPhonemes() { - return this.phonemes; - } - } } Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (original) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Sun Jul 31 21:25:45 2011 @@ -25,7 +25,6 @@ import org.apache.commons.codec.EncoderE import org.apache.commons.codec.StringEncoder; import org.apache.commons.codec.StringEncoderAbstractTest; 
import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; /** @@ -40,14 +39,16 @@ public class BeiderMorseEncoderTest exte return new BeiderMorseEncoder(); } - @Ignore @Test public void testEncodeAtz() throws EncoderException { BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); bmpm.setNameType(NameType.GENERIC); bmpm.setRuleType(RuleType.APPROX); - Assert.assertFalse(bmpm.encode("ácz").equals("")); - Assert.assertFalse(bmpm.encode("átz").equals("")); + String[] names = { "ácz", "átz" }; + for (String name : names) { + Assert.assertFalse(bmpm.encode(name).equals("")); + + } } /** @@ -79,6 +80,7 @@ public class BeiderMorseEncoderTest exte Languages.instance("thereIsNoSuchLanguage"); } + // @Ignore @Test(timeout = 10000L) public void testLongestEnglishSurname() throws EncoderException { BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); @@ -120,7 +122,7 @@ public class BeiderMorseEncoderTest exte bmpm.setRuleType(RuleType.RULES); } - @Test(timeout = 10000L) + @Test(timeout = 20000L) public void testSpeedCheck() throws EncoderException { char[] chars = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' }; BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java (original) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java Sun Jul 31 21:25:45 2011 @@ -48,7 +48,7 @@ public class LanguageGuessingTest { { "Sjneijder", "dutch", EXACT }, { "Klausewitz", "german", EXACT }, { "Küçük", "turkish", EXACT }, { "Giacometti", "italian", EXACT }, { 
"Nagy", "hungarian", EXACT }, { "Ceauşescu", "romanian", EXACT }, { "Angelopoulos", "greeklatin", EXACT }, { "Αγγελόπουλος", "greek", EXACT }, { "Пушкин", "cyrillic", EXACT }, - { "כהן", "hebrew", EXACT } }); + { "כהן", "hebrew", EXACT }, { "ácz", "any", EXACT }, { "átz", "any", EXACT } }); } private final String exactness; Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java (original) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticEngineTest.java Sun Jul 31 21:25:45 2011 @@ -37,18 +37,14 @@ public class PhoneticEngineTest { @Parameterized.Parameters public static List<Object[]> data() { - return Arrays - .asList(new Object[] { "Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, RuleType.APPROX, - true }, - new Object[] { "Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, RuleType.APPROX, true }, - new Object[] { "Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, true }, - new Object[] { "SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, true }, - new Object[] { "d'ortley", "ortlaj|ortlaj|ortlej|ortlej-dortlaj|dortlaj|dortlej|dortlej", NameType.GENERIC, - RuleType.EXACT, true }, - new Object[] { - "van helsing", - "elSink|elsink|helSink|helsink|helzink|xelsink-banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink", - NameType.GENERIC, RuleType.EXACT, false }); + return Arrays.asList(new Object[] { "Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, + RuleType.APPROX, true }, new Object[] { "Renault", 
"rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, + RuleType.APPROX, true }, new Object[] { "Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, true }, new Object[] { + "SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, true }, new Object[] { "d'ortley", + "ortlaj|ortlaj|ortlej|ortlej-dortlaj|dortlaj|dortlej|dortlej", NameType.GENERIC, RuleType.EXACT, true }, new Object[] { + "van helsing", + "elSink|elsink|helSink|helsink|helzink|xelsink-banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink", + NameType.GENERIC, RuleType.EXACT, false }); } private final boolean concat; @@ -72,7 +68,7 @@ public class PhoneticEngineTest { String phoneticActual = engine.encode(this.name); System.err.println("expecting: " + this.phoneticExpected); - System.err.println("actual: " + phoneticActual); + System.err.println("actual: " + phoneticActual); assertEquals("phoneme incorrect", this.phoneticExpected, phoneticActual); } } Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java?rev=1152642&r1=1152641&r2=1152642&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java (original) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java Sun Jul 31 21:25:45 2011 @@ -30,20 +30,20 @@ import org.junit.runners.Parameterized; * @author Apache Software Foundation * @since 2.0 */ -//@RunWith(Parameterized.class) +// @RunWith(Parameterized.class) public class RuleTest { @Parameterized.Parameters public static List<Object[]> data() { return Arrays.asList( - new Object[] { "matching language sets with ALL", - new Rule("e", "", "", new Rule.Phoneme("o", Languages.LanguageSet.from( - new HashSet<String>(Arrays.asList("english", 
"french"))))), - new HashSet<String>(Arrays.asList("english", "french")), true }, - new Object[] { "non-matching language sets with ALL", - new Rule("e", "", "", new Rule.Phoneme("o", Languages.LanguageSet.from( - new HashSet<String>(Arrays.asList("english", "french"))))), - new HashSet<String>(Arrays.asList("english")), false }); + new Object[] { + "matching language sets with ALL", + new Rule("e", "", "", new Rule.Phoneme("o", Languages.LanguageSet.from(new HashSet<String>(Arrays.asList("english", + "french"))))), new HashSet<String>(Arrays.asList("english", "french")), true }, + new Object[] { + "non-matching language sets with ALL", + new Rule("e", "", "", new Rule.Phoneme("o", Languages.LanguageSet.from(new HashSet<String>(Arrays.asList("english", + "french"))))), new HashSet<String>(Arrays.asList("english")), false }); } private final String caseName; @@ -58,9 +58,9 @@ public class RuleTest { this.expected = expected; } -// @Test -// public void testRuleLanguageMatches() { -// assertEquals(this.caseName, this.expected, this.rule.languageMatches(this.langs)); -// } + // @Test + // public void testRuleLanguageMatches() { + // assertEquals(this.caseName, this.expected, this.rule.languageMatches(this.langs)); + // } }