Author: ggregory
Date: Wed Jul 27 19:47:48 2011
New Revision: 1151603

URL: http://svn.apache.org/viewvc?rev=1151603&view=rev
Log:
Fix failing test "gna": org.apache.commons.codec.language.bm.BeiderMorseEncoderTest.testEncodeGna()

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1151603&r1=1151602&r2=1151603&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java Wed Jul 27 19:47:48 2011 @@ -145,9 +145,11 @@ public class PhoneticEngine { * @return a phonetic representation of the input; a String containing '-'-separated phonetic representations of the input */ public String phoneticUtf8(String input, final Set<String> languageSet) { - List<Rule> rules = Rule.instance(this.nameType, RuleType.RULES, languageSet); - List<Rule> finalRules1 = Rule.instance(this.nameType, this.ruleType, "common"); - List<Rule> finalRules2 = Rule.instance(this.nameType, this.ruleType, languageSet); + final List<Rule> rules = Rule.instance(this.nameType, RuleType.RULES, languageSet); + final List<Rule> finalRules1 = Rule.instance(this.nameType, this.ruleType, "common"); + final List<Rule> finalRules2 = Rule.instance(this.nameType, this.ruleType, languageSet); + // System.err.println("Languages: " + languageSet); + // System.err.println("Rules: " + rules); // tidy the input // lower case is a locale-dependent operation @@ -345,6 +347,11 @@ public class PhoneticEngine { String prefix = phonetic.substring(0, altStart); altStart++; int altEnd = phonetic.indexOf(')'); + + if (altEnd < altStart) { + throw new 
IllegalArgumentException("Phonetic string has a close-bracket before the first open-bracket"); + } + String altString = phonetic.substring(altStart, altEnd); altEnd++; String suffix = phonetic.substring(altEnd); Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1151603&r1=1151602&r2=1151603&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Wed Jul 27 19:47:48 2011 @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.Set; +import java.util.Stack; import java.util.regex.Pattern; /** @@ -77,10 +78,10 @@ import java.util.regex.Pattern; * @since 2.0 */ public class Rule { - private static final String DOUBLE_QUOTE = "\""; - public static final String ALL = "ALL"; + private static final String DOUBLE_QUOTE = "\""; + private static final String HASH_INCLUDE = "#include"; private static final Map<NameType, Map<RuleType, Map<String, List<Rule>>>> RULES = new EnumMap<NameType, Map<RuleType, Map<String, List<Rule>>>>( @@ -95,10 +96,14 @@ public class Rule { Languages ls = Languages.instance(s); for (String l : ls.getLanguages()) { - rs.put(l, parseRules(mkScanner(s, rt, l))); + try { + rs.put(l, parseRules(createScanner(s, rt, l))); + } catch (IllegalStateException e) { + throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); + } } if (!rt.equals(RuleType.RULES)) { - rs.put("common", parseRules(mkScanner(s, rt, "common"))); + rs.put("common", parseRules(createScanner(s, rt, "common"))); } rts.put(rt, Collections.unmodifiableMap(rs)); @@ -108,6 +113,32 @@ public class Rule { } } + private static String 
createResourceName(NameType nameType, RuleType rt, String lang) { + return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", nameType.getName(), rt.getName(), lang); + } + + private static Scanner createScanner(NameType nameType, RuleType rt, String lang) { + String resName = createResourceName(nameType, rt, lang); + InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); + + if (rulesIS == null) { + throw new IllegalArgumentException("Unable to load resource: " + resName); + } + + return new Scanner(rulesIS, ResourceConstants.ENCODING); + } + + private static Scanner createScanner(String lang) { + String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); + InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); + + if (rulesIS == null) { + throw new IllegalArgumentException("Unable to load resource: " + resName); + } + + return new Scanner(rulesIS, ResourceConstants.ENCODING); + } + /** * Gets rules for a combination of name type, rule type and languages. 
* @@ -148,33 +179,13 @@ public class Rule { return rules; } - private static Scanner mkScanner(NameType nameType, RuleType rt, String lang) { - String resName = String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", nameType.getName(), rt.getName(), lang); - InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); - - if (rulesIS == null) { - throw new IllegalArgumentException("Unable to load resource: " + resName); - } - - return new Scanner(rulesIS, ResourceConstants.ENCODING); - } - - private static Scanner mkScanner(String lang) { - String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); - InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); - - if (rulesIS == null) { - throw new IllegalArgumentException("Unable to load resource: " + resName); - } - - return new Scanner(rulesIS, ResourceConstants.ENCODING); - } - private static List<Rule> parseRules(Scanner scanner) { List<Rule> lines = new ArrayList<Rule>(); + int currentLine = 0; boolean inMultilineComment = false; while (scanner.hasNextLine()) { + currentLine++; String rawLine = scanner.nextLine(); String line = rawLine; @@ -206,7 +217,7 @@ public class Rule { if (incl.contains(" ")) { System.err.println("Warining: malformed import statement: " + rawLine); } else { - lines.addAll(parseRules(mkScanner(incl))); + lines.addAll(parseRules(createScanner(incl))); } } else { // rule @@ -218,6 +229,11 @@ public class Rule { String lCon = stripQuotes(parts[1]); String rCon = stripQuotes(parts[2]); String ph = stripQuotes(parts[3]); + try { + validatePhenome(ph); + } catch (IllegalArgumentException e) { + throw new IllegalStateException("Problem parsing line " + currentLine, e); + } Rule r = new Rule(pat, lCon, rCon, ph, Collections.<String> emptySet(), ""); // guessing last 2 parameters lines.add(r); } @@ -241,6 +257,40 @@ public class Rule { return str; } + private static void validatePhenome(CharSequence ph) { + 
Stack<Character> stack = new Stack<Character>(); + for (int i = 0; i < ph.length(); i++) { + switch (ph.charAt(i)) { + case '(': + stack.push('('); + break; + case '[': + stack.push('['); + break; + case ')': { + if (stack.isEmpty()) + throw new IllegalArgumentException("Closing ')' at " + i + " without an opening '('" + " in " + ph); + char c = stack.pop(); + if (c != '(') + throw new IllegalArgumentException("Closing ')' does not pair with opening '" + c + "' at " + i + " in " + ph); + break; + } + case ']': { + if (stack.isEmpty()) + throw new IllegalArgumentException("Closing ']' at " + i + " without an opening '['" + " in " + ph); + char c = stack.pop(); + if (c != '[') + throw new IllegalArgumentException("Closing ']' does not pair with opening '" + c + "' at " + i + " in " + ph); + break; + } + default: + break; + } + } + if (!stack.isEmpty()) + throw new IllegalArgumentException("Bracket(s) opened without corresponding closes: " + stack + " in " + ph); + } + private final Set<String> languages; private final Pattern lContext; Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1151603&r1=1151602&r2=1151603&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (original) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Wed Jul 27 19:47:48 2011 @@ -46,7 +46,7 @@ public class BeiderMorseEncoderTest exte * * @throws EncoderException */ - @Ignore + // @Ignore @Test public void testEncodeGna() throws EncoderException { BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); @@ -71,7 +71,7 @@ public class BeiderMorseEncoderTest exte } @Ignore - @Test + @Test(timeout = 10000L) 
public void testLongestEnglishSurname() throws EncoderException { BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); bmpm.setNameType(NameType.GENERIC); @@ -113,7 +113,7 @@ public class BeiderMorseEncoderTest exte } @Ignore - @Test + @Test(timeout = 10000L) public void testSpeedCheck() throws EncoderException { char[] chars = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' }; BeiderMorseEncoder bmpm = new BeiderMorseEncoder();