Revision: 10222 http://sourceforge.net/p/languagetool/code/10222 Author: gulp21-1 Date: 2013-06-11 14:30:07 +0000 (Tue, 11 Jun 2013) Log Message: ----------- [de] fix some false alarms (case and agreement rule)
Modified Paths: -------------- trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/AgreementRule.java trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/AgreementRuleTest.java trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java Modified: trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/AgreementRule.java =================================================================== --- trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/AgreementRule.java 2013-06-10 22:49:31 UTC (rev 10221) +++ trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/AgreementRule.java 2013-06-11 14:30:07 UTC (rev 10222) @@ -82,8 +82,18 @@ "ADJ:DAT:PLU:MAS:GRU", "ADJ:DAT:PLU:NEU:GRU", "ADJ:DAT:PLU:FEM:GRU", // den Berliner Autos "ADJ:AKK:PLU:MAS:GRU", "ADJ:AKK:PLU:NEU:GRU", "ADJ:AKK:PLU:FEM:GRU", // den Berliner Bewohnern }; - + /* + * The heuristic of maybeAddAdjectiveReadings considers every noun ending with "er" as city name. + * The nouns in this list are NOT considered as city names. + * NOTE: Only nouns for which cutting off the final "er" produces a valid noun must be added to this list. 
+ */ + private static final Set<String> ER_TO_BE_IGNORED = new HashSet<String>(Arrays.asList( + "Alter", + "Kinder", + "Rinder" + )); + private static final Set<String> REL_PRONOUN = new HashSet<String>(); static { REL_PRONOUN.add("der"); @@ -183,15 +193,15 @@ boolean ignore = couldBeRelativeClause(tokens, i); if (i > 0) { final String prevToken = tokens[i-1].getToken().toLowerCase(); - if ((prevToken.equals("der") || prevToken.equals("die") || prevToken.equals("das")) + if ((prevToken.equals("der") || prevToken.equals("die") || prevToken.equals("das") || prevToken.equals("des")) && (tokens[i].getToken().equals("eine") || tokens[i].getToken().equals("einen"))) { // TODO: "der eine Polizist" -> nicht ignorieren, sondern "der polizist" checken; "auf der einen Seite" ignore = true; } } - // avoid false alarm on "nichts Gutes": - if (analyzedToken.getToken().equals("nichts")) { + // avoid false alarm on "nichts Gutes" and "alles Gute" + if (analyzedToken.getToken().equals("nichts") || analyzedToken.getToken().equals("alles")) { ignore = true; } @@ -224,7 +234,7 @@ } } - } + } // for each token return toRuleMatchArray(ruleMatches); } @@ -259,7 +269,7 @@ final String nextTerm = nextToken.getToken(); // Just a heuristic: nouns and proper nouns that end with "er" are considered // city names: - if (nextTerm.endsWith("er") && tokens.length > tokenPos+1) { + if (nextTerm.endsWith("er") && tokens.length > tokenPos+1 && !ER_TO_BE_IGNORED.contains(nextTerm)) { final AnalyzedGermanTokenReadings nextNextToken = (AnalyzedGermanTokenReadings)tokens[tokenPos+1]; try { final AnalyzedGermanTokenReadings nextATR = tagger.lookup(nextTerm.substring(0, nextTerm.length()-2)); Modified: trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java =================================================================== --- trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java 2013-06-10 22:49:31 UTC 
(rev 10221) +++ trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/CaseRule.java 2013-06-11 14:30:07 UTC (rev 10222) @@ -71,6 +71,7 @@ sentenceStartExceptions.add("“"); sentenceStartExceptions.add("«"); sentenceStartExceptions.add("»"); + sentenceStartExceptions.add("."); } private static final Set<String> exceptions = new HashSet<String>(); @@ -488,7 +489,7 @@ !analyzedToken.hasReadingOfType(POSType.PROPER_NOUN) && !isNilReading(analyzedToken) && !analyzedToken.isSentenceEnd() && - !( (tokens[i-1].getToken().equals("]") || tokens[i-1].getToken().equals(")")) && + !( (tokens[i-1].getToken().equals("]") || tokens[i-1].getToken().equals(")")) && // sentence starts with […] ( (i == 4 && tokens[i-2].getToken().equals("…")) || (i == 6 && tokens[i-2].getToken().equals(".")) ) ) && !isExceptionPhrase(i, tokens)) { final String msg = "Außer am Satzanfang werden nur Nomen und Eigennamen großgeschrieben"; Modified: trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/AgreementRuleTest.java =================================================================== --- trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/AgreementRuleTest.java 2013-06-10 22:49:31 UTC (rev 10221) +++ trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/AgreementRuleTest.java 2013-06-11 14:30:07 UTC (rev 10222) @@ -114,6 +114,9 @@ assertGood("Das Gütersloher Radio."); assertGood("Das wirklich Wichtige kommt jetzt erst."); assertGood("Besonders wenn wir Wermut oder Absinth trinken."); + assertGood("Ich wünsche dir alles Gute."); + assertGood("Es ist nicht bekannt, mit welchem Alter Kinder diese Fähigkeit erlernen."); + assertGood("Dieser ist nun in den Ortungsbereich des einen Roboters gefahren."); // incorrect sentences: assertBad("Es sind die Tisch."); Modified: 
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java =================================================================== --- trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java 2013-06-10 22:49:31 UTC (rev 10221) +++ trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/CaseRuleTest.java 2013-06-11 14:30:07 UTC (rev 10222) @@ -79,6 +79,9 @@ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die Schlinge zieht sich zu.")).length); assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die Schlingen ziehen sich zu.")).length); + // used to trigger error because of "abbreviation" + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Sie fällt auf durch ihre hilfsbereite Art. Zudem zeigt sie soziale Kompetenz.")).length); + // TODO: nach dem Doppelpunkt wird derzeit nicht auf groß/klein getestet: assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist es: kein Satz.")).length); assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist es: Kein Satz.")).length); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ This SF.net email is sponsored by Windows: Build for Windows Store. http://p.sf.net/sfu/windows-dev2dev _______________________________________________ Languagetool-commits mailing list Languagetool-commits@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-commits