Revision: 6856 http://languagetool.svn.sourceforge.net/languagetool/?rev=6856&view=rev Author: dnaber Date: 2012-05-09 21:27:28 +0000 (Wed, 09 May 2012) Log Message: ----------- Overlapping rule matches are filtered now so that only the first match per <rulegroup> is kept
Modified Paths: -------------- trunk/JLanguageTool/CHANGES.txt trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java Added Paths: ----------- trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java Modified: trunk/JLanguageTool/CHANGES.txt =================================================================== --- trunk/JLanguageTool/CHANGES.txt 2012-05-08 21:18:55 UTC (rev 6855) +++ trunk/JLanguageTool/CHANGES.txt 2012-05-09 21:27:28 UTC (rev 6856) @@ -23,7 +23,9 @@ -Improved startup speed (Jarek Lipski) + -Overlapping rule matches are filtered now so that only the first match per <rulegroup> is kept + 1.7 (2012-03-25) -English: Modified: trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java 2012-05-08 21:18:55 UTC (rev 6855) +++ trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java 2012-05-09 21:27:28 UTC (rev 6856) @@ -21,8 +21,7 @@ import org.languagetool.databroker.DefaultResourceDataBroker; import org.languagetool.databroker.ResourceDataBroker; import org.languagetool.gui.ResourceBundleWithFallback; -import org.languagetool.rules.Rule; -import org.languagetool.rules.RuleMatch; +import org.languagetool.rules.*; import org.languagetool.rules.patterns.FalseFriendRuleLoader; import org.languagetool.rules.patterns.PatternRule; import org.languagetool.rules.patterns.PatternRuleLoader; @@ -521,8 +520,7 @@ } else { if (lineBreakPos == 0) { columnCount = sentence.length(); - if (!language.getSentenceTokenizer(). - singleLineBreaksMarksPara()) { + if (!language.getSentenceTokenizer().singleLineBreaksMarksPara()) { columnCount--; } } else { @@ -590,7 +588,8 @@ } } } - return sentenceMatches; + final RuleMatchFilter filter = new SameRuleGroupFilter(); + return filter.filter(sentenceMatches); } /** Added: trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java (rev 0) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java 2012-05-09 21:27:28 UTC (rev 6856) @@ -0,0 +1,32 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules; + +import java.util.List; + +/** + * Filter rule matches. + * + * @since 1.8 + */ +public interface RuleMatchFilter { + + List<RuleMatch> filter(List<RuleMatch> ruleMatches); + +} Added: trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java (rev 0) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java 2012-05-09 21:27:28 UTC (rev 6856) @@ -0,0 +1,66 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Filter rule matches so that only the first match is kept from overlapping + * matches with the same rule group. + * + * @since 1.8 + */ +public class SameRuleGroupFilter implements RuleMatchFilter { + + /** + * @param ruleMatches list of matches + * @return + */ + public List<RuleMatch> filter(List<RuleMatch> ruleMatches) { + Collections.sort(ruleMatches); + final List<RuleMatch> filteredRules = new ArrayList<RuleMatch>(); + for (int i = 0; i < ruleMatches.size(); i++) { + final RuleMatch match = ruleMatches.get(i); + if (i < ruleMatches.size() - 1) { + final RuleMatch nextMatch = ruleMatches.get(i + 1); + if (overlaps(match, nextMatch) && haveSameRuleGroup(match, nextMatch)) { + i++; // skip next match + } + } + filteredRules.add(match); + } + + return filteredRules; + } + + boolean overlaps(RuleMatch match, RuleMatch nextMatch) { + if (match.getFromPos() <= nextMatch.getToPos() && match.getToPos() >= nextMatch.getFromPos()) { + return true; + } + return false; + } + + private boolean haveSameRuleGroup(RuleMatch match, RuleMatch nextMatch) { + final String id1 = match.getRule().getId(); + return id1 != null && id1.equals(nextMatch.getRule().getId()); + } + +} Modified: trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java 2012-05-08 21:18:55 UTC (rev 6855) +++ trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java 2012-05-09 21:27:28 UTC (rev 6856) @@ -19,12 +19,17 @@ package org.languagetool; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import junit.framework.TestCase; import org.languagetool.JLanguageTool.ParagraphHandling; +import org.languagetool.rules.Category; +import org.languagetool.rules.Rule; import org.languagetool.rules.RuleMatch; +import org.languagetool.rules.patterns.Element; +import org.languagetool.rules.patterns.PatternRule; /** * @author Daniel Naber @@ -210,5 +215,32 @@ i++; } } - + + public void testOverlapFilter() throws IOException { + final Category category = new Category("test category"); + final List<Element> elements1 = Arrays.asList(new Element("one", true, false, false)); + final PatternRule rule1 = new PatternRule("id1", Language.ENGLISH, elements1, "desc1", "msg1", "shortMsg1"); + rule1.setSubId("1"); + rule1.setCategory(category); + + final List<Element> elements2 = Arrays.asList(new Element("one", true, false, false), new Element("two", true, false, false)); + final PatternRule rule2 = new PatternRule("id1", Language.ENGLISH, elements2, "desc2", "msg2", "shortMsg2"); + rule2.setSubId("2"); + rule2.setCategory(category); + + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + tool.addRule(rule1); + tool.addRule(rule2); + + final List<RuleMatch> ruleMatches1 = tool.check("And one two three."); + assertEquals("one overlapping rule must be filtered out", 1, ruleMatches1.size()); + assertEquals("msg1", ruleMatches1.get(0).getMessage()); + + final String sentence = "And one two three."; + final AnalyzedSentence analyzedSentence = tool.getAnalyzedSentence(sentence); + final List<Rule> bothRules = new ArrayList<Rule>(Arrays.asList(rule1, rule2)); + final List<RuleMatch> ruleMatches2 = tool.checkAnalyzedSentence(ParagraphHandling.NORMAL, bothRules, 0, 0, 0, sentence, analyzedSentence); + assertEquals("one overlapping rule must be filtered out", 1, ruleMatches2.size()); + assertEquals("msg1", ruleMatches2.get(0).getMessage()); + } } Added: trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java (rev 0) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java 2012-05-09 21:27:28 UTC (rev 6856) @@ -0,0 +1,84 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules; + +import junit.framework.TestCase; +import org.languagetool.Language; +import org.languagetool.rules.patterns.Element; +import org.languagetool.rules.patterns.PatternRule; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class SameRuleGroupFilterTest extends TestCase { + + public void testFilter() { + final List<Element> fakeElements = new ArrayList<Element>(); + final PatternRule rule1 = new PatternRule("id1", Language.ENGLISH, fakeElements, "desc1", "msg1", "shortMsg1"); + final PatternRule rule2 = new PatternRule("id1", Language.ENGLISH, fakeElements, "desc2", "msg2", "shortMsg2"); + final RuleMatch match1 = new RuleMatch(rule1, 10, 20, "Match1"); + final RuleMatch match2 = new RuleMatch(rule2, 15, 25, "Match2"); + final SameRuleGroupFilter filter = new SameRuleGroupFilter(); + final List<RuleMatch> filteredMatches = filter.filter(Arrays.asList(match1, match2)); + assertEquals(1, filteredMatches.size()); + assertEquals("Match1", filteredMatches.get(0).getMessage()); + } + + public void testNoFilteringIfNotOverlapping() { + final List<Element> fakeElements = new ArrayList<Element>(); + final PatternRule rule1 = new PatternRule("id1", Language.ENGLISH, fakeElements, "desc1", "msg1", "shortMsg1"); + final PatternRule rule2 = new PatternRule("id1", Language.ENGLISH, fakeElements, "desc2", "msg2", "shortMsg2"); + final RuleMatch match1 = new RuleMatch(rule1, 10, 20, "Match1"); + final RuleMatch match2 = new RuleMatch(rule2, 21, 25, "Match2"); + final SameRuleGroupFilter filter = new SameRuleGroupFilter(); + final List<RuleMatch> filteredMatches = filter.filter(Arrays.asList(match1, match2)); + assertEquals(2, filteredMatches.size()); + } + + public void testNoFilteringIfDifferentRulegroups() { + final List<Element> fakeElements = new ArrayList<Element>(); + final Rule rule1 = new PatternRule("id1", Language.ENGLISH, fakeElements, "desc1", "msg1", "shortMsg1"); + final Rule rule2 = new PatternRule("id2", Language.ENGLISH, fakeElements, "desc2", "msg2", "shortMsg2"); + final RuleMatch match1 = new RuleMatch(rule1, 10, 20, "Match1"); + final RuleMatch match2 = new RuleMatch(rule2, 15, 25, "Match2"); + final SameRuleGroupFilter filter = new SameRuleGroupFilter(); + final List<RuleMatch> filteredMatches = filter.filter(Arrays.asList(match1, match2)); + assertEquals(2, filteredMatches.size()); + } + + public void testOverlaps() { + final SameRuleGroupFilter filter = new SameRuleGroupFilter(); + + assertTrue(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(10, 20))); + assertTrue(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(5, 11))); + assertTrue(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(19, 21))); + assertTrue(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(11, 19))); + assertTrue(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(1, 10))); + assertTrue(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(20, 20))); + + assertFalse(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(21, 30))); + assertFalse(filter.overlaps(makeRuleMatch(10, 20), makeRuleMatch(1, 9))); + } + + private RuleMatch makeRuleMatch(int fromPos, int toPos) { + return new RuleMatch(null, fromPos, toPos, "FakeMatch1"); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs