Revision: 6856
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6856&view=rev
Author:   dnaber
Date:     2012-05-09 21:27:28 +0000 (Wed, 09 May 2012)
Log Message:
-----------
Overlapping rule matches are filtered now so that only the first match per 
<rulegroup> is kept

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java
    trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java

Added Paths:
-----------
    trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java
    trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java
    
trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-05-08 21:18:55 UTC (rev 6855)
+++ trunk/JLanguageTool/CHANGES.txt     2012-05-09 21:27:28 UTC (rev 6856)
@@ -23,7 +23,9 @@
 
  -Improved startup speed (Jarek Lipski)
 
+ -Overlapping rule matches are filtered now so that only the first match per 
<rulegroup> is kept
 
+
 1.7 (2012-03-25)
 
  -English:

Modified: trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java    
2012-05-08 21:18:55 UTC (rev 6855)
+++ trunk/JLanguageTool/src/java/org/languagetool/JLanguageTool.java    
2012-05-09 21:27:28 UTC (rev 6856)
@@ -21,8 +21,7 @@
 import org.languagetool.databroker.DefaultResourceDataBroker;
 import org.languagetool.databroker.ResourceDataBroker;
 import org.languagetool.gui.ResourceBundleWithFallback;
-import org.languagetool.rules.Rule;
-import org.languagetool.rules.RuleMatch;
+import org.languagetool.rules.*;
 import org.languagetool.rules.patterns.FalseFriendRuleLoader;
 import org.languagetool.rules.patterns.PatternRule;
 import org.languagetool.rules.patterns.PatternRuleLoader;
@@ -521,8 +520,7 @@
       } else {
         if (lineBreakPos == 0) {
           columnCount = sentence.length();
-          if (!language.getSentenceTokenizer().
-              singleLineBreaksMarksPara()) {
+          if (!language.getSentenceTokenizer().singleLineBreaksMarksPara()) {
             columnCount--;
           }
         } else {
@@ -590,7 +588,8 @@
         }
       }
     }
-    return sentenceMatches;
+    final RuleMatchFilter filter = new SameRuleGroupFilter();
+    return filter.filter(sentenceMatches);
   }
 
   /**

Added: trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java    
                        (rev 0)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/RuleMatchFilter.java    
2012-05-09 21:27:28 UTC (rev 6856)
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules;
+
+import java.util.List;
+
+/**
+ * Filter rule matches.
+ *
+ * <p>The single method {@link #filter(List)} takes a list of rule matches
+ * and returns a list of rule matches. Which matches are dropped, and
+ * whether the list passed in is reordered or otherwise modified, is up to
+ * the implementation, so callers should only rely on the returned list.
+ *
+ * @since 1.8
+ */
+public interface RuleMatchFilter {
+
+  List<RuleMatch> filter(List<RuleMatch> ruleMatches);
+
+}

Added: 
trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java    
                            (rev 0)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/SameRuleGroupFilter.java    
    2012-05-09 21:27:28 UTC (rev 6856)
@@ -0,0 +1,66 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Filter rule matches so that only the first match is kept from overlapping
+ * matches with the same rule group. Two matches belong to the same rule
+ * group if their rules have the same (non-null) id.
+ *
+ * @since 1.8
+ */
+public class SameRuleGroupFilter implements RuleMatchFilter {
+
+  /**
+   * Sorts a copy of the given matches (using the natural ordering of
+   * {@link RuleMatch}) and drops every match that overlaps an already kept
+   * match of the same rule group. Each match is compared with all matches
+   * kept so far, not only with its direct neighbour, so chains of three or
+   * more overlapping matches and matches separated by a match of a different
+   * rule are filtered, too. The list passed in is not modified.
+   *
+   * @param ruleMatches list of matches, must not be null
+   * @return a new list with the matches that were kept, in sorted order
+   */
+  public List<RuleMatch> filter(List<RuleMatch> ruleMatches) {
+    // Sort a copy: the caller's list stays untouched and read-only lists work, too.
+    final List<RuleMatch> sortedMatches = new ArrayList<RuleMatch>(ruleMatches);
+    Collections.sort(sortedMatches);
+    final List<RuleMatch> filteredRules = new ArrayList<RuleMatch>();
+    for (RuleMatch match : sortedMatches) {
+      if (!overlapsKeptMatchOfSameGroup(match, filteredRules)) {
+        filteredRules.add(match);
+      }
+    }
+    return filteredRules;
+  }
+
+  /**
+   * Returns true if the candidate overlaps one of the matches kept so far
+   * and shares its rule group with that match.
+   */
+  private boolean overlapsKeptMatchOfSameGroup(RuleMatch candidate, List<RuleMatch> keptMatches) {
+    for (RuleMatch keptMatch : keptMatches) {
+      if (overlaps(keptMatch, candidate) && haveSameRuleGroup(keptMatch, candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the position ranges of both matches intersect. Both
+   * bounds are compared inclusively, so two matches that merely touch
+   * (one ends at the position where the other starts) count as overlapping.
+   */
+  boolean overlaps(RuleMatch match, RuleMatch nextMatch) {
+    return match.getFromPos() <= nextMatch.getToPos()
+        && match.getToPos() >= nextMatch.getFromPos();
+  }
+
+  /**
+   * Returns true if both matches come from rules with the same non-null id.
+   * A match without a rule never shares a group with another match.
+   */
+  private boolean haveSameRuleGroup(RuleMatch match, RuleMatch nextMatch) {
+    if (match.getRule() == null || nextMatch.getRule() == null) {
+      return false;
+    }
+    final String id1 = match.getRule().getId();
+    return id1 != null && id1.equals(nextMatch.getRule().getId());
+  }
+
+}

Modified: trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java        
2012-05-08 21:18:55 UTC (rev 6855)
+++ trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java        
2012-05-09 21:27:28 UTC (rev 6856)
@@ -19,12 +19,17 @@
 package org.languagetool;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
 import junit.framework.TestCase;
 import org.languagetool.JLanguageTool.ParagraphHandling;
+import org.languagetool.rules.Category;
+import org.languagetool.rules.Rule;
 import org.languagetool.rules.RuleMatch;
+import org.languagetool.rules.patterns.Element;
+import org.languagetool.rules.patterns.PatternRule;
 
 /**
  * @author Daniel Naber
@@ -210,5 +215,32 @@
       i++;
     }
   }
-  
+
+  /**
+   * Two pattern rules with the same id ("id1") both match at the token "one";
+   * only the match of the first rule may be reported, both via check() and
+   * via checkAnalyzedSentence().
+   */
+  public void testOverlapFilter() throws IOException {
+    final Category testCategory = new Category("test category");
+
+    final List<Element> singleToken = Arrays.asList(new Element("one", true, false, false));
+    final PatternRule firstRule = new PatternRule("id1", Language.ENGLISH, singleToken, "desc1", "msg1", "shortMsg1");
+    firstRule.setSubId("1");
+    firstRule.setCategory(testCategory);
+
+    final List<Element> twoTokens = Arrays.asList(
+        new Element("one", true, false, false),
+        new Element("two", true, false, false));
+    final PatternRule secondRule = new PatternRule("id1", Language.ENGLISH, twoTokens, "desc2", "msg2", "shortMsg2");
+    secondRule.setSubId("2");
+    secondRule.setCategory(testCategory);
+
+    final JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+    langTool.addRule(firstRule);
+    langTool.addRule(secondRule);
+
+    final String text = "And one two three.";
+
+    // first path: the regular check() entry point
+    final List<RuleMatch> matchesFromCheck = langTool.check(text);
+    assertEquals("one overlapping rule must be filtered out", 1, matchesFromCheck.size());
+    assertEquals("msg1", matchesFromCheck.get(0).getMessage());
+
+    // second path: checkAnalyzedSentence() called directly with only the two test rules
+    final AnalyzedSentence analyzed = langTool.getAnalyzedSentence(text);
+    final List<Rule> testRules = new ArrayList<Rule>(Arrays.asList(firstRule, secondRule));
+    final List<RuleMatch> matchesFromSentence =
+        langTool.checkAnalyzedSentence(ParagraphHandling.NORMAL, testRules, 0, 0, 0, text, analyzed);
+    assertEquals("one overlapping rule must be filtered out", 1, matchesFromSentence.size());
+    assertEquals("msg1", matchesFromSentence.get(0).getMessage());
+  }
 }

Added: 
trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java
                            (rev 0)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/SameRuleGroupFilterTest.java
    2012-05-09 21:27:28 UTC (rev 6856)
@@ -0,0 +1,84 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules;
+
+import junit.framework.TestCase;
+import org.languagetool.Language;
+import org.languagetool.rules.patterns.Element;
+import org.languagetool.rules.patterns.PatternRule;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests for {@link SameRuleGroupFilter}: filtering of two matches depending on
+ * rule id and position, plus the overlap check itself.
+ */
+public class SameRuleGroupFilterTest extends TestCase {
+
+  public void testFilter() {
+    // same rule id, second match starts inside the first one
+    final List<RuleMatch> result = filterTwoMatches("id1", "id1", 15, 25);
+    assertEquals(1, result.size());
+    assertEquals("Match1", result.get(0).getMessage());
+  }
+
+  public void testNoFilteringIfNotOverlapping() {
+    // same rule id, but the second match starts after the first one ends
+    final List<RuleMatch> result = filterTwoMatches("id1", "id1", 21, 25);
+    assertEquals(2, result.size());
+  }
+
+  public void testNoFilteringIfDifferentRulegroups() {
+    // overlapping positions, but different rule ids
+    final List<RuleMatch> result = filterTwoMatches("id1", "id2", 15, 25);
+    assertEquals(2, result.size());
+  }
+
+  public void testOverlaps() {
+    final SameRuleGroupFilter groupFilter = new SameRuleGroupFilter();
+
+    assertTrue(groupFilter.overlaps(matchAt(10, 20), matchAt(10, 20)));
+    assertTrue(groupFilter.overlaps(matchAt(10, 20), matchAt(5, 11)));
+    assertTrue(groupFilter.overlaps(matchAt(10, 20), matchAt(19, 21)));
+    assertTrue(groupFilter.overlaps(matchAt(10, 20), matchAt(11, 19)));
+    assertTrue(groupFilter.overlaps(matchAt(10, 20), matchAt(1, 10)));
+    assertTrue(groupFilter.overlaps(matchAt(10, 20), matchAt(20, 20)));
+
+    assertFalse(groupFilter.overlaps(matchAt(10, 20), matchAt(21, 30)));
+    assertFalse(groupFilter.overlaps(matchAt(10, 20), matchAt(1, 9)));
+  }
+
+  /**
+   * Runs the filter on two matches: "Match1" at 10-20 from a rule with the
+   * first id, and "Match2" at the given position from a rule with the second id.
+   */
+  private List<RuleMatch> filterTwoMatches(String firstRuleId, String secondRuleId,
+                                           int secondFromPos, int secondToPos) {
+    final List<Element> noElements = new ArrayList<Element>();
+    final Rule firstRule = new PatternRule(firstRuleId, Language.ENGLISH, noElements, "desc1", "msg1", "shortMsg1");
+    final Rule secondRule = new PatternRule(secondRuleId, Language.ENGLISH, noElements, "desc2", "msg2", "shortMsg2");
+    final RuleMatch firstMatch = new RuleMatch(firstRule, 10, 20, "Match1");
+    final RuleMatch secondMatch = new RuleMatch(secondRule, secondFromPos, secondToPos, "Match2");
+    return new SameRuleGroupFilter().filter(Arrays.asList(firstMatch, secondMatch));
+  }
+
+  /** Creates a match without a rule; sufficient for the pure position checks. */
+  private RuleMatch matchAt(int fromPos, int toPos) {
+    return new RuleMatch(null, fromPos, toPos, "FakeMatch1");
+  }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to