Revision: 8039
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=8039&view=rev
Author:   dnaber
Date:     2012-09-14 19:40:30 +0000 (Fri, 14 Sep 2012)
Log Message:
-----------
[en] don't consider this an error: "Ellipsis . . . as suggested by The Chicago 
Manual of Style"

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
    
trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java

Added Paths:
-----------
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java
    
trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-09-14 19:20:24 UTC (rev 8038)
+++ trunk/JLanguageTool/CHANGES.txt     2012-09-14 19:40:30 UTC (rev 8039)
@@ -31,6 +31,8 @@
 
  -English:
    -fixed false alarm (sf bug #3543914)
+   -don't consider these ellipsis an error: "foo . . . bar" (source:
+    Chicago Manual of Style)
 
  -German:
    -added simple verb/subject agreement checker

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java    
2012-09-14 19:20:24 UTC (rev 8038)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java    
2012-09-14 19:40:30 UTC (rev 8039)
@@ -18,22 +18,9 @@
  */
 package org.languagetool.language;
 
-import java.util.Arrays;
-import java.util.List;
-import java.util.Locale;
-
 import org.languagetool.Language;
-import org.languagetool.rules.CommaWhitespaceRule;
-import org.languagetool.rules.DoublePunctuationRule;
-import org.languagetool.rules.LongSentenceRule;
-import org.languagetool.rules.Rule;
-import org.languagetool.rules.UppercaseSentenceStartRule;
-import org.languagetool.rules.WhitespaceRule;
-import org.languagetool.rules.en.AvsAnRule;
-import org.languagetool.rules.en.CompoundRule;
-import org.languagetool.rules.en.EnglishUnpairedBracketsRule;
-import org.languagetool.rules.en.EnglishWordRepeatBeginningRule;
-import org.languagetool.rules.en.EnglishWordRepeatRule;
+import org.languagetool.rules.*;
+import org.languagetool.rules.en.*;
 import org.languagetool.synthesis.Synthesizer;
 import org.languagetool.synthesis.en.EnglishSynthesizer;
 import org.languagetool.tagging.Tagger;
@@ -45,6 +32,10 @@
 import org.languagetool.tokenizers.Tokenizer;
 import org.languagetool.tokenizers.en.EnglishWordTokenizer;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
 public class English extends Language {
 
   private Tagger tagger;
@@ -127,7 +118,7 @@
   @Override
   public List<Class<? extends Rule>> getRelevantRules() {
     return Arrays.asList(
-            CommaWhitespaceRule.class,
+            EnglishCommaWhitespaceRule.class,
             DoublePunctuationRule.class,
             EnglishUnpairedBracketsRule.class,
             UppercaseSentenceStartRule.class,

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
   2012-09-14 19:20:24 UTC (rev 8038)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
   2012-09-14 19:40:30 UTC (rev 8039)
@@ -40,7 +40,7 @@
   }
 
   @Override
-  public final String getId() {
+  public String getId() {
     return "COMMA_PARENTHESIS_WHITESPACE";
   }
 
@@ -59,12 +59,15 @@
     int prevLen = 0;
     for (int i = 0; i < tokens.length; i++) {
       final String token = tokens[i].getToken();
-      final boolean isWhite = tokens[i].isWhitespace() 
-      || tokens[i].isFieldCode();      
+      final boolean isWhite = tokens[i].isWhitespace() || 
tokens[i].isFieldCode();
       String msg = null;
       int fixLen = 0;
       String suggestionText = null;
-      if (isWhite && isLeftBracket(prevToken)) {
+      final int skip = getExceptionSkip(tokens, i);
+      if (skip > 0) {
+        // ignore
+        i += skip;
+      } else if (isWhite && isLeftBracket(prevToken)) {
         msg = messages.getString("no_space_after");
         suggestionText = prevToken;
         fixLen = 1;
@@ -85,9 +88,8 @@
           suggestionText = ",";
           fixLen = 1;
           //exception for duplicated comma (we already have another rule for 
that)
-          if (i + 1 < tokens.length
-             && ",".equals(tokens[i + 1].getToken())) {
-           msg = null; 
+          if (i + 1 < tokens.length && ",".equals(tokens[i + 1].getToken())) {
+           msg = null;
           }
         } else if (token.equals(".")) {
           msg = messages.getString("no_space_before_dot");
@@ -117,6 +119,14 @@
     return toRuleMatchArray(ruleMatches);
   }
 
+  /**
+   * @return return 0 if there is no exception here, return the amount of 
tokens to be skipped
+   * if there's an exception here, i.e. if you want to skip over tokens that 
would otherwise be an error
+   */
+  protected int getExceptionSkip(AnalyzedTokenReadings[] tokens, int pos) {
+    return 0;
+  }
+
   static boolean isNotQuoteOrHyphen(final String str) {
     if (str.length() == 1) {
       final char c = str.charAt(0);

Added: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java
                         (rev 0)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/EnglishCommaWhitespaceRule.java
 2012-09-14 19:40:30 UTC (rev 8039)
@@ -0,0 +1,66 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.en;
+
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.rules.CommaWhitespaceRule;
+
+import java.util.ResourceBundle;
+
+/**
+ * Exception for English, according to The Chicago Manual of Style as quoted by
+ * http://en.wikipedia.org/wiki/Ellipsis.
+ */
+public class EnglishCommaWhitespaceRule extends CommaWhitespaceRule {
+
+  public EnglishCommaWhitespaceRule(final ResourceBundle messages) {
+    super(messages);
+  }
+
+  @Override
+  public final String getId() {
+    return "ENGLISH_COMMA_PARENTHESIS_WHITESPACE";
+  }
+
+  @Override
+  protected int getExceptionSkip(AnalyzedTokenReadings[] tokens, int pos) {
+    // allow spaced end ellipsis, i.e. " . . . .":
+    if (pos + 8 < tokens.length
+            && isDotAt(tokens, pos + 2)
+            && isDotAt(tokens, pos + 4)
+            && isDotAt(tokens, pos + 6)
+            && isDotAt(tokens, pos + 8)) {
+      return 7;
+    }
+    // allow spaced ellipsis, i.e. " . . . ":
+    if (pos + 6 < tokens.length
+            && isDotAt(tokens, pos + 2)
+            && isDotAt(tokens, pos + 4)
+            && isDotAt(tokens, pos + 6)) {
+      return 5;
+    }
+    return 0;
+  }
+
+  private boolean isDotAt(AnalyzedTokenReadings[] tokens, int pos) {
+    final String str = tokens[pos].getToken();
+    return str.length() > 0 && str.charAt(0) == '.';
+  }
+
+}

Modified: 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java
       2012-09-14 19:20:24 UTC (rev 8038)
+++ 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/CommaWhitespaceRuleTest.java
       2012-09-14 19:40:30 UTC (rev 8039)
@@ -19,6 +19,7 @@
 package org.languagetool.rules;
 
 import java.io.IOException;
+import java.util.Arrays;
 
 import junit.framework.TestCase;
 
@@ -28,7 +29,8 @@
 
 public class CommaWhitespaceRuleTest extends TestCase {
 
-  private CommaWhitespaceRule rule;
+  protected CommaWhitespaceRule rule;
+
   private JLanguageTool langTool;
   
   @Override
@@ -73,12 +75,14 @@
     assertEquals(6, matches[0].getToPos());
     assertEquals(11, matches[1].getFromPos());
     assertEquals(13, matches[1].getToPos());
+  }
 
+  public void testSpecialCaseForEnglish() throws IOException {
     assertMatches("Ellipsis . . . as suggested by The Chicago Manual of 
Style", 3);
     assertMatches("Ellipsis . . . . as suggested by The Chicago Manual of 
Style", 4);
   }
 
-  private void assertMatches(String text, int expectedMatches) throws 
IOException {
+  protected void assertMatches(String text, int expectedMatches) throws 
IOException {
     assertEquals(expectedMatches, 
rule.match(langTool.getAnalyzedSentence(text)).length);
   }
 

Added: 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java
                             (rev 0)
+++ 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/en/EnglishCommaWhitespaceRuleTest.java
     2012-09-14 19:40:30 UTC (rev 8039)
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.en;
+
+import org.languagetool.TestTools;
+import org.languagetool.rules.CommaWhitespaceRuleTest;
+
+import java.io.IOException;
+
+public class EnglishCommaWhitespaceRuleTest extends CommaWhitespaceRuleTest {
+
+  @Override
+  public void setUp() throws IOException {
+    super.setUp();
+    rule = new EnglishCommaWhitespaceRule(TestTools.getEnglishMessages());
+  }
+
+  @Override
+  public void testSpecialCaseForEnglish() throws IOException {
+    assertMatches("Ellipsis . . . as suggested by The Chicago Manual of 
Style", 0);
+    assertMatches("Ellipsis . . . as suggested . But this is wrong.", 1);
+    assertMatches("Ellipsis . . . . as suggested by The Chicago Manual of 
Style", 0);
+    assertMatches("Ellipsis . . . . as suggested . But this is wrong.", 1);
+    assertMatches("Ellipsis . . . ", 0);
+    assertMatches("Ellipsis . . . . ", 0);
+    assertMatches("Ellipsis . . .", 0);
+    assertMatches("Ellipsis . . . .", 0);
+  }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Got visibility?
Most devs have no idea what their production app looks like.
Find out how fast your code is with AppDynamics Lite.
http://ad.doubleclick.net/clk;262219671;13503038;y?
http://info.appdynamics.com/FreeJavaPerformanceDownload.html
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to