Revision: 7791
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7791&view=rev
Author:   dnaber
Date:     2012-08-04 21:59:09 +0000 (Sat, 04 Aug 2012)
Log Message:
-----------
Helper class to extract suggestions from rule messages - forgot to commit it
together with its test case - not yet used

Added Paths:
-----------
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java

Added: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java
                               (rev 0)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java
       2012-08-04 21:59:09 UTC (rev 7791)
@@ -0,0 +1,94 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.spelling;
+
+import org.languagetool.Language;
+import org.languagetool.rules.Rule;
+import org.languagetool.rules.patterns.PatternRule;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Extracts the word tokens of the simple suggestions found in rule messages,
+ * i.e. of the text enclosed in {@code <suggestion>...</suggestion>} elements.
+ * Suggestions that use back references or {@code <match} elements are skipped.
+ */
+class SuggestionExtractor {
+
+  /** A suggestion element; group 1 captures the text between the opening and closing tag. */
+  private static final Pattern SUGGESTION_PATTERN = Pattern.compile("<suggestion.*?>(.*?)</suggestion>");
+  /** A back reference: a literal backslash followed by one or more digits. */
+  private static final Pattern BACK_REFERENCE_PATTERN = Pattern.compile("\\\\" + "\\d+");
+  private final Language language;
+
+  /** @param language supplies the word tokenizer used to split suggestions into tokens */
+  public SuggestionExtractor(Language language) {
+    this.language = language;
+  }
+
+  /**
+   * Get the tokens of simple suggestions, i.e. those that don't use back references.
+   * @param rule the rule whose message is examined; only a {@link PatternRule} is considered
+   * @return the tokens of all simple suggestions, or an empty list for any other rule
+   */
+  List<String> getSuggestionTokens(Rule rule) {
+    final List<String> wordsToBeIgnored = new ArrayList<String>();
+    if (rule instanceof PatternRule) {
+      final String message = ((PatternRule) rule).getMessage();
+      wordsToBeIgnored.addAll(getSuggestionTokens(getSimpleSuggestions(message)));
+    }
+    return wordsToBeIgnored;
+  }
+
+  /**
+   * Get suggestions that use neither back references nor {@code <match} elements.
+   * @param message the rule message to search for {@code <suggestion>} elements
+   * @return the text of each simple suggestion, in the order it occurs in the message
+   */
+  List<String> getSimpleSuggestions(String message) {
+    final List<String> suggestions = new ArrayList<String>();
+    final Matcher matcher = SUGGESTION_PATTERN.matcher(message);
+    // find() resumes right after the previous match, so no start position has to be tracked
+    while (matcher.find()) {
+      final String suggestion = matcher.group(1);
+      if (isSimpleSuggestion(suggestion)) {
+        suggestions.add(suggestion);
+      }
+    }
+    return suggestions;
+  }
+
+  /** Whether the suggestion contains neither {@code <match} nor a back reference. */
+  private boolean isSimpleSuggestion(String suggestion) {
+    return !suggestion.contains("<match") && !BACK_REFERENCE_PATTERN.matcher(suggestion).find();
+  }
+
+  /** Tokenizes every suggestion with the language's word tokenizer and collects all tokens. */
+  private List<String> getSuggestionTokens(List<String> suggestions) {
+    final List<String> tokens = new ArrayList<String>();
+    for (String suggestion : suggestions) {
+      tokens.addAll(language.getWordTokenizer().tokenize(suggestion));
+    }
+    return tokens;
+  }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to