Revision: 7791 http://languagetool.svn.sourceforge.net/languagetool/?rev=7791&view=rev Author: dnaber Date: 2012-08-04 21:59:09 +0000 (Sat, 04 Aug 2012) Log Message: ----------- helper class to extract suggestions from rule messages - forgot to commit with its test case - not yet used
Added Paths: ----------- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java Added: trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java (rev 0) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SuggestionExtractor.java 2012-08-04 21:59:09 UTC (rev 7791) @@ -0,0 +1,94 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.spelling; + +import org.languagetool.Language; +import org.languagetool.rules.Rule; +import org.languagetool.rules.patterns.PatternRule; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Extract tokens from suggestions. + */ +class SuggestionExtractor { + + private final static Pattern SUGGESTION_PATTERN = Pattern.compile("<suggestion.*?>(.*?)</suggestion>"); + private final static Pattern BACK_REFERENCE_PATTERN = Pattern.compile("\\\\" + "\\d+"); + private final Language language; + + public SuggestionExtractor(Language language) { + this.language = language; + } + + /** + * Get the tokens of simple suggestions, i.e. those that don't use back references. + */ + List<String> getSuggestionTokens(Rule rule) { + final List<String> wordsToBeIgnored = new ArrayList<String>(); + if (rule instanceof PatternRule) { + final PatternRule patternRule = (PatternRule) rule; + final String message = patternRule.getMessage(); + final List<String> suggestions = getSimpleSuggestions(message); + final List<String> tokens = getSuggestionTokens(suggestions); + wordsToBeIgnored.addAll(tokens); + } + return wordsToBeIgnored; + } + + /** + * Get suggestions that don't use back references or regular + * @param message + * @return + */ + List<String> getSimpleSuggestions(String message) { + final Matcher matcher = SUGGESTION_PATTERN.matcher(message); + int startPos = 0; + final List<String> suggestions = new ArrayList<String>(); + while (matcher.find(startPos)) { + final String suggestion = matcher.group(1); + startPos = matcher.end(); + if (isSimpleSuggestion(suggestion)) { + suggestions.add(suggestion); + } + } + return suggestions; + } + + private boolean isSimpleSuggestion(String suggestion) { + if (suggestion.contains("<match")) { + return false; + } + final Matcher matcher = BACK_REFERENCE_PATTERN.matcher(suggestion); + return !matcher.find(); + } + + private List<String> getSuggestionTokens(List<String> suggestions) { + final List<String> tokens = new ArrayList<String>(); + for (String suggestion : suggestions) { + final List<String> suggestionTokens = language.getWordTokenizer().tokenize(suggestion); + tokens.addAll(suggestionTokens); + } + return tokens; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs