Author: joern
Date: Fri Jan 3 14:08:22 2014
New Revision: 1555098
URL: http://svn.apache.org/r1555098
Log:
OPENNLP-72 Replaced all String.toLowerCase invocations with
StringUtil.toLowerCase
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/SimpleLemmatizer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
Fri Jan 3 14:08:22 2014
@@ -35,6 +35,7 @@ import opennlp.tools.dictionary.serializ
import opennlp.tools.dictionary.serializer.EntryInserter;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
+import opennlp.tools.util.StringUtil;
/**
* This class is a dictionary.
@@ -81,7 +82,7 @@ public class Dictionary implements Itera
@Override
public int hashCode() {
// if lookup is too slow optimize this
- return this.stringList.toString().toLowerCase().hashCode();
+ return StringUtil.toLowerCase(this.stringList.toString()).hashCode();
}
@Override
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/PortugueseContractionUtility.java
Fri Jan 3 14:08:22 2014
@@ -21,6 +21,8 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
+import opennlp.tools.util.StringUtil;
+
/**
* Utility class to handle Portuguese contractions.
* <p>
@@ -190,7 +192,7 @@ public class PortugueseContractionUtilit
}
- String leftLower = parts[parts.length - 1].toLowerCase();
+ String leftLower = StringUtil.toLowerCase(parts[parts.length - 1]);
key = leftLower + "+" + right;
if (CONTRACTIONS.containsKey(key)) {
String r = CONTRACTIONS.get(key);
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java
Fri Jan 3 14:08:22 2014
@@ -34,6 +34,7 @@ import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.StringUtil;
public class Muc6NameSampleStreamFactory extends
AbstractSampleStreamFactory<NameSample> {
@@ -57,7 +58,7 @@ public class Muc6NameSampleStreamFactory
new DirectorySampleStream(params.getData(), new FileFilter() {
public boolean accept(File file) {
- return file.getName().toLowerCase().endsWith(".sgm");
+ return StringUtil.toLowerCase(file.getName()).endsWith(".sgm");
}
}, false), Charset.forName("UTF-8"));
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java
Fri Jan 3 14:08:22 2014
@@ -32,6 +32,7 @@ import opennlp.tools.tokenize.Whitespace
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
+import opennlp.tools.util.StringUtil;
/**
* Name Sample Stream parser for the OntoNotes 4.0 corpus.
@@ -129,8 +130,7 @@ public class OntoNotesNameSampleStream e
int typeEnd = token.indexOf("\"", typeBegin.length());
- entityType = token.substring(typeBegin.length(), typeEnd)
- .toLowerCase();
+ entityType = StringUtil.toLowerCase(token.substring(typeBegin.length(), typeEnd));
}
if (token.contains(">")) {
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/SimpleLemmatizer.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/SimpleLemmatizer.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/SimpleLemmatizer.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/SimpleLemmatizer.java
Fri Jan 3 14:08:22 2014
@@ -28,6 +28,8 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import opennlp.tools.util.StringUtil;
+
public class SimpleLemmatizer implements DictionaryLemmatizer {
public final Set<String> constantTags = new
HashSet<String>(Arrays.asList("NNP","NP00000"));
@@ -56,7 +58,7 @@ public class SimpleLemmatizer implements
keys.addAll(Arrays.asList(word,postag));
}
else {
- keys.addAll(Arrays.asList(word.toLowerCase(),postag));
+ keys.addAll(Arrays.asList(StringUtil.toLowerCase(word),postag));
}
return keys;
}
@@ -76,7 +78,7 @@ public class SimpleLemmatizer implements
lemma = word;
}
else {
- lemma = word.toLowerCase();
+ lemma = StringUtil.toLowerCase(word);
}
return lemma;
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
Fri Jan 3 14:08:22 2014
@@ -33,6 +33,7 @@ import opennlp.tools.dictionary.serializ
import opennlp.tools.dictionary.serializer.EntryInserter;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
+import opennlp.tools.util.StringUtil;
/**
* The {@link NGramModel} can be used to create ngrams and character ngrams.
@@ -182,8 +183,8 @@ public class NGramModel implements Itera
for (int textIndex = 0;
textIndex + lengthIndex - 1 < chars.length(); textIndex++) {
- String gram =
- chars.substring(textIndex, textIndex + lengthIndex).toLowerCase();
+ String gram = StringUtil.toLowerCase(
+ chars.substring(textIndex, textIndex + lengthIndex));
add(new StringList(new String[]{gram}));
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java
Fri Jan 3 14:08:22 2014
@@ -153,7 +153,7 @@ public class POSDictionary implements It
return dictionary.get(word);
}
else {
- return dictionary.get(word.toLowerCase());
+ return dictionary.get(StringUtil.toLowerCase(word));
}
}
@@ -325,7 +325,7 @@ public class POSDictionary implements It
if (this.caseSensitive) {
return dictionary.put(word, tags);
} else {
- return dictionary.put(word.toLowerCase(), tags);
+ return dictionary.put(StringUtil.toLowerCase(word), tags);
}
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java
Fri Jan 3 14:08:22 2014
@@ -39,6 +39,7 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.StringList;
+import opennlp.tools.util.StringUtil;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.StringPattern;
import opennlp.tools.util.model.ModelType;
@@ -415,7 +416,7 @@ public class POSTaggerME implements POST
if (dict.isCaseSensitive()) {
word = words[i];
} else {
- word = words[i].toLowerCase();
+ word = StringUtil.toLowerCase(words[i]);
}
if (!newEntries.containsKey(word)) {
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
Fri Jan 3 14:08:22 2014
@@ -21,6 +21,7 @@ import java.util.List;
import opennlp.tools.ngram.NGramModel;
import opennlp.tools.util.StringList;
+import opennlp.tools.util.StringUtil;
/**
* The {@link CharacterNgramFeatureGenerator} uses character ngrams to
@@ -52,7 +53,7 @@ public class CharacterNgramFeatureGenera
for (StringList tokenList : model) {
if (tokenList.size() > 0) {
- features.add("ng=" + tokenList.getToken(0).toLowerCase());
+ features.add("ng=" + StringUtil.toLowerCase(tokenList.getToken(0)));
}
}
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
Fri Jan 3 14:08:22 2014
@@ -20,6 +20,8 @@ package opennlp.tools.util.featuregen;
import java.util.List;
import java.util.regex.Pattern;
+import opennlp.tools.util.StringUtil;
+
/**
@@ -111,7 +113,7 @@ public class FastTokenClassFeatureGenera
features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
if (generateWordAndClassFeature) {
- features.add(TOKEN_AND_CLASS_PREFIX + "=" + tokens[index].toLowerCase()+","+wordClass);
+ features.add(TOKEN_AND_CLASS_PREFIX + "=" + StringUtil.toLowerCase(tokens[index]) +","+wordClass);
}
}
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
Fri Jan 3 14:08:22 2014
@@ -19,6 +19,8 @@ package opennlp.tools.util.featuregen;
import java.util.List;
+import opennlp.tools.util.StringUtil;
+
/**
* Generates features for the class of the token.
@@ -43,7 +45,7 @@ public class TokenClassFeatureGenerator
features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
if (generateWordAndClassFeature) {
- features.add(TOKEN_AND_CLASS_PREFIX + "=" + tokens[index].toLowerCase()+","+wordClass);
+ features.add(TOKEN_AND_CLASS_PREFIX + "=" + StringUtil.toLowerCase(tokens[index]) + "," + wordClass);
}
}
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
Fri Jan 3 14:08:22 2014
@@ -20,6 +20,8 @@ package opennlp.tools.util.featuregen;
import java.util.List;
+import opennlp.tools.util.StringUtil;
+
/**
* Generates a feature which contains the token itself.
*/
@@ -38,7 +40,7 @@ public class TokenFeatureGenerator exten
public void createFeatures(List<String> features, String[] tokens, int
index, String[] preds) {
if (lowercase) {
- features.add(WORD_PREFIX + "=" + tokens[index].toLowerCase());
+ features.add(WORD_PREFIX + "=" + StringUtil.toLowerCase(tokens[index]));
}
else {
features.add(WORD_PREFIX + "=" + tokens[index]);
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java?rev=1555098&r1=1555097&r2=1555098&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
Fri Jan 3 14:08:22 2014
@@ -23,6 +23,7 @@ import java.util.regex.Pattern;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.StringUtil;
/**
* Partitions tokens into sub-tokens based on character classes and generates
@@ -55,7 +56,7 @@ public class TokenPatternFeatureGenerato
String[] tokenized = tokenizer.tokenize(toks[index]);
if (tokenized.length == 1) {
- feats.add("st=" + toks[index].toLowerCase());
+ feats.add("st=" + StringUtil.toLowerCase(toks[index]));
return;
}
@@ -79,7 +80,7 @@ public class TokenPatternFeatureGenerato
pattern.append(FeatureGeneratorUtil.tokenFeature(tokenized[i]));
if (!noLetters.matcher(tokenized[i]).find()) {
- feats.add("st=" + tokenized[i].toLowerCase());
+ feats.add("st=" + StringUtil.toLowerCase(tokenized[i]));
}
}