This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 7cf55eeb OPENNLP-1585 Reduce creation of String objects for prefixes
in several FeatureGenerator classes - extract constants from prefixes in
several FeatureGenerator classes - improves JavaDoc along the path
7cf55eeb is described below
commit 7cf55eeb26e5691b97ebe28a3afab5a1ea4d89b7
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Jul 1 11:18:53 2024 +0200
OPENNLP-1585 Reduce creation of String objects for prefixes in several
FeatureGenerator classes
- extract constants from prefixes in several FeatureGenerator classes
- improves JavaDoc along the path
---
.../main/java/opennlp/tools/ngram/NGramModel.java | 9 +++---
.../AdditionalContextFeatureGenerator.java | 4 ++-
.../featuregen/CharacterNgramFeatureGenerator.java | 6 ++--
.../tools/util/featuregen/GeneratorFactory.java | 2 +-
.../util/featuregen/PrefixFeatureGenerator.java | 4 ++-
.../SentenceFeatureGeneratorFactory.java | 1 -
.../tools/util/featuregen/StringPattern.java | 32 +++++++++++++++++-----
.../util/featuregen/SuffixFeatureGenerator.java | 4 ++-
.../featuregen/TokenClassFeatureGenerator.java | 10 +++----
.../util/featuregen/TokenFeatureGenerator.java | 6 ++--
.../featuregen/TokenPatternFeatureGenerator.java | 12 +++++---
11 files changed, 59 insertions(+), 31 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
index db1beee0..87572d7f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
@@ -46,16 +46,16 @@ public class NGramModel implements Iterable<StringList> {
private final Map<StringList, Integer> mNGrams = new LinkedHashMap<>();
/**
- * Initializes an empty instance.
+ * Instantiates an empty {@link NGramModel} instance.
*/
public NGramModel() {
}
/**
- * Initializes the current instance.
+ * Instantiates a {@link NGramModel} via an {@link InputStream} reference.
*
* @param in the serialized model stream
- * @throws IOException
+ * @throws IOException Thrown if errors occurred reading from {@code in}.
*/
public NGramModel(InputStream in) throws IOException {
DictionaryEntryPersistor.create(in, entry -> {
@@ -67,8 +67,7 @@ public class NGramModel implements Iterable<StringList> {
countValueString = entry.attributes().getValue(COUNT);
if (countValueString == null) {
- throw new InvalidFormatException(
- "The count attribute must be set!");
+ throw new InvalidFormatException("The count attribute must be set!");
}
count = Integer.parseInt(countValueString);
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
index 24fdf5c9..c35a1c1a 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java
@@ -26,6 +26,8 @@ import java.util.List;
*/
public class AdditionalContextFeatureGenerator implements
AdaptiveFeatureGenerator {
+ private static final String PREFIX = "ne=";
+
private String[][] additionalContext;
@Override
@@ -36,7 +38,7 @@ public class AdditionalContextFeatureGenerator implements
AdaptiveFeatureGenerat
String[] context = additionalContext[index];
for (String s : context) {
- features.add("ne=" + s);
+ features.add(PREFIX + s);
}
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
index 50cb2522..62a0b109 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGenerator.java
@@ -29,6 +29,8 @@ import opennlp.tools.util.StringUtil;
*/
public class CharacterNgramFeatureGenerator implements
AdaptiveFeatureGenerator {
+ private static final String PREFIX = "ng=";
+
private final int minLength;
private final int maxLength;
@@ -45,7 +47,7 @@ public class CharacterNgramFeatureGenerator implements
AdaptiveFeatureGenerator
/**
* Initializes a {@link CharacterNgramFeatureGenerator} with
- * min 2 length and max 5 length of ngrams.
+ * min length of {@code 2} and max length of {@code 5} for ngrams.
*/
public CharacterNgramFeatureGenerator() {
this(2, 5);
@@ -58,7 +60,7 @@ public class CharacterNgramFeatureGenerator implements
AdaptiveFeatureGenerator
for (StringList tokenList : model) {
if (tokenList.size() > 0) {
- features.add("ng=" + StringUtil.toLowerCase(tokenList.getToken(0)));
+ features.add(PREFIX + StringUtil.toLowerCase(tokenList.getToken(0)));
}
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index efbae0ec..7dccaed6 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -516,7 +516,7 @@ public class GeneratorFactory {
}
/**
- * @return null if the subclass uses {@link #resourceManager} to
instantiate
+ * @return {@code null} if the subclass uses {@link #resourceManager} to
instantiate
* @throws InvalidFormatException
*/
public abstract AdaptiveFeatureGenerator create() throws
InvalidFormatException;
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
index 2e10195f..6466feef 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
@@ -21,6 +21,8 @@ import java.util.List;
public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator {
+ private static final String PREFIX = "pre=";
+
static final int DEFAULT_MAX_LENGTH = 4;
private final int prefixLength;
@@ -38,7 +40,7 @@ public class PrefixFeatureGenerator implements
AdaptiveFeatureGenerator {
String[] previousOutcomes) {
String[] prefs = getPrefixes(tokens[index]);
for (String pref : prefs) {
- features.add("pre=" + pref);
+ features.add(PREFIX + pref);
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
index 3ffefaa6..d452fe7e 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
@@ -31,7 +31,6 @@ public class SentenceFeatureGeneratorFactory
@Override
public AdaptiveFeatureGenerator create() throws InvalidFormatException {
- String beginFeatureString = generatorElement.getAttribute("begin");
return new SentenceFeatureGenerator(getBool("begin", true), getBool("end",
true));
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java
index af4cb96b..500b5a24 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java
@@ -141,49 +141,49 @@ public class StringPattern {
}
/**
- * @return true if all characters are letters.
+ * @return {@code true} if all characters are letters.
*/
public boolean isAllLetter() {
return (pattern & ALL_LETTERS) > 0;
}
/**
- * @return true if first letter is capital.
+ * @return {@code true} if first letter is capital.
*/
public boolean isInitialCapitalLetter() {
return (pattern & INITAL_CAPITAL_LETTER) > 0;
}
/**
- * @return true if all letters are capital.
+ * @return {@code true} if all letters are capital.
*/
public boolean isAllCapitalLetter() {
return (pattern & ALL_CAPITAL_LETTER) > 0;
}
/**
- * @return true if all letters are lower case.
+ * @return {@code true} if all letters are lower case.
*/
public boolean isAllLowerCaseLetter() {
return (pattern & ALL_LOWERCASE_LETTER) > 0;
}
/**
- * @return true if all chars are digits.
+ * @return {@code true} if all chars are digits.
*/
public boolean isAllDigit() {
return (pattern & ALL_DIGIT) > 0;
}
/**
- * @return true if all chars are hiragana.
+ * @return {@code true} if all chars are hiragana.
*/
public boolean isAllHiragana() {
return (pattern & ALL_HIRAGANA) > 0;
}
/**
- * @return true if all chars are katakana.
+ * @return {@code true} if all chars are katakana.
*/
public boolean isAllKatakana() {
return (pattern & ALL_KATAKANA) > 0;
@@ -196,26 +196,44 @@ public class StringPattern {
return digits;
}
+ /**
+ * @return {@code true} if a period is contained.
+ */
public boolean containsPeriod() {
return (pattern & CONTAINS_PERIOD) > 0;
}
+ /**
+ * @return {@code true} if a comma is contained.
+ */
public boolean containsComma() {
return (pattern & CONTAINS_COMMA) > 0;
}
+ /**
+ * @return {@code true} if a slash is contained.
+ */
public boolean containsSlash() {
return (pattern & CONTAINS_SLASH) > 0;
}
+ /**
+ * @return {@code true} if a digit is contained.
+ */
public boolean containsDigit() {
return (pattern & CONTAINS_DIGIT) > 0;
}
+ /**
+ * @return {@code true} if a hyphen is contained.
+ */
public boolean containsHyphen() {
return (pattern & CONTAINS_HYPHEN) > 0;
}
+ /**
+ * @return {@code true} if letters are contained.
+ */
public boolean containsLetters() {
return (pattern & CONTAINS_LETTERS) > 0;
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
index f1a18d83..e890cf08 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
@@ -21,6 +21,8 @@ import java.util.List;
public class SuffixFeatureGenerator implements AdaptiveFeatureGenerator {
+ private static final String PREFIX = "suf=";
+
static final int DEFAULT_MAX_LENGTH = 4;
private final int suffixLength;
@@ -38,7 +40,7 @@ public class SuffixFeatureGenerator implements
AdaptiveFeatureGenerator {
String[] previousOutcomes) {
String[] suffs = getSuffixes(tokens[index]);
for (String suff : suffs) {
- features.add("suf=" + suff);
+ features.add(PREFIX + suff);
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
index 56546c37..07a8d40f 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGenerator.java
@@ -23,12 +23,12 @@ import opennlp.tools.util.StringUtil;
/**
- * Generates features for different for the class of the token.
+ * Generates features for the class of the token.
*/
public class TokenClassFeatureGenerator implements AdaptiveFeatureGenerator {
- private static final String TOKEN_CLASS_PREFIX = "wc";
- private static final String TOKEN_AND_CLASS_PREFIX = "w&c";
+ private static final String TOKEN_CLASS_PREFIX = "wc=";
+ private static final String TOKEN_AND_CLASS_PREFIX = "w&c=";
private final boolean generateWordAndClassFeature;
@@ -43,10 +43,10 @@ public class TokenClassFeatureGenerator implements
AdaptiveFeatureGenerator {
@Override
public void createFeatures(List<String> features, String[] tokens, int
index, String[] preds) {
String wordClass = FeatureGeneratorUtil.tokenFeature(tokens[index]);
- features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
+ features.add(TOKEN_CLASS_PREFIX + wordClass);
if (generateWordAndClassFeature) {
- features.add(TOKEN_AND_CLASS_PREFIX + "=" +
StringUtil.toLowerCase(tokens[index]) +
+ features.add(TOKEN_AND_CLASS_PREFIX +
StringUtil.toLowerCase(tokens[index]) +
"," + wordClass);
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
index ced4a55c..53119e06 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGenerator.java
@@ -27,7 +27,7 @@ import opennlp.tools.util.StringUtil;
*/
public class TokenFeatureGenerator implements AdaptiveFeatureGenerator {
- private static final String WORD_PREFIX = "w";
+ private static final String WORD_PREFIX = "w=";
private final boolean lowercase;
/**
@@ -49,10 +49,10 @@ public class TokenFeatureGenerator implements
AdaptiveFeatureGenerator {
@Override
public void createFeatures(List<String> features, String[] tokens, int
index, String[] preds) {
if (lowercase) {
- features.add(WORD_PREFIX + "=" + StringUtil.toLowerCase(tokens[index]));
+ features.add(WORD_PREFIX + StringUtil.toLowerCase(tokens[index]));
}
else {
- features.add(WORD_PREFIX + "=" + tokens[index]);
+ features.add(WORD_PREFIX + tokens[index]);
}
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
index 99adfc98..512230a5 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java
@@ -31,6 +31,10 @@ import opennlp.tools.util.StringUtil;
*/
public class TokenPatternFeatureGenerator implements AdaptiveFeatureGenerator {
+ private static final String SUB_TOKEN_PREFIX = "st=" ;
+ private static final String SUB_TOKEN_PART2_PREFIX = "pt2=" ;
+ private static final String SUB_TOKEN_PART3_PREFIX = "pt3=" ;
+
private final Pattern noLetters = Pattern.compile("[^a-zA-Z]");
private final Tokenizer tokenizer;
@@ -57,7 +61,7 @@ public class TokenPatternFeatureGenerator implements
AdaptiveFeatureGenerator {
String[] tokenized = tokenizer.tokenize(toks[index]);
if (tokenized.length == 1) {
- feats.add("st=" + StringUtil.toLowerCase(toks[index]));
+ feats.add(SUB_TOKEN_PREFIX + StringUtil.toLowerCase(toks[index]));
return;
}
@@ -68,12 +72,12 @@ public class TokenPatternFeatureGenerator implements
AdaptiveFeatureGenerator {
for (int i = 0; i < tokenized.length; i++) {
if (i < tokenized.length - 1) {
- feats.add("pt2=" + FeatureGeneratorUtil.tokenFeature(tokenized[i]) +
+ feats.add(SUB_TOKEN_PART2_PREFIX +
FeatureGeneratorUtil.tokenFeature(tokenized[i]) +
FeatureGeneratorUtil.tokenFeature(tokenized[i + 1]));
}
if (i < tokenized.length - 2) {
- feats.add("pt3=" + FeatureGeneratorUtil.tokenFeature(tokenized[i]) +
+ feats.add(SUB_TOKEN_PART3_PREFIX +
FeatureGeneratorUtil.tokenFeature(tokenized[i]) +
FeatureGeneratorUtil.tokenFeature(tokenized[i + 1]) +
FeatureGeneratorUtil.tokenFeature(tokenized[i + 2]));
}
@@ -81,7 +85,7 @@ public class TokenPatternFeatureGenerator implements
AdaptiveFeatureGenerator {
pattern.append(FeatureGeneratorUtil.tokenFeature(tokenized[i]));
if (!noLetters.matcher(tokenized[i]).find()) {
- feats.add("st=" + StringUtil.toLowerCase(tokenized[i]));
+ feats.add(SUB_TOKEN_PREFIX + StringUtil.toLowerCase(tokenized[i]));
}
}