This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1684-Reduce-creation-of-String-instances-in-BrownBigramFeatureGenerator
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 2672b0daaab555e167c63ddd5b08518f1c4b5661
Author: Martin Wiesner <[email protected]>
AuthorDate: Tue Dec 24 15:03:21 2024 +0100

    OPENNLP-1684 Reduce creation of String instances in BrownBigramFeatureGenerator
---
 .../util/featuregen/BrownBigramFeatureGenerator.java | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
index f8bf0c8b..b16606ce 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
@@ -24,6 +24,10 @@ import java.util.List;
  */
 public class BrownBigramFeatureGenerator implements AdaptiveFeatureGenerator {
 
+  private static final String BROWNCLUSTER = "browncluster";
+  private static final String FEATURE_NEXT_BROWNCLUSTER_BASE = BROWNCLUSTER + ",n" + BROWNCLUSTER + "=";
+  private static final String FEATURE_PREV_BROWNCLUSTER_BASE = "p" + BROWNCLUSTER + "," + BROWNCLUSTER + "=";
+
   private final BrownCluster brownCluster;
 
   /**
@@ -38,20 +42,18 @@ public class BrownBigramFeatureGenerator implements AdaptiveFeatureGenerator {
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
 
-    List<String> wordClasses = BrownTokenClasses.getWordClasses(tokens[index], brownCluster);
+    List<String> wc = BrownTokenClasses.getWordClasses(tokens[index], brownCluster);
     if (index > 0) {
-      List<String> prevWordClasses = BrownTokenClasses.getWordClasses(tokens[index - 1], brownCluster);
-      for (int i = 0; i < wordClasses.size() && i < prevWordClasses.size(); i++) {
-        features.add("p" + "browncluster" + "," + "browncluster" + "="
-            + prevWordClasses.get(i) + "," + wordClasses.get(i));
+      List<String> prevWC = BrownTokenClasses.getWordClasses(tokens[index - 1], brownCluster);
+      for (int i = 0; i < wc.size() && i < prevWC.size(); i++) {
+        features.add(FEATURE_PREV_BROWNCLUSTER_BASE + prevWC.get(i) + "," + wc.get(i));
       }
     }
 
     if (index + 1 < tokens.length) {
       List<String> nextWordClasses = BrownTokenClasses.getWordClasses(tokens[index + 1], brownCluster);
-      for (int i = 0; i < wordClasses.size() && i < nextWordClasses.size(); i++) {
-        features.add("browncluster" + "," + "n" + "browncluster" + "="
-            + wordClasses.get(i) + "," + nextWordClasses.get(i));
+      for (int i = 0; i < wc.size() && i < nextWordClasses.size(); i++) {
+        features.add(FEATURE_NEXT_BROWNCLUSTER_BASE + wc.get(i) + "," + nextWordClasses.get(i));
       }
     }
   }
