This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new cb4a685d OPENNLP-1759: Optimize computation of hashCode in StringList (#814)
cb4a685d is described below
commit cb4a685d2a756f6d2a4842d6071c63c3cf3b4d39
Author: Martin Wiesner <[email protected]>
AuthorDate: Thu Jul 10 19:38:35 2025 +0200
OPENNLP-1759: Optimize computation of hashCode in StringList (#814)
---
.../main/java/opennlp/tools/util/StringList.java | 32 ++++++++--------------
.../uima/dictionary/DictionaryResourceTest.java | 16 +++++------
2 files changed, 18 insertions(+), 30 deletions(-)
diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/StringList.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/StringList.java
index 004292bd..a9a438b1 100644
--- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/StringList.java
+++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/util/StringList.java
@@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
+import java.util.stream.Collectors;
import opennlp.tools.util.jvm.StringInterners;
@@ -30,9 +31,11 @@ import opennlp.tools.util.jvm.StringInterners;
public class StringList implements Iterable<String> {
private final String[] tokens;
-
private final boolean caseSensitive;
+ // It is safe to use caching of the hashCode for this class
+ private transient Integer hashCode = null; // initial value is uncomputed
+
/**
* Initializes a {@link StringList} instance. By default, this instance is case-sensitive.
* <p>
@@ -53,7 +56,7 @@ public class StringList implements Iterable<String> {
*
* @param tokens The string parts of the new {@link StringList}.
* Must not be an empty tokens array or {@code null}.
- *
+ *
* @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public StringList(String... tokens) {
@@ -73,7 +76,6 @@ public class StringList implements Iterable<String> {
* @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public StringList(boolean isCaseSensitive, String... tokens) {
-
Objects.requireNonNull(tokens, "tokens must not be null");
if (tokens.length == 0) {
@@ -81,7 +83,6 @@ public class StringList implements Iterable<String> {
}
this.tokens = new String[tokens.length];
-
for (int i = 0; i < tokens.length; i++) {
this.tokens[i] = StringInterners.intern(tokens[i]);
}
@@ -161,8 +162,11 @@ public class StringList implements Iterable<String> {
@Override
public int hashCode() {
- // if lookup is too slow optimize this
- return StringUtil.toLowerCase(toString()).hashCode();
+ if (hashCode == null) {
+ // compute once and cache to save CPU cycles during use
+ this.hashCode = StringUtil.toLowerCase(String.join(",", tokens)).hashCode();
+ }
+ return hashCode;
}
@Override
@@ -184,21 +188,7 @@ public class StringList implements Iterable<String> {
*/
@Override
public String toString() {
- StringBuilder string = new StringBuilder();
-
- string.append('[');
-
- for (int i = 0; i < size(); i++) {
- string.append(getToken(i));
-
- if (i < size() - 1) {
- string.append(',');
- }
- }
-
- string.append(']');
-
- return string.toString();
+ return Arrays.stream(tokens).collect(Collectors.joining(",", "[", "]"));
}
/**
diff --git a/opennlp-extensions/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java b/opennlp-extensions/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
index 9f1804b0..a5d6956c 100644
--- a/opennlp-extensions/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
+++ b/opennlp-extensions/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
@@ -38,6 +38,7 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.StringList;
import opennlp.uima.AbstractTest;
import opennlp.uima.util.CasUtil;
@@ -69,16 +70,13 @@ public class DictionaryResourceTest extends AbstractTest {
public void testDictionaryWasLoaded() {
try {
- DictionaryResource dic = (DictionaryResource) AE.getResourceManager()
+ final DictionaryResource dic = (DictionaryResource) AE.getResourceManager()
.getResource("/opennlp.uima.Dictionary");
- // simple check if ordering always is the same...
- Assertions.assertEquals(
- "[[Berlin], [Stockholm], [New,York], [London], [Copenhagen], [Paris]]",
- dic.getDictionary().toString());
- // else we can do a simple test like this
- Assertions.assertEquals(6,
- dic.getDictionary().asStringSet().size(), "There should be six entries in the dictionary");
- Assertions.assertTrue(dic.getDictionary().contains(new StringList("London")),
+ final Dictionary d = dic.getDictionary();
+ Assertions.assertNotNull(d);
+ Assertions.assertEquals(6, d.asStringSet().size(),
+ "There should be six entries in the dictionary");
+ Assertions.assertTrue(d.contains(new StringList("London")),
"London should be in the dictionary");
} catch (Exception e) {
Assertions.fail("Dictionary was not loaded.");