This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 9fa715e9 OPENNLP-421 - Remove StringListWrapper
9fa715e9 is described below
commit 9fa715e9ea272c6bd610e950e21c45fcd942e6db
Author: Richard Zowalla <[email protected]>
AuthorDate: Tue Dec 19 08:47:26 2023 +0100
OPENNLP-421 - Remove StringListWrapper
---
.../java/opennlp/tools/dictionary/Dictionary.java | 89 +++++++---------------
.../tools/namefind/DictionaryNameFinder.java | 9 ++-
.../main/java/opennlp/tools/util/StringList.java | 83 +++++++++++++++-----
3 files changed, 96 insertions(+), 85 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index f007f6b4..a9a5de7d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -32,7 +32,6 @@ import opennlp.tools.dictionary.serializer.Attributes;
import opennlp.tools.dictionary.serializer.DictionaryEntryPersistor;
import opennlp.tools.dictionary.serializer.Entry;
import opennlp.tools.util.StringList;
-import opennlp.tools.util.StringUtil;
import opennlp.tools.util.model.DictionarySerializer;
import opennlp.tools.util.model.SerializableArtifact;
@@ -43,56 +42,7 @@ import opennlp.tools.util.model.SerializableArtifact;
* @see Iterable
*/
public class Dictionary implements Iterable<StringList>, SerializableArtifact {
-
- private class StringListWrapper {
-
- private final StringList stringList;
-
- private StringListWrapper(StringList stringList) {
- this.stringList = stringList;
- }
-
- private StringList getStringList() {
- return stringList;
- }
-
- @Override
- public boolean equals(Object obj) {
-
- boolean result;
-
- if (obj == this) {
- result = true;
- }
- else if (obj instanceof StringListWrapper other) {
-
- if (isCaseSensitive) {
- result = this.stringList.equals(other.getStringList());
- }
- else {
- result = this.stringList.compareToIgnoreCase(other.getStringList());
- }
- }
- else {
- result = false;
- }
-
- return result;
- }
-
- @Override
- public int hashCode() {
- // if lookup is too slow optimize this
- return StringUtil.toLowerCase(this.stringList.toString()).hashCode();
- }
-
- @Override
- public String toString() {
- return this.stringList.toString();
- }
- }
-
- private final Set<StringListWrapper> entrySet = new HashSet<>();
+ private final Set<StringList> entrySet = new HashSet<>();
private final boolean isCaseSensitive;
private int minTokenCount = 99999;
private int maxTokenCount = 0;
@@ -131,7 +81,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
* @param tokens the new entry
*/
public void put(StringList tokens) {
- entrySet.add(new StringListWrapper(tokens));
+ entrySet.add(applyCaseSensitivity(tokens));
minTokenCount = StrictMath.min(minTokenCount, tokens.size());
maxTokenCount = StrictMath.max(maxTokenCount, tokens.size());
}
@@ -151,7 +101,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
* @return {@code true} if it contains the entry, {@code false} otherwise.
*/
public boolean contains(StringList tokens) {
- return entrySet.contains(new StringListWrapper(tokens));
+ return entrySet.contains(applyCaseSensitivity(tokens));
}
/**
@@ -160,7 +110,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
* @param tokens The tokens to be filtered out (= removed).
*/
public void remove(StringList tokens) {
- entrySet.remove(new StringListWrapper(tokens));
+ entrySet.remove(applyCaseSensitivity(tokens));
}
/**
@@ -168,7 +118,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
*/
@Override
public Iterator<StringList> iterator() {
- final Iterator<StringListWrapper> entries = entrySet.iterator();
+ final Iterator<StringList> entries = entrySet.iterator();
return new Iterator<>() {
@@ -179,7 +129,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
@Override
public StringList next() {
- return entries.next().getStringList();
+ return entries.next();
}
@Override
@@ -308,7 +258,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
@Override
public Iterator<String> iterator() {
- final Iterator<StringListWrapper> entries = entrySet.iterator();
+ final Iterator<StringList> entries = entrySet.iterator();
return new Iterator<>() {
@Override
@@ -317,7 +267,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
}
@Override
public String next() {
- return entries.next().getStringList().getToken(0);
+ return entries.next().getToken(0);
}
@Override
public void remove() {
@@ -337,7 +287,7 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
if (obj instanceof String str) {
- result = entrySet.contains(new StringListWrapper(new
StringList(str)));
+ result = entrySet.contains(new StringList(isCaseSensitive, str));
}
return result;
@@ -353,13 +303,13 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
return false;
}
Iterator<String> toCheckIter = toCheck.iterator();
- for (StringListWrapper entry : entrySet) {
+ for (StringList entry : entrySet) {
if (isCaseSensitive) {
- if (!entry.stringList.equals(new StringList(toCheckIter.next()))) {
+ if (!entry.equals(new StringList(true, toCheckIter.next()))) {
return false;
}
} else {
- if (!entry.stringList.compareToIgnoreCase(new
StringList(toCheckIter.next()))) {
+ if (!entry.compareToIgnoreCase(new StringList(false,
toCheckIter.next()))) {
return false;
}
}
@@ -383,4 +333,19 @@ public class Dictionary implements Iterable<StringList>,
SerializableArtifact {
public Class<?> getArtifactSerializerClass() {
return DictionarySerializer.class;
}
+
+ /**
+ * @return {@code true}, if this {@link Dictionary} is case-sensitive.
+ */
+ public boolean isCaseSensitive() {
+ return isCaseSensitive;
+ }
+
+ private StringList applyCaseSensitivity(StringList list) {
+ if (isCaseSensitive) {
+ return list.toCaseSensitive();
+ } else {
+ return list.toCaseInsensitive();
+ }
+ }
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
index 4f050e5f..5831c7b5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
@@ -44,7 +44,7 @@ public class DictionaryNameFinder implements TokenNameFinder {
* @param type the name type used for the produced spans. Must not be {@code
null}.
*/
public DictionaryNameFinder(Dictionary dictionary, String type) {
- mDictionary = Objects.requireNonNull(dictionary, "dictionary must not be
null");
+ this.mDictionary = Objects.requireNonNull(dictionary, "dictionary must not
be null");
this.type = Objects.requireNonNull(type, "type must not be null");
}
@@ -61,7 +61,8 @@ public class DictionaryNameFinder implements TokenNameFinder {
@Override
public Span[] find(String[] textTokenized) {
List<Span> namesFound = new LinkedList<>();
-
+ final boolean caseSensitive = mDictionary.isCaseSensitive();
+ final int maxTokenCount = mDictionary.getMaxTokenCount();
for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) {
Span nameFound = null;
String[] tokensSearching;
@@ -69,14 +70,14 @@ public class DictionaryNameFinder implements
TokenNameFinder {
for (int offsetTo = offsetFrom; offsetTo < textTokenized.length;
offsetTo++) {
int lengthSearching = offsetTo - offsetFrom + 1;
- if (lengthSearching > mDictionary.getMaxTokenCount()) {
+ if (lengthSearching > maxTokenCount) {
break;
} else {
tokensSearching = new String[lengthSearching];
System.arraycopy(textTokenized, offsetFrom, tokensSearching, 0,
lengthSearching);
- StringList entryForSearch = new StringList(tokensSearching);
+ StringList entryForSearch = new StringList(caseSensitive,
tokensSearching);
if (mDictionary.contains(entryForSearch)) {
nameFound = new Span(offsetFrom, offsetTo + 1, type);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
index 3acad9f7..004292bd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
@@ -31,8 +31,10 @@ public class StringList implements Iterable<String> {
private final String[] tokens;
+ private final boolean caseSensitive;
+
/**
- * Initializes a {@link StringList} instance.
+ * Initializes a {@link StringList} instance. By default, this instance is
case-sensitive.
* <p>
* Note: <br>
* Token String will be interned via {@link StringInterners}.
@@ -40,11 +42,11 @@ public class StringList implements Iterable<String> {
* @param singleToken One single token
*/
public StringList(String singleToken) {
- tokens = new String[]{StringInterners.intern(singleToken)};
+ this(true, singleToken);
}
/**
- * Initializes a {@link StringList} instance.
+ * Initializes a {@link StringList} instance. By default, this instance is
case-sensitive.
* <p>
* Note: <br>
* Token Strings will be interned via {@link StringInterners}.
@@ -55,6 +57,22 @@ public class StringList implements Iterable<String> {
* @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public StringList(String... tokens) {
+ this(true, tokens);
+ }
+
+ /**
+ * Initializes a {@link StringList} instance.
+ * <p>
+ * Note: <br>
+ * Token Strings will be interned via {@link StringInterners}.
+ *
+ * @param isCaseSensitive Whether it will operate case-sensitive, or not.
+ * @param tokens The string parts of the new {@link StringList}.
+ * Must not be an empty tokens array or {@code null}.
+ *
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
+ */
+ public StringList(boolean isCaseSensitive, String... tokens) {
Objects.requireNonNull(tokens, "tokens must not be null");
@@ -67,6 +85,8 @@ public class StringList implements Iterable<String> {
for (int i = 0; i < tokens.length; i++) {
this.tokens[i] = StringInterners.intern(tokens[i]);
}
+
+ this.caseSensitive = isCaseSensitive;
}
/**
@@ -127,44 +147,40 @@ public class StringList implements Iterable<String> {
* @return {@code true} if identically with ignore the case, {@code false}
otherwise.
*/
public boolean compareToIgnoreCase(StringList tokens) {
-
if (size() == tokens.size()) {
for (int i = 0; i < size(); i++) {
-
- if (getToken(i).compareToIgnoreCase(
- tokens.getToken(i)) != 0) {
+ if (getToken(i).compareToIgnoreCase(tokens.getToken(i)) != 0) {
return false;
}
}
- }
- else {
+ } else {
return false;
}
-
return true;
}
@Override
public int hashCode() {
- return Arrays.hashCode(tokens);
+ // if lookup is too slow optimize this
+ return StringUtil.toLowerCase(toString()).hashCode();
}
@Override
public boolean equals(Object obj) {
- if (this == obj) {
+ if (obj == this) {
return true;
+ } else if (obj instanceof StringList tokenList) {
+ if (caseSensitive) {
+ return Arrays.equals(tokens, tokenList.tokens);
+ } else {
+ return compareToIgnoreCase(tokenList);
+ }
}
-
- if (obj instanceof StringList tokenList) {
-
- return Arrays.equals(tokens, tokenList.tokens);
- }
-
return false;
}
/**
- * @return A human-readable representation of this {@link Span}.
+ * @return A human-readable representation of this {@link StringList}.
*/
@Override
public String toString() {
@@ -184,4 +200,33 @@ public class StringList implements Iterable<String> {
return string.toString();
}
+
+ /**
+ * @return {@code true}, if this {@link StringList} is case-sensitive.
+ */
+ public boolean isCaseSensitive() {
+ return caseSensitive;
+ }
+
+ /**
+ * @return If this {@link StringList} is case-insensitive,
+ * the same instance is returned. Otherwise, a new object is returned.
+ */
+ public StringList toCaseInsensitive() {
+ if (isCaseSensitive()) {
+ return new StringList(false, tokens);
+ }
+ return this;
+ }
+
+ /**
+ * @return If this {@link StringList} is case-sensitive,
+ * the same instance is returned. Otherwise, a new object is returned.
+ */
+ public StringList toCaseSensitive() {
+ if (!isCaseSensitive()) {
+ return new StringList(true, tokens);
+ }
+ return this;
+ }
}