This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/main by this push:
     new 9fa715e9 OPENNLP-421 - Remove StringListWrapper
9fa715e9 is described below

commit 9fa715e9ea272c6bd610e950e21c45fcd942e6db
Author: Richard Zowalla <[email protected]>
AuthorDate: Tue Dec 19 08:47:26 2023 +0100

    OPENNLP-421 - Remove StringListWrapper
---
 .../java/opennlp/tools/dictionary/Dictionary.java  | 89 +++++++---------------
 .../tools/namefind/DictionaryNameFinder.java       |  9 ++-
 .../main/java/opennlp/tools/util/StringList.java   | 83 +++++++++++++++-----
 3 files changed, 96 insertions(+), 85 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index f007f6b4..a9a5de7d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -32,7 +32,6 @@ import opennlp.tools.dictionary.serializer.Attributes;
 import opennlp.tools.dictionary.serializer.DictionaryEntryPersistor;
 import opennlp.tools.dictionary.serializer.Entry;
 import opennlp.tools.util.StringList;
-import opennlp.tools.util.StringUtil;
 import opennlp.tools.util.model.DictionarySerializer;
 import opennlp.tools.util.model.SerializableArtifact;
 
@@ -43,56 +42,7 @@ import opennlp.tools.util.model.SerializableArtifact;
  * @see Iterable
  */
 public class Dictionary implements Iterable<StringList>, SerializableArtifact {
-
-  private class StringListWrapper {
-
-    private final StringList stringList;
-
-    private StringListWrapper(StringList stringList) {
-      this.stringList = stringList;
-    }
-
-    private StringList getStringList() {
-      return stringList;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-
-      boolean result;
-
-      if (obj == this) {
-        result = true;
-      }
-      else if (obj instanceof StringListWrapper other) {
-
-        if (isCaseSensitive) {
-          result = this.stringList.equals(other.getStringList());
-        }
-        else {
-          result = this.stringList.compareToIgnoreCase(other.getStringList());
-        }
-      }
-      else {
-        result = false;
-      }
-
-      return result;
-    }
-
-    @Override
-    public int hashCode() {
-      // if lookup is too slow optimize this
-      return StringUtil.toLowerCase(this.stringList.toString()).hashCode();
-    }
-
-    @Override
-    public String toString() {
-      return this.stringList.toString();
-    }
-  }
-
-  private final Set<StringListWrapper> entrySet = new HashSet<>();
+  private final Set<StringList> entrySet = new HashSet<>();
   private final boolean isCaseSensitive;
   private int minTokenCount = 99999;
   private int maxTokenCount = 0;
@@ -131,7 +81,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
    * @param tokens the new entry
    */
   public void put(StringList tokens) {
-    entrySet.add(new StringListWrapper(tokens));
+    entrySet.add(applyCaseSensitivity(tokens));
     minTokenCount = StrictMath.min(minTokenCount, tokens.size());
     maxTokenCount = StrictMath.max(maxTokenCount, tokens.size());
   }
@@ -151,7 +101,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
    * @return {@code true} if it contains the entry, {@code false} otherwise.
    */
   public boolean contains(StringList tokens) {
-    return entrySet.contains(new StringListWrapper(tokens));
+    return entrySet.contains(applyCaseSensitivity(tokens));
   }
 
   /**
@@ -160,7 +110,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
    * @param tokens The tokens to be filtered out (= removed).
    */
   public void remove(StringList tokens) {
-    entrySet.remove(new StringListWrapper(tokens));
+    entrySet.remove(applyCaseSensitivity(tokens));
   }
 
   /**
@@ -168,7 +118,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
    */
   @Override
   public Iterator<StringList> iterator() {
-    final Iterator<StringListWrapper> entries = entrySet.iterator();
+    final Iterator<StringList> entries = entrySet.iterator();
 
     return new Iterator<>() {
 
@@ -179,7 +129,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
 
       @Override
       public StringList next() {
-        return entries.next().getStringList();
+        return entries.next();
       }
 
       @Override
@@ -308,7 +258,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
 
       @Override
       public Iterator<String> iterator() {
-        final Iterator<StringListWrapper> entries = entrySet.iterator();
+        final Iterator<StringList> entries = entrySet.iterator();
 
         return new Iterator<>() {
           @Override
@@ -317,7 +267,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
           }
           @Override
           public String next() {
-            return entries.next().getStringList().getToken(0);
+            return entries.next().getToken(0);
           }
           @Override
           public void remove() {
@@ -337,7 +287,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
 
         if (obj instanceof String str) {
 
-          result = entrySet.contains(new StringListWrapper(new StringList(str)));
+          result = entrySet.contains(new StringList(isCaseSensitive, str));
 
         }
         return result;
@@ -353,13 +303,13 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
           return false;
         }
         Iterator<String> toCheckIter = toCheck.iterator();
-        for (StringListWrapper entry : entrySet) {
+        for (StringList entry : entrySet) {
           if (isCaseSensitive) {
-            if (!entry.stringList.equals(new StringList(toCheckIter.next()))) {
+            if (!entry.equals(new StringList(true, toCheckIter.next()))) {
               return false;
             }
           } else {
-            if (!entry.stringList.compareToIgnoreCase(new StringList(toCheckIter.next()))) {
+            if (!entry.compareToIgnoreCase(new StringList(false, toCheckIter.next()))) {
               return false;
             }
           }
@@ -383,4 +333,19 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
   public Class<?> getArtifactSerializerClass() {
     return DictionarySerializer.class;
   }
+
+  /**
+   * @return {@code true}, if this {@link Dictionary} is case-sensitive.
+   */
+  public boolean isCaseSensitive() {
+    return isCaseSensitive;
+  }
+
+  private StringList applyCaseSensitivity(StringList list) {
+    if (isCaseSensitive) {
+      return list.toCaseSensitive();
+    } else {
+      return list.toCaseInsensitive();
+    }
+  }
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
index 4f050e5f..5831c7b5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java
@@ -44,7 +44,7 @@ public class DictionaryNameFinder implements TokenNameFinder {
    * @param type the name type used for the produced spans. Must not be {@code null}.
    */
   public DictionaryNameFinder(Dictionary dictionary, String type) {
-    mDictionary = Objects.requireNonNull(dictionary, "dictionary must not be null");
+    this.mDictionary = Objects.requireNonNull(dictionary, "dictionary must not be null");
     this.type = Objects.requireNonNull(type, "type must not be null");
   }
 
@@ -61,7 +61,8 @@ public class DictionaryNameFinder implements TokenNameFinder {
   @Override
   public Span[] find(String[] textTokenized) {
     List<Span> namesFound = new LinkedList<>();
-
+    final boolean caseSensitive = mDictionary.isCaseSensitive();
+    final int maxTokenCount = mDictionary.getMaxTokenCount();
     for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) {
       Span nameFound = null;
       String[] tokensSearching;
@@ -69,14 +70,14 @@ public class DictionaryNameFinder implements TokenNameFinder {
       for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) {
         int lengthSearching = offsetTo - offsetFrom + 1;
 
-        if (lengthSearching > mDictionary.getMaxTokenCount()) {
+        if (lengthSearching > maxTokenCount) {
           break;
         } else {
           tokensSearching = new String[lengthSearching];
           System.arraycopy(textTokenized, offsetFrom, tokensSearching, 0,
               lengthSearching);
 
-          StringList entryForSearch = new StringList(tokensSearching);
+          StringList entryForSearch = new StringList(caseSensitive, tokensSearching);
 
           if (mDictionary.contains(entryForSearch)) {
             nameFound = new Span(offsetFrom, offsetTo + 1, type);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
index 3acad9f7..004292bd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
@@ -31,8 +31,10 @@ public class StringList implements Iterable<String> {
 
   private final String[] tokens;
 
+  private final boolean caseSensitive;
+
   /**
-   * Initializes a {@link StringList} instance.
+   * Initializes a {@link StringList} instance. By default, this instance is case-sensitive.
    * <p>
    * Note: <br>
    * Token String will be interned via {@link StringInterners}.
@@ -40,11 +42,11 @@ public class StringList implements Iterable<String> {
    * @param singleToken One single token
    */
   public StringList(String singleToken) {
-    tokens = new String[]{StringInterners.intern(singleToken)};
+    this(true, singleToken);
   }
 
   /**
-   * Initializes a {@link StringList} instance.
+   * Initializes a {@link StringList} instance. By default, this instance is case-sensitive.
    * <p>
    * Note: <br>
    * Token Strings will be interned via {@link StringInterners}.
@@ -55,6 +57,22 @@ public class StringList implements Iterable<String> {
    * @throws IllegalArgumentException Thrown if parameters were invalid.
    */
   public StringList(String... tokens) {
+    this(true, tokens);
+  }
+
+  /**
+   * Initializes a {@link StringList} instance.
+   * <p>
+   * Note: <br>
+   * Token Strings will be interned via {@link StringInterners}.
+   *
+   * @param isCaseSensitive Whether it will operate case-sensitive, or not.
+   * @param tokens The string parts of the new {@link StringList}.
+   *               Must not be an empty tokens array or {@code null}.
+   *
+   * @throws IllegalArgumentException Thrown if parameters were invalid.
+   */
+  public StringList(boolean isCaseSensitive, String... tokens) {
 
     Objects.requireNonNull(tokens, "tokens must not be null");
 
@@ -67,6 +85,8 @@ public class StringList implements Iterable<String> {
     for (int i = 0; i < tokens.length; i++) {
       this.tokens[i] = StringInterners.intern(tokens[i]);
     }
+
+    this.caseSensitive = isCaseSensitive;
   }
 
   /**
@@ -127,44 +147,40 @@ public class StringList implements Iterable<String> {
    * @return {@code true} if identically with ignore the case, {@code false} otherwise.
    */
   public boolean compareToIgnoreCase(StringList tokens) {
-
     if (size() == tokens.size()) {
       for (int i = 0; i < size(); i++) {
-
-        if (getToken(i).compareToIgnoreCase(
-            tokens.getToken(i)) != 0) {
+        if (getToken(i).compareToIgnoreCase(tokens.getToken(i)) != 0) {
           return false;
         }
       }
-    }
-    else {
+    } else {
       return false;
     }
-
     return true;
   }
 
   @Override
   public int hashCode() {
-    return Arrays.hashCode(tokens);
+    // if lookup is too slow optimize this
+    return StringUtil.toLowerCase(toString()).hashCode();
   }
 
   @Override
   public boolean equals(Object obj) {
-    if (this == obj) {
+    if (obj == this) {
       return true;
+    } else if (obj instanceof StringList tokenList) {
+      if (caseSensitive) {
+        return Arrays.equals(tokens, tokenList.tokens);
+      } else {
+        return compareToIgnoreCase(tokenList);
+      }
     }
-
-    if (obj instanceof StringList tokenList) {
-
-      return Arrays.equals(tokens, tokenList.tokens);
-    }
-
     return false;
   }
 
   /**
-   * @return A human-readable representation of this {@link Span}.
+   * @return A human-readable representation of this {@link StringList}.
    */
   @Override
   public String toString() {
@@ -184,4 +200,33 @@ public class StringList implements Iterable<String> {
 
     return string.toString();
   }
+
+  /**
+   * @return {@code true}, if this {@link StringList} is case-sensitive.
+   */
+  public boolean isCaseSensitive() {
+    return caseSensitive;
+  }
+
+  /**
+   * @return If this {@link StringList} is case-insensitive,
+   * the same instance is returned. Otherwise, a new object is returned.
+   */
+  public StringList toCaseInsensitive() {
+    if (isCaseSensitive()) {
+      return new StringList(false, tokens);
+    }
+    return this;
+  }
+
+  /**
+   * @return If this {@link StringList} is case-sensitive,
+   * the same instance is returned. Otherwise, a new object is returned.
+   */
+  public StringList toCaseSensitive() {
+    if (!isCaseSensitive()) {
+      return new StringList(true, tokens);
+    }
+    return this;
+  }
 }

Reply via email to