This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1654-Add-thread-safe-version-of-NameFinderME
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 4558088135ba5841ecb92d0e51ad086388742e9a
Author: Martin Wiesner <[email protected]>
AuthorDate: Sun Nov 24 18:06:43 2024 +0100

    OPENNLP-1654 Add thread-safe version of NameFinderME
    - adds ThreadSafeNameFinderME
    - adds additional constructor to ThreadSafeTokenizerME & ThreadSafeSentenceDetectorME to be consistent with ThreadSafePOSTaggerME
    - improves existing JavaDoc along the path
---
 .../tools/lemmatizer/ThreadSafeLemmatizerME.java   |  7 +--
 .../ThreadSafeNameFinderME.java}                   | 51 ++++++++++++----------
 .../tools/postag/ThreadSafePOSTaggerME.java        |  7 +--
 .../sentdetect/ThreadSafeSentenceDetectorME.java   | 35 ++++++++++++---
 .../tools/tokenize/ThreadSafeTokenizerME.java      | 32 +++++++++++---
 5 files changed, 90 insertions(+), 42 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
 
b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
index e63c27d3..ba84c83b 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
@@ -24,17 +24,18 @@ import opennlp.tools.commons.ThreadSafe;
 /**
  * A thread-safe version of the {@link LemmatizerME}. Using it is completely 
transparent.
  * You can use it in a single-threaded context as well, it only incurs a 
minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}. 
Although the implementation is
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation 
is
  * lightweight because the model is not duplicated, if you have many 
long-running threads,
  * you may run into memory problems.
- * </p>
  * <p>
  * Be careful when using this in a Jakarta EE application, for example.
  * </p>
  * The user is responsible for clearing the {@link ThreadLocal}.
  *
  * @see Lemmatizer
+ * @see LemmatizerME
  */
 @ThreadSafe
 public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java
similarity index 58%
copy from 
opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
copy to 
opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java
index 17ea14e8..fec411cd 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java
@@ -15,63 +15,66 @@
  * limitations under the License.
  */
 
-package opennlp.tools.sentdetect;
+package opennlp.tools.namefind;
 
 import opennlp.tools.commons.ThreadSafe;
 import opennlp.tools.util.Span;
 
 /**
- * A thread-safe version of SentenceDetectorME. Using it is completely 
transparent. You can use it in
- * a single-threaded context as well, it only incurs a minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}. 
Although the implementation is
+ * A thread-safe version of {@link NameFinderME}. Using it is completely 
transparent.
+ * You can use it in a single-threaded context as well, it only incurs a 
minimal overhead.
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation 
is
  * lightweight because the model is not duplicated, if you have many 
long-running threads,
  * you may run into memory problems.
- * </p>
  * <p>
  * Be careful when using this in a Jakarta EE application, for example.
  * </p>
  * The user is responsible for clearing the {@link ThreadLocal}.
+ *
+ * @see NameFinderME
+ * @see TokenNameFinder
  */
 @ThreadSafe
-public class ThreadSafeSentenceDetectorME implements SentenceDetector, 
AutoCloseable {
+public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable {
 
-  private final SentenceModel model;
+  private final TokenNameFinderModel model;
 
-  private final ThreadLocal<SentenceDetectorME> threadLocal =
-      new ThreadLocal<>();
+  private final ThreadLocal<NameFinderME> threadLocal = new ThreadLocal<>();
 
-  public ThreadSafeSentenceDetectorME(SentenceModel model) {
+  /**
+   * Initializes a {@link ThreadSafeNameFinderME} with the specified {@code 
model}.
+   *
+   * @param model A valid {@link TokenNameFinderModel}.
+   */
+  public ThreadSafeNameFinderME(TokenNameFinderModel model) {
     super();
     this.model = model;
   }
 
   // If a thread-local version exists, return it. Otherwise, create, then 
return.
-  private SentenceDetectorME getSD() {
-    SentenceDetectorME sd = threadLocal.get();
+  private NameFinderME getNameFinder() {
+    NameFinderME sd = threadLocal.get();
     if (sd == null) {
-      sd = new SentenceDetectorME(model);
+      sd = new NameFinderME(model);
       threadLocal.set(sd);
     }
     return sd;
   }
 
-  public double[] getSentenceProbabilities() {
-    return getSD().getSentenceProbabilities();
-  }
-
   @Override
-  public String[] sentDetect(CharSequence s) {
-    return getSD().sentDetect(s);
+  public void close() {
+    threadLocal.remove();
   }
 
   @Override
-  public Span[] sentPosDetect(CharSequence s) {
-    return getSD().sentPosDetect(s);
+  public Span[] find(String[] tokens) {
+    return getNameFinder().find(tokens);
   }
 
   @Override
-  public void close() {
-    threadLocal.remove();
+  public void clearAdaptiveData() {
+    getNameFinder().clearAdaptiveData();
   }
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
index 36984212..ce3a6fef 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
@@ -26,17 +26,18 @@ import opennlp.tools.util.Sequence;
 /**
  * A thread-safe version of the {@link POSTaggerME}. Using it is completely 
transparent.
  * You can use it in a single-threaded context as well, it only incurs a 
minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}. 
Although the implementation is
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the 
implementation is
  * lightweight because the model is not duplicated, if you have many 
long-running threads,
  * you may run into memory problems.
- * </p>
  * <p>
  * Be careful when using this in a Jakarta EE application, for example.
  * </p>
  * The user is responsible for clearing the {@link ThreadLocal}.
  *
  * @see POSTagger
+ * @see POSTaggerME
  */
 @ThreadSafe
 public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable {
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
index 17ea14e8..5d3106f3 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
@@ -17,30 +17,51 @@
 
 package opennlp.tools.sentdetect;
 
+import java.io.IOException;
+
 import opennlp.tools.commons.ThreadSafe;
+import opennlp.tools.util.DownloadUtil;
 import opennlp.tools.util.Span;
 
 /**
- * A thread-safe version of SentenceDetectorME. Using it is completely 
transparent. You can use it in
- * a single-threaded context as well, it only incurs a minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}. 
Although the implementation is
+ * A thread-safe version of {@link SentenceDetectorME}. Using it is completely 
transparent.
+ * You can use it in a single-threaded context as well, it only incurs a 
minimal overhead.
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation 
is
  * lightweight because the model is not duplicated, if you have many 
long-running threads,
  * you may run into memory problems.
- * </p>
  * <p>
  * Be careful when using this in a Jakarta EE application, for example.
  * </p>
  * The user is responsible for clearing the {@link ThreadLocal}.
+ *
+ * @see SentenceDetector
+ * @see SentenceDetectorME
  */
 @ThreadSafe
 public class ThreadSafeSentenceDetectorME implements SentenceDetector, 
AutoCloseable {
 
   private final SentenceModel model;
 
-  private final ThreadLocal<SentenceDetectorME> threadLocal =
-      new ThreadLocal<>();
+  private final ThreadLocal<SentenceDetectorME> threadLocal = new 
ThreadLocal<>();
+
+  /**
+   * Initializes a {@link ThreadSafeSentenceDetectorME} by downloading a 
default model
+   * for a given {@code language}.
+   *
+   * @param language An ISO-conformant language code.
+   * @throws IOException Thrown if the model could not be downloaded or saved.
+   */
+  public ThreadSafeSentenceDetectorME(String language) throws IOException {
+    this(DownloadUtil.downloadModel(language, 
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class));
+  }
 
+  /**
+   * Initializes a {@link ThreadSafeSentenceDetectorME} with the specified 
{@code model}.
+   *
+   * @param model A valid {@link SentenceModel}.
+   */
   public ThreadSafeSentenceDetectorME(SentenceModel model) {
     super();
     this.model = model;
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java 
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
index 3ebbd1e3..90166caf 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
@@ -17,21 +17,27 @@
 
 package opennlp.tools.tokenize;
 
+import java.io.IOException;
+
 import opennlp.tools.commons.ThreadSafe;
+import opennlp.tools.util.DownloadUtil;
 import opennlp.tools.util.Span;
 
 /**
- * A thread-safe version of TokenizerME. Using it is completely transparent. 
You can use it in
- * a single-threaded context as well, it only incurs a minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}. 
Although the implementation is
+ * A thread-safe version of {@link TokenizerME}. Using it is completely 
transparent.
+ * You can use it in a single-threaded context as well, it only incurs a 
minimal overhead.
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation 
is
  * lightweight because the model is not duplicated, if you have many 
long-running threads,
  * you may run into memory problems.
- * </p>
  * <p>
  * Be careful when using this in a Jakarta EE application, for example.
  * </p>
  * The user is responsible for clearing the {@link ThreadLocal}.
+ *
+ * @see Tokenizer
+ * @see TokenizerME
  */
 @ThreadSafe
 public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {
@@ -40,6 +46,22 @@ public class ThreadSafeTokenizerME implements Tokenizer, 
AutoCloseable {
 
   private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>();
 
+  /**
+   * Initializes a {@link ThreadSafeTokenizerME} by downloading a default model
+   * for a given {@code language}.
+   *
+   * @param language An ISO-conformant language code.
+   * @throws IOException Thrown if the model could not be downloaded or saved.
+   */
+  public ThreadSafeTokenizerME(String language) throws IOException {
+    this(DownloadUtil.downloadModel(language, 
DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class));
+  }
+
+  /**
+   * Initializes a {@link ThreadSafeTokenizerME} with the specified {@code 
model}.
+   *
+   * @param model A valid {@link TokenizerModel}.
+   */
   public ThreadSafeTokenizerME(TokenizerModel model) {
     super();
     this.model = model;

Reply via email to