This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/main by this push:
     new ccca8ca5 OPENNLP-1620 - It should be possible to remove the allocated ThreadLocal
ccca8ca5 is described below

commit ccca8ca5d5f95ae9af9b23126e6611058605bb17
Author: Richard Zowalla <[email protected]>
AuthorDate: Tue Oct 15 13:25:30 2024 +0200

    OPENNLP-1620 - It should be possible to remove the allocated ThreadLocal
---
 .../tools/postag/ThreadSafePOSTaggerME.java        | 16 ++++++++++++++-
 .../sentdetect/ThreadSafeSentenceDetectorME.java   | 24 +++++++++++++++-------
 .../tools/tokenize/ThreadSafeTokenizerME.java      | 22 ++++++++++++++++----
 3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
index 52419ddf..b567f1ea 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
@@ -23,9 +23,18 @@ import opennlp.tools.util.Sequence;
 /**
  * A thread-safe version of the POSTaggerME. Using it is completely transparent. You can use it in
  * a single-threaded context as well, it only incurs a minimal overhead.
+ * <p>
+ * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
+ * lightweight because the model is not duplicated, if you have many long-running threads,
+ * you may run into memory problems.
+ * </p>
+ * <p>
+ * Be careful when using this in a Jakarta EE application, for example.
+ * </p>
+ * The user is responsible for clearing the {@link ThreadLocal}.
  */
 @ThreadSafe
-public class ThreadSafePOSTaggerME implements POSTagger {
+public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable {
 
   private final POSModel model;
 
@@ -64,4 +73,9 @@ public class ThreadSafePOSTaggerME implements POSTagger {
   public Sequence[] topKSequences(String[] sentence, Object[] additionaContext) {
     return getTagger().topKSequences(sentence, additionaContext);
   }
+
+  @Override
+  public void close() {
+    threadLocal.remove();
+  }
 }
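
The AutoCloseable addition above makes try-with-resources the natural cleanup pattern for short-lived
or single-threaded use. A minimal sketch, not part of this commit; the model file name below is an
assumption:

    import java.io.File;

    import opennlp.tools.postag.POSModel;
    import opennlp.tools.postag.ThreadSafePOSTaggerME;

    public class TagExample {
      public static void main(String[] args) throws Exception {
        // Hypothetical model file; use whatever POS model you have locally.
        POSModel model = new POSModel(new File("en-pos-maxent.bin"));
        // close() calls threadLocal.remove(), so the tagger instance allocated
        // for the calling thread is released when the block exits.
        try (ThreadSafePOSTaggerME tagger = new ThreadSafePOSTaggerME(model)) {
          String[] tags = tagger.tag(new String[] {"The", "quick", "brown", "fox"});
          System.out.println(String.join(" ", tags));
        }
      }
    }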
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
index 99abc6fb..17ea14e8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
@@ -24,16 +24,21 @@ import opennlp.tools.util.Span;
  * A thread-safe version of SentenceDetectorME. Using it is completely transparent. You can use it in
  * a single-threaded context as well, it only incurs a minimal overhead.
  * <p>
- * Note, however, that this implementation uses a ThreadLocal. Although the implementation is
- * lightweight as the model is not duplicated, if you have many long-running threads, you may run
- * into memory issues. Be careful when you use this in a JEE application, for example.
+ * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
+ * lightweight because the model is not duplicated, if you have many long-running threads,
+ * you may run into memory problems.
+ * </p>
+ * <p>
+ * Be careful when using this in a Jakarta EE application, for example.
+ * </p>
+ * The user is responsible for clearing the {@link ThreadLocal}.
  */
 @ThreadSafe
-public class ThreadSafeSentenceDetectorME implements SentenceDetector {
+public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable {
 
   private final SentenceModel model;
 
-  private final ThreadLocal<SentenceDetectorME> sentenceDetectorThreadLocal =
+  private final ThreadLocal<SentenceDetectorME> threadLocal =
       new ThreadLocal<>();
 
   public ThreadSafeSentenceDetectorME(SentenceModel model) {
@@ -43,10 +48,10 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector {
 
   // If a thread-local version exists, return it. Otherwise, create, then return.
   private SentenceDetectorME getSD() {
-    SentenceDetectorME sd = sentenceDetectorThreadLocal.get();
+    SentenceDetectorME sd = threadLocal.get();
     if (sd == null) {
       sd = new SentenceDetectorME(model);
-      sentenceDetectorThreadLocal.set(sd);
+      threadLocal.set(sd);
     }
     return sd;
   }
@@ -64,4 +69,9 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector {
   public Span[] sentPosDetect(CharSequence s) {
     return getSD().sentPosDetect(s);
   }
+
+  @Override
+  public void close() {
+    threadLocal.remove();
+  }
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
index b92dd5e0..3ebbd1e3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
@@ -23,13 +23,22 @@ import opennlp.tools.util.Span;
 /**
  * A thread-safe version of TokenizerME. Using it is completely transparent. You can use it in
  * a single-threaded context as well, it only incurs a minimal overhead.
+ * <p>
+ * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
+ * lightweight because the model is not duplicated, if you have many long-running threads,
+ * you may run into memory problems.
+ * </p>
+ * <p>
+ * Be careful when using this in a Jakarta EE application, for example.
+ * </p>
+ * The user is responsible for clearing the {@link ThreadLocal}.
  */
 @ThreadSafe
-public class ThreadSafeTokenizerME implements Tokenizer {
+public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {
 
   private final TokenizerModel model;
 
-  private final ThreadLocal<TokenizerME> tokenizerThreadLocal = new ThreadLocal<>();
+  private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>();
 
   public ThreadSafeTokenizerME(TokenizerModel model) {
     super();
@@ -37,10 +46,10 @@ public class ThreadSafeTokenizerME implements Tokenizer {
   }
 
   private TokenizerME getTokenizer() {
-    TokenizerME tokenizer = tokenizerThreadLocal.get();
+    TokenizerME tokenizer = threadLocal.get();
     if (tokenizer == null) {
       tokenizer = new TokenizerME(model);
-      tokenizerThreadLocal.set(tokenizer);
+      threadLocal.set(tokenizer);
     }
     return tokenizer;
   }
@@ -58,4 +67,9 @@ public class ThreadSafeTokenizerME implements Tokenizer {
   public double[] getProbabilities() {
     return getTokenizer().getTokenProbabilities();
   }
+
+  @Override
+  public void close() {
+    threadLocal.remove();
+  }
 }
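
Because close() only removes the ThreadLocal entry of the thread that calls it, long-running worker
threads (the Jakarta EE concern mentioned in the Javadoc) must perform the cleanup on the worker
thread itself. A hedged sketch for ThreadSafeSentenceDetectorME in a thread pool; the model and
document list are placeholders, and calling close() per task trades instance reuse for prompt cleanup:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    import opennlp.tools.sentdetect.SentenceModel;
    import opennlp.tools.sentdetect.ThreadSafeSentenceDetectorME;

    public class SentenceSplitExample {

      public static String[][] split(SentenceModel model, List<String> documents) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        ThreadSafeSentenceDetectorME detector = new ThreadSafeSentenceDetectorME(model);
        try {
          List<Future<String[]>> futures = new ArrayList<>();
          for (String doc : documents) {
            futures.add(pool.submit(() -> {
              try {
                return detector.sentDetect(doc);
              } finally {
                // Runs on the worker thread, so it removes that thread's
                // SentenceDetectorME instance from the shared ThreadLocal.
                detector.close();
              }
            }));
          }
          String[][] results = new String[futures.size()][];
          for (int i = 0; i < futures.size(); i++) {
            results[i] = futures.get(i).get();
          }
          return results;
        } finally {
          pool.shutdown();
        }
      }
    }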
