This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-1654-Add-thread-safe-version-of-NameFinderME in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 4558088135ba5841ecb92d0e51ad086388742e9a Author: Martin Wiesner <[email protected]> AuthorDate: Sun Nov 24 18:06:43 2024 +0100 OPENNLP-1654 Add thread-safe version of NameFinderME - adds ThreadSafeNameFinderME - adds additional constructor to ThreadSafeTokenizerME & ThreadSafeSentenceDetectorME to be consistent with ThreadSafePOSTaggerME - improves existing JavaDoc along the path --- .../tools/lemmatizer/ThreadSafeLemmatizerME.java | 7 +-- .../ThreadSafeNameFinderME.java} | 51 ++++++++++++---------- .../tools/postag/ThreadSafePOSTaggerME.java | 7 +-- .../sentdetect/ThreadSafeSentenceDetectorME.java | 35 ++++++++++++--- .../tools/tokenize/ThreadSafeTokenizerME.java | 32 +++++++++++--- 5 files changed, 90 insertions(+), 42 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java index e63c27d3..ba84c83b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java @@ -24,17 +24,18 @@ import opennlp.tools.commons.ThreadSafe; /** * A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * <p> - * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * + * @implNote + * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. - * </p> * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> * The user is responsible for clearing the {@link ThreadLocal}. * * @see Lemmatizer + * @see LemmatizerME */ @ThreadSafe public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable { diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java similarity index 58% copy from opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java copy to opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java index 17ea14e8..fec411cd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java @@ -15,63 +15,66 @@ * limitations under the License. */ -package opennlp.tools.sentdetect; +package opennlp.tools.namefind; import opennlp.tools.commons.ThreadSafe; import opennlp.tools.util.Span; /** - * A thread-safe version of SentenceDetectorME. Using it is completely transparent. You can use it in - * a single-threaded context as well, it only incurs a minimal overhead. - * <p> - * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * A thread-safe version of {@link NameFinderME}. Using it is completely transparent. + * You can use it in a single-threaded context as well, it only incurs a minimal overhead. + * + * @implNote + * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. - * </p> * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> * The user is responsible for clearing the {@link ThreadLocal}. + * + * @see NameFinderME + * @see TokenNameFinder */ @ThreadSafe -public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable { +public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable { - private final SentenceModel model; + private final TokenNameFinderModel model; - private final ThreadLocal<SentenceDetectorME> threadLocal = - new ThreadLocal<>(); + private final ThreadLocal<NameFinderME> threadLocal = new ThreadLocal<>(); - public ThreadSafeSentenceDetectorME(SentenceModel model) { + /** + * Initializes a {@link ThreadSafeNameFinderME} with the specified {@code model}. + * + * @param model A valid {@link TokenNameFinderModel}. + */ + public ThreadSafeNameFinderME(TokenNameFinderModel model) { super(); this.model = model; } // If a thread-local version exists, return it. Otherwise, create, then return. - private SentenceDetectorME getSD() { - SentenceDetectorME sd = threadLocal.get(); + private NameFinderME getNameFinder() { + NameFinderME sd = threadLocal.get(); if (sd == null) { - sd = new SentenceDetectorME(model); + sd = new NameFinderME(model); threadLocal.set(sd); } return sd; } - public double[] getSentenceProbabilities() { - return getSD().getSentenceProbabilities(); - } - @Override - public String[] sentDetect(CharSequence s) { - return getSD().sentDetect(s); + public void close() { + threadLocal.remove(); } @Override - public Span[] sentPosDetect(CharSequence s) { - return getSD().sentPosDetect(s); + public Span[] find(String[] tokens) { + return getNameFinder().find(tokens); } @Override - public void close() { - threadLocal.remove(); + public void clearAdaptiveData() { + getNameFinder().clearAdaptiveData(); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java index 36984212..ce3a6fef 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java @@ -26,17 +26,18 @@ import opennlp.tools.util.Sequence; /** * A thread-safe version of the {@link POSTaggerME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * <p> - * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * + * @implNote + * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. - * </p> * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> * The user is responsible for clearing the {@link ThreadLocal}. * * @see POSTagger + * @see POSTaggerME */ @ThreadSafe public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable { diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java index 17ea14e8..5d3106f3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java @@ -17,30 +17,51 @@ package opennlp.tools.sentdetect; +import java.io.IOException; + import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.util.DownloadUtil; import opennlp.tools.util.Span; /** - * A thread-safe version of SentenceDetectorME. Using it is completely transparent. You can use it in - * a single-threaded context as well, it only incurs a minimal overhead. - * <p> - * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * A thread-safe version of {@link SentenceDetectorME}. Using it is completely transparent. + * You can use it in a single-threaded context as well, it only incurs a minimal overhead. + * + * @implNote + * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. - * </p> * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> * The user is responsible for clearing the {@link ThreadLocal}. + * + * @see SentenceDetector + * @see SentenceDetectorME */ @ThreadSafe public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable { private final SentenceModel model; - private final ThreadLocal<SentenceDetectorME> threadLocal = - new ThreadLocal<>(); + private final ThreadLocal<SentenceDetectorME> threadLocal = new ThreadLocal<>(); + + /** + * Initializes a {@link ThreadSafeSentenceDetectorME} by downloading a default model + * for a given {@code language}. + * + * @param language An ISO conform language code. + * @throws IOException Thrown if the model could not be downloaded or saved. + */ + public ThreadSafeSentenceDetectorME(String language) throws IOException { + this(DownloadUtil.downloadModel(language, DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class)); + } + /** + * Initializes a {@link ThreadSafeSentenceDetectorME} with the specified {@code model}. + * + * @param model A valid {@link SentenceModel}. + */ public ThreadSafeSentenceDetectorME(SentenceModel model) { super(); this.model = model; diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java index 3ebbd1e3..90166caf 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java @@ -17,21 +17,27 @@ package opennlp.tools.tokenize; +import java.io.IOException; + import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.util.DownloadUtil; import opennlp.tools.util.Span; /** - * A thread-safe version of TokenizerME. Using it is completely transparent. You can use it in - * a single-threaded context as well, it only incurs a minimal overhead. - * <p> - * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * A thread-safe version of {@link TokenizerME}. Using it is completely transparent. + * You can use it in a single-threaded context as well, it only incurs a minimal overhead. + * + * @implNote + * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. - * </p> * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> * The user is responsible for clearing the {@link ThreadLocal}. + * + * @see Tokenizer + * @see TokenizerME */ @ThreadSafe public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable { @@ -40,6 +46,22 @@ public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable { private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>(); + /** + * Initializes a {@link ThreadSafeTokenizerME} by downloading a default model + * for a given {@code language}. + * + * @param language An ISO conform language code. + * @throws IOException Thrown if the model could not be downloaded or saved. + */ + public ThreadSafeTokenizerME(String language) throws IOException { + this(DownloadUtil.downloadModel(language, DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class)); + } + + /** + * Initializes a {@link ThreadSafeTokenizerME} with the specified {@code model}. + * + * @param model A valid {@link TokenizerModel}. + */ public ThreadSafeTokenizerME(TokenizerModel model) { super(); this.model = model;
