This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 7b385368 OPENNLP-1654 Add thread-safe version of NameFinderME:
(1) adds ThreadSafeNameFinderME; (2) adds an additional constructor to
ThreadSafeTokenizerME and ThreadSafeSentenceDetectorME to be consistent with
ThreadSafePOSTaggerME; (3) improves existing JavaDoc along the way
7b385368 is described below
commit 7b385368b12d080860485bdfdfb3dcac3d86243e
Author: Martin Wiesner <[email protected]>
AuthorDate: Sun Nov 24 18:06:43 2024 +0100
OPENNLP-1654 Add thread-safe version of NameFinderME
- adds ThreadSafeNameFinderME
- adds an additional constructor to ThreadSafeTokenizerME and
ThreadSafeSentenceDetectorME, for consistency with ThreadSafePOSTaggerME
- improves existing JavaDoc along the way
---
.../tools/lemmatizer/ThreadSafeLemmatizerME.java | 7 +--
.../ThreadSafeNameFinderME.java} | 51 ++++++++++++----------
.../tools/postag/ThreadSafePOSTaggerME.java | 7 +--
.../sentdetect/ThreadSafeSentenceDetectorME.java | 35 ++++++++++++---
.../tools/tokenize/ThreadSafeTokenizerME.java | 32 +++++++++++---
5 files changed, 90 insertions(+), 42 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
index e63c27d3..ba84c83b 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
@@ -24,17 +24,18 @@ import opennlp.tools.commons.ThreadSafe;
/**
* A thread-safe version of the {@link LemmatizerME}. Using it is completely
transparent.
* You can use it in a single-threaded context as well, it only incurs a
minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}.
Although the implementation is
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation
is
* lightweight because the model is not duplicated, if you have many
long-running threads,
* you may run into memory problems.
- * </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
*
* @see Lemmatizer
+ * @see LemmatizerME
*/
@ThreadSafe
public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
b/opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java
similarity index 58%
copy from
opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
copy to
opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java
index 17ea14e8..fec411cd 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java
@@ -15,63 +15,66 @@
* limitations under the License.
*/
-package opennlp.tools.sentdetect;
+package opennlp.tools.namefind;
import opennlp.tools.commons.ThreadSafe;
import opennlp.tools.util.Span;
/**
- * A thread-safe version of SentenceDetectorME. Using it is completely
transparent. You can use it in
- * a single-threaded context as well, it only incurs a minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}.
Although the implementation is
+ * A thread-safe version of {@link NameFinderME}. Using it is completely
transparent.
+ * You can use it in a single-threaded context as well, it only incurs a
minimal overhead.
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation
is
* lightweight because the model is not duplicated, if you have many
long-running threads,
* you may run into memory problems.
- * </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
+ *
+ * @see NameFinderME
+ * @see TokenNameFinder
*/
@ThreadSafe
-public class ThreadSafeSentenceDetectorME implements SentenceDetector,
AutoCloseable {
+public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable {
- private final SentenceModel model;
+ private final TokenNameFinderModel model;
- private final ThreadLocal<SentenceDetectorME> threadLocal =
- new ThreadLocal<>();
+ private final ThreadLocal<NameFinderME> threadLocal = new ThreadLocal<>();
- public ThreadSafeSentenceDetectorME(SentenceModel model) {
+ /**
+ * Initializes a {@link ThreadSafeNameFinderME} with the specified {@code
model}.
+ *
+ * @param model A valid {@link TokenNameFinderModel}.
+ */
+ public ThreadSafeNameFinderME(TokenNameFinderModel model) {
super();
this.model = model;
}
// If a thread-local version exists, return it. Otherwise, create, then
return.
- private SentenceDetectorME getSD() {
- SentenceDetectorME sd = threadLocal.get();
+ private NameFinderME getNameFinder() {
+ NameFinderME sd = threadLocal.get();
if (sd == null) {
- sd = new SentenceDetectorME(model);
+ sd = new NameFinderME(model);
threadLocal.set(sd);
}
return sd;
}
- public double[] getSentenceProbabilities() {
- return getSD().getSentenceProbabilities();
- }
-
@Override
- public String[] sentDetect(CharSequence s) {
- return getSD().sentDetect(s);
+ public void close() {
+ threadLocal.remove();
}
@Override
- public Span[] sentPosDetect(CharSequence s) {
- return getSD().sentPosDetect(s);
+ public Span[] find(String[] tokens) {
+ return getNameFinder().find(tokens);
}
@Override
- public void close() {
- threadLocal.remove();
+ public void clearAdaptiveData() {
+ getNameFinder().clearAdaptiveData();
}
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
index 36984212..a1e29283 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java
@@ -26,17 +26,18 @@ import opennlp.tools.util.Sequence;
/**
* A thread-safe version of the {@link POSTaggerME}. Using it is completely
transparent.
* You can use it in a single-threaded context as well, it only incurs a
minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}.
Although the implementation is
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation
is
* lightweight because the model is not duplicated, if you have many
long-running threads,
* you may run into memory problems.
- * </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
*
* @see POSTagger
+ * @see POSTaggerME
*/
@ThreadSafe
public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
index 17ea14e8..5d3106f3 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java
@@ -17,30 +17,51 @@
package opennlp.tools.sentdetect;
+import java.io.IOException;
+
import opennlp.tools.commons.ThreadSafe;
+import opennlp.tools.util.DownloadUtil;
import opennlp.tools.util.Span;
/**
- * A thread-safe version of SentenceDetectorME. Using it is completely
transparent. You can use it in
- * a single-threaded context as well, it only incurs a minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}.
Although the implementation is
+ * A thread-safe version of {@link SentenceDetectorME}. Using it is completely
transparent.
+ * You can use it in a single-threaded context as well, it only incurs a
minimal overhead.
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation
is
* lightweight because the model is not duplicated, if you have many
long-running threads,
* you may run into memory problems.
- * </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
+ *
+ * @see SentenceDetector
+ * @see SentenceDetectorME
*/
@ThreadSafe
public class ThreadSafeSentenceDetectorME implements SentenceDetector,
AutoCloseable {
private final SentenceModel model;
- private final ThreadLocal<SentenceDetectorME> threadLocal =
- new ThreadLocal<>();
+ private final ThreadLocal<SentenceDetectorME> threadLocal = new
ThreadLocal<>();
+
+ /**
+ * Initializes a {@link ThreadSafeSentenceDetectorME} by downloading a
default model
+ * for a given {@code language}.
+ *
+ * @param language An ISO-conformant language code.
+ * @throws IOException Thrown if the model could not be downloaded or saved.
+ */
+ public ThreadSafeSentenceDetectorME(String language) throws IOException {
+ this(DownloadUtil.downloadModel(language,
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class));
+ }
+ /**
+ * Initializes a {@link ThreadSafeSentenceDetectorME} with the specified
{@code model}.
+ *
+ * @param model A valid {@link SentenceModel}.
+ */
public ThreadSafeSentenceDetectorME(SentenceModel model) {
super();
this.model = model;
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
index 3ebbd1e3..90166caf 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java
@@ -17,21 +17,27 @@
package opennlp.tools.tokenize;
+import java.io.IOException;
+
import opennlp.tools.commons.ThreadSafe;
+import opennlp.tools.util.DownloadUtil;
import opennlp.tools.util.Span;
/**
- * A thread-safe version of TokenizerME. Using it is completely transparent.
You can use it in
- * a single-threaded context as well, it only incurs a minimal overhead.
- * <p>
- * Note, however, that this implementation uses a {@link ThreadLocal}.
Although the implementation is
+ * A thread-safe version of {@link TokenizerME}. Using it is completely
transparent.
+ * You can use it in a single-threaded context as well, it only incurs a
minimal overhead.
+ *
+ * @implNote
+ * This implementation uses a {@link ThreadLocal}. Although the implementation
is
* lightweight because the model is not duplicated, if you have many
long-running threads,
* you may run into memory problems.
- * </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
+ *
+ * @see Tokenizer
+ * @see TokenizerME
*/
@ThreadSafe
public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {
@@ -40,6 +46,22 @@ public class ThreadSafeTokenizerME implements Tokenizer,
AutoCloseable {
private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>();
+ /**
+ * Initializes a {@link ThreadSafeTokenizerME} by downloading a default model
+ * for a given {@code language}.
+ *
+ * @param language An ISO-conformant language code.
+ * @throws IOException Thrown if the model could not be downloaded or saved.
+ */
+ public ThreadSafeTokenizerME(String language) throws IOException {
+ this(DownloadUtil.downloadModel(language,
DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class));
+ }
+
+ /**
+ * Initializes a {@link ThreadSafeTokenizerME} with the specified {@code
model}.
+ *
+ * @param model A valid {@link TokenizerModel}.
+ */
public ThreadSafeTokenizerME(TokenizerModel model) {
super();
this.model = model;