This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new ec09b7e4 OPENNLP-1653 Add thread-safe version of LemmatizerME (#691)
ec09b7e4 is described below
commit ec09b7e42c924ebf87087a11be44371223e61320
Author: Martin Wiesner <[email protected]>
AuthorDate: Sun Nov 24 10:38:23 2024 +0100
OPENNLP-1653 Add thread-safe version of LemmatizerME (#691)
---
.../tools/lemmatizer/ThreadSafeLemmatizerME.java | 80 ++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
new file mode 100644
index 00000000..e63c27d3
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.lemmatizer;
+
+import java.util.List;
+import java.util.Objects;
+
+import opennlp.tools.commons.ThreadSafe;
+
+/**
+ * A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent.
+ * You can use it in a single-threaded context as well, it only incurs a minimal overhead.
+ * <p>
+ * Note, however, that this implementation uses a {@link ThreadLocal}: every thread that
+ * lemmatizes through this class gets its own {@link LemmatizerME}, each of them created from
+ * the single {@link LemmatizerModel} passed to the constructor. Although the implementation
+ * is lightweight because the model is not duplicated, if you have many long-running threads,
+ * you may run into memory problems.
+ * </p>
+ * <p>
+ * Be careful when using this in a Jakarta EE application, for example.
+ * </p>
+ * The user is responsible for clearing the {@link ThreadLocal}. Note that {@link #close()}
+ * only removes the instance held for the calling thread, so it has to be invoked by every
+ * thread that used this lemmatizer.
+ *
+ * @see Lemmatizer
+ */
+@ThreadSafe
+public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
+
+  private final LemmatizerModel model;
+
+  // Holds one lazily created LemmatizerME per thread; populated by getLemmatizer().
+  private final ThreadLocal<LemmatizerME> threadLocal = new ThreadLocal<>();
+
+  /**
+   * Initializes a {@link ThreadSafeLemmatizerME} with the specified {@code model}.
+   *
+   * @param model A valid {@link LemmatizerModel}. Must not be {@code null}.
+   * @throws NullPointerException Thrown if {@code model} is {@code null}.
+   */
+  public ThreadSafeLemmatizerME(LemmatizerModel model) {
+    // Fail fast: otherwise a null model would not be touched until the first
+    // lemmatize call on some thread, far away from the faulty construction site.
+    this.model = Objects.requireNonNull(model, "model must not be null");
+  }
+
+  /**
+   * @return The {@link LemmatizerME} held for the calling thread. It is created from the
+   *         shared model on first access and reused by that thread afterwards.
+   */
+  private LemmatizerME getLemmatizer() {
+    LemmatizerME lemmatizer = threadLocal.get();
+    if (lemmatizer == null) {
+      lemmatizer = new LemmatizerME(model);
+      threadLocal.set(lemmatizer);
+    }
+    return lemmatizer;
+  }
+
+  /**
+   * Delegates to {@link LemmatizerME#lemmatize(String[], String[])} of the instance held
+   * for the calling thread.
+   */
+  @Override
+  public String[] lemmatize(String[] toks, String[] tags) {
+    return getLemmatizer().lemmatize(toks, tags);
+  }
+
+  /**
+   * Delegates to {@link LemmatizerME#lemmatize(List, List)} of the instance held
+   * for the calling thread.
+   */
+  @Override
+  public List<List<String>> lemmatize(List<String> toks, List<String> tags) {
+    return getLemmatizer().lemmatize(toks, tags);
+  }
+
+  /**
+   * Removes the {@link LemmatizerME} held for the calling thread. Instances held for other
+   * threads are not affected. This object remains usable afterwards: the next lemmatize
+   * call of the calling thread creates a fresh instance.
+   */
+  @Override
+  public void close() {
+    threadLocal.remove();
+  }
+
+}