This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 3d50fce WIP.
3d50fce is described below
commit 3d50fcefcaf2034bc5f60a7affc864991f078b82
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Jan 4 17:02:00 2022 +0300
WIP.
---
.../scala/org/apache/nlpcraft/NCModelClient.java | 82 ++------
.../org/apache/nlpcraft/NCModelConfigAdapter.java | 170 ----------------
.../org/apache/nlpcraft/NCModelConfigBuilder.java | 215 +++++++++++++++++++++
.../nlpcraft/internal/NCModelClientImpl.scala | 112 +++++++++++
.../parser/opennlp/en/NCEnOpenNlpTokenParser.java | 5 +-
.../opennlp/en/NCEnOpenNlpTokenParserStemmer.java | 22 +++
.../parser/opennlp/en/impl/NCEnOpenNlpImpl.scala | 6 +-
.../en/impl/NCEnOpenNlpTokenParserStemmerImpl.java | 13 ++
.../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 4 +-
.../apache/nlpcraft/nlp/util/NCTestConfig.scala | 6 +-
10 files changed, 385 insertions(+), 250 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
index 3fe9fdb..ec0cdef 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
@@ -17,6 +17,8 @@
package org.apache.nlpcraft;
+import org.apache.nlpcraft.internal.NCModelClientImpl;
+
import java.util.Map;
import java.util.List;
import java.util.concurrent.*;
@@ -25,87 +27,25 @@ import java.util.concurrent.*;
*
*/
public class NCModelClient implements NCLifecycle {
- private final NCModel mdl;
+ // TODO: move NCModelClientImpl under the right package.
+ private final NCModelClientImpl impl;
/**
*
* @param mdl
*/
public NCModelClient(NCModel mdl) {
- this.mdl = mdl;
- }
-
- /**
- *
- * @throws NCException
- */
- private static void verify() throws NCException {
- // TODO:
- }
-
- private static void start(ExecutorService s, List<? extends NCLifecycle>
list, NCModelConfig cfg) {
- assert s != null;
-
- if (list != null)
- list.forEach(p -> s.execute(() -> p.start(cfg)));
- }
-
- private static void stop(ExecutorService s, List<? extends NCLifecycle>
list) {
- assert s != null;
-
- if (list != null)
- list.forEach(p -> s.execute(() -> p.stop()));
- }
-
- private static void stopExecutorService(ExecutorService s) {
- try {
- s.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
- }
- catch (InterruptedException e) {
- throw new NCException("Thread interrupted.", e);
- }
- }
-
- private static ExecutorService getExecutorService() {
- return
Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+ this.impl = new NCModelClientImpl(mdl);
}
@Override
public void start(NCModelConfig cfg) {
- verify();
-
- cfg.getTokenParser().start(cfg);
-
- ExecutorService s = getExecutorService();
-
- // TODO: start and stop validators.
-
- try {
- start(s, cfg.getEntityParsers(), cfg);
- start(s, cfg.getEntityEnrichers(), cfg);
- start(s, cfg.getTokenEnrichers(), cfg);
- }
- finally {
- stopExecutorService(s);
- }
+ impl.start(cfg);
}
@Override
public void stop() {
- NCModelConfig cfg = mdl.getConfig();
- ExecutorService s = getExecutorService();
-
- try {
- stop(s, cfg.getTokenEnrichers());
- stop(s, cfg.getEntityEnrichers());
- stop(s, cfg.getEntityParsers());
- stop(s, cfg.getTokenEnrichers());
- }
- finally {
- stopExecutorService(s);
- }
-
- cfg.getTokenParser().stop();
+ impl.stop();
}
/**
@@ -117,7 +57,7 @@ public class NCModelClient implements NCLifecycle {
* @throws NCException
*/
public CompletableFuture<NCResult> ask(String txt, Map<String, Object>
data, String usrId) {
- return null; // TODO
+ return impl.ask(txt, data, usrId);
}
/**
@@ -129,7 +69,7 @@ public class NCModelClient implements NCLifecycle {
* @throws NCException
*/
public NCResult askSync(String txt, Map<String, Object> data, String
usrId) {
- return null; // TODO
+ return impl.askSync(txt, data, usrId);
}
/**
@@ -138,7 +78,7 @@ public class NCModelClient implements NCLifecycle {
* @throws NCException
*/
public void clearConversation(String usrId) {
- // TODO
+ impl.clearConversation(usrId);
}
/**
@@ -147,6 +87,6 @@ public class NCModelClient implements NCLifecycle {
* @throws NCException
*/
public void clearDialog(String usrId) {
- // TODO
+ impl.clearDialog(usrId);
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
deleted file mode 100644
index 5f3585c..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft;
-
-import java.util.*;
-
-/**
- *
- */
-// TODO: validation for constructor and all setters.
-// TODO: do builder instead of it.
-public class NCModelConfigAdapter extends NCPropertyMapAdapter implements
NCModelConfig {
- private final String id;
- private final String name;
- private final String version;
- private final NCTokenParser tokParser;
- private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>();
- private final List<NCEntityEnricher> entEnrichers = new ArrayList<>();
- private final List<NCEntityParser> entParsers = new ArrayList<>();
- private final List<NCTokenValidator> tokenValidators = new ArrayList<>();
- private final List<NCEntityValidator> entityValidators = new ArrayList<>();
- private final List<NCVariantValidator> variantsFilters = new ArrayList<>();
-
- /**
- *
- * @param id
- * @param name
- * @param version
- * @param tokParser
- */
- public NCModelConfigAdapter(String id, String name, String version,
NCTokenParser tokParser, NCEntityParser entParser) {
- Objects.requireNonNull(id, "ID cannot be null.");
- Objects.requireNonNull(name, "Name cannot be null.");
- Objects.requireNonNull(version, "Version cannot be null.");
- Objects.requireNonNull(tokParser, "Token parser cannot be null.");
- Objects.requireNonNull(entParser, "Entity parser cannot be null.");
-
- this.id = id;
- this.name = name;
- this.version = version;
- this.tokParser = tokParser;
-
- entParsers.add(entParser);
- }
-
- /**
- *
- * @param entParser
- */
- public void addEntityParser(NCEntityParser entParser) {
- Objects.requireNonNull(entParser, "Entity parser cannot be null.");
-
- entParsers.add(entParser);
- }
-
- /**
- *
- * @param tokEnricher
- */
- public void addTokenEnricher(NCTokenEnricher tokEnricher) {
- Objects.requireNonNull(tokEnricher, "Token enricher cannot be null.");
-
- tokEnrichers.add(tokEnricher);
- }
-
- /**
- *
- * @param entEnricher
- */
- public void addEntityEnricher(NCEntityEnricher entEnricher) {
- Objects.requireNonNull(entEnricher, "Entity enricher cannot be null.");
-
- entEnrichers.add(entEnricher);
- }
-
- /**
- *
- * @param entParser
- */
- public void addEntityParser(NCTokenValidator tokValidator) {
- Objects.requireNonNull(tokValidator, "Token validator cannot be
null.");
-
- tokenValidators.add(tokValidator);
- }
-
- /**
- *
- * @param entValidator
- */
- public void addEntityParser(NCEntityValidator entValidator) {
- Objects.requireNonNull(entValidator, "Entity validator cannot be
null.");
-
- entityValidators.add(entValidator);
- }
-
- /**
- *
- * @param variantFilter
- */
- public void addVariantFilter(NCVariantValidator variantFilter) {
- Objects.requireNonNull(variantFilter, "Variant filter cannot be
null.");
-
- variantsFilters.add(variantFilter);
- }
-
- @Override
- public String getId() {
- return id;
- }
-
- @Override
- public String getName() {
- return name;
- }
-
- @Override
- public String getVersion() {
- return version;
- }
-
- @Override
- public List<NCTokenEnricher> getTokenEnrichers() {
- return tokEnrichers;
- }
-
- @Override
- public List<NCEntityEnricher> getEntityEnrichers() {
- return entEnrichers;
- }
-
- @Override
- public NCTokenParser getTokenParser() {
- return tokParser;
- }
-
- @Override
- public List<NCEntityParser> getEntityParsers() {
- return entParsers;
- }
-
- @Override
- public List<NCTokenValidator> getTokenValidators() {
- return tokenValidators;
- }
-
- @Override
- public List<NCEntityValidator> getEntityValidators() {
- return entityValidators;
- }
-
- @Override
- public List<NCVariantValidator> getVariantValidators() {
- return variantsFilters;
- }
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java
new file mode 100644
index 0000000..58e0b4e
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ *
+ */
+// TODO: validation for constructor and all setters.
+public class NCModelConfigBuilder {
+ private abstract class NCModelConfigAdapter extends NCPropertyMapAdapter
implements NCModelConfig { }
+
+ private final String id;
+ private final String name;
+ private final String version;
+
+ private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>();
+ private final List<NCEntityEnricher> entEnrichers = new ArrayList<>();
+ private final List<NCEntityParser> entParsers = new ArrayList<>();
+ private final List<NCTokenValidator> tokenValidators = new ArrayList<>();
+ private final List<NCEntityValidator> entityValidators = new ArrayList<>();
+ private final List<NCVariantValidator> variantsValidators = new
ArrayList<>();
+
+ private String description;
+ private String origin;
+
+ private NCTokenParser tokParser;
+
+ public NCModelConfigBuilder(String id, String name, String version) {
+ Objects.requireNonNull(id, "Id cannot be null.");
+ Objects.requireNonNull(name, "Name cannot be null.");
+ Objects.requireNonNull(version, "Version cannot be null.");
+ // All three identifiers are mandatory (validated above) and stored as-is.
+ this.id = id;
+ this.name = name;
+ this.version = version;
+ }
+
+ public NCModelConfigBuilder withDescription(String description) {
+ this.description = description;
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withOrigin(String origin) {
+ this.origin = origin;
+
+ return this;
+ }
+
+
+ public NCModelConfigBuilder withTokenParser(NCTokenParser tokParser) {
+ this.tokParser = tokParser;
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withTokenEnrichers(List<NCTokenEnricher>
tokEnrichers) {
+ this.tokEnrichers.addAll(tokEnrichers);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withTokenEnricher(NCTokenEnricher tokEnricher)
{
+ Objects.requireNonNull(tokEnricher, "Argument cannot be null."); // Check the argument, not the (never-null) list field.
+
+ this.tokEnrichers.add(tokEnricher);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withEntityEnrichers(List<NCEntityEnricher>
entEnrichers) {
+ this.entEnrichers.addAll(entEnrichers);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withEntityEnricher(NCEntityEnricher
entEnricher) {
+ this.entEnrichers.add(entEnricher);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withEntityParsers(List<NCEntityParser>
entParsers) {
+ this.entParsers.addAll(entParsers);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withEntityParser(NCEntityParser entParser) {
+ this.entParsers.add(entParser);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withTokenValidators(List<NCTokenValidator>
tokenValidators) {
+ this.tokenValidators.addAll(tokenValidators);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withTokenValidator(NCTokenValidator
tokenValidator) {
+ this.tokenValidators.add(tokenValidator);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withEntityValidators(List<NCEntityValidator>
entityValidators) {
+ this.entityValidators.addAll(entityValidators);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withEntityValidator(NCEntityValidator
entityValidator) {
+ this.entityValidators.add(entityValidator);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withVariantValidators(List<NCVariantValidator>
variantsValidators) {
+ this.variantsValidators.addAll(variantsValidators);
+
+ return this;
+ }
+
+ public NCModelConfigBuilder withVariantValidator(NCVariantValidator
variantsValidator) {
+ this.variantsValidators.add(variantsValidator);
+
+ return this;
+ }
+
+ public NCModelConfig make() {
+ // TODO: validate.
+
+ return new NCModelConfigAdapter() {
+ @Override
+ public NCTokenParser getTokenParser() {
+ return tokParser;
+ }
+
+ @Override
+ public List<NCTokenEnricher> getTokenEnrichers() {
+ return tokEnrichers;
+ }
+
+ @Override
+ public List<NCEntityEnricher> getEntityEnrichers() {
+ return entEnrichers;
+ }
+
+ @Override
+ public List<NCEntityParser> getEntityParsers() {
+ return entParsers;
+ }
+
+ @Override
+ public List<NCTokenValidator> getTokenValidators() {
+ return tokenValidators;
+ }
+
+ @Override
+ public List<NCEntityValidator> getEntityValidators() {
+ return entityValidators;
+ }
+
+ @Override
+ public List<NCVariantValidator> getVariantValidators() {
+ return variantsValidators;
+ }
+
+ @Override
+ public String getId() {
+ return id;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public String getVersion() {
+ return version;
+ }
+
+ @Override
+ public String getDescription() {
+ return description != null ? description :
super.getDescription();
+ }
+
+ @Override
+ public String getOrigin() {
+ return origin != null ? origin : super.getOrigin();
+ }
+ };
+ }
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala
new file mode 100644
index 0000000..22419fb
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.internal
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCUtils
+import java.util.concurrent.*
+import java.util.List as JList
+import java.util.Map as JMap
+
+// TODO: move it to the right package.
+class NCModelClientImpl(mdl: NCModel) extends NCLifecycle:
+ /**
+ *
+ * @throws NCException */
+ private def verify(): Unit = () // TODO:
+
+ /**
+ *
+ * @param s
+ * @param list
+ * @param cfg
+ * @tparam T
+ */
+ private def start[T <: NCLifecycle](s: ExecutorService, list: JList[T],
cfg: NCModelConfig): Unit =
+ assert(s != null)
+
+ if list != null then list.forEach(p => s.execute(() => p.start(cfg)))
+
+ /**
+ *
+ * @param s
+ * @param list
+ * @tparam T
+ */
+ private def stop[T <: NCLifecycle](s: ExecutorService, list: JList[T]):
Unit =
+ assert(s != null)
+
+ if list != null then list.forEach(p => s.execute(() => p.stop()))
+
+ /**
+ *
+ * @param s
+ */
+ private def stopExecutorService(s: ExecutorService): Unit =
+ try
+ s.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS)
+ catch
+ case e: InterruptedException => throw new NCException("Thread
interrupted.", e)
+
+ /**
+ *
+ * @return
+ */
+ private def getExecutorService: ExecutorService =
Executors.newFixedThreadPool(Runtime.getRuntime.availableProcessors)
+
+ override def start(cfg: NCModelConfig): Unit =
+ verify()
+
+ cfg.getTokenParser.start(cfg)
+
+ val s = getExecutorService
+
+ try
+ start(s, cfg.getEntityParsers, cfg)
+ start(s, cfg.getEntityEnrichers, cfg)
+ start(s, cfg.getTokenEnrichers, cfg)
+ start(s, cfg.getTokenValidators, cfg)
+ start(s, cfg.getEntityValidators, cfg)
+ start(s, cfg.getVariantValidators, cfg)
+ finally
+ stopExecutorService(s)
+
+ override def stop(): Unit = {
+ val cfg = mdl.getConfig
+
+ val s = getExecutorService
+
+ try
+ stop(s, cfg.getVariantValidators)
+ stop(s, cfg.getEntityValidators)
+ stop(s, cfg.getTokenValidators)
+ stop(s, cfg.getTokenEnrichers)
+ stop(s, cfg.getEntityEnrichers)
+ stop(s, cfg.getEntityParsers)
+ stop(s, cfg.getTokenEnrichers)
+ finally
+ stopExecutorService(s)
+
+ cfg.getTokenParser.stop()
+ }
+
+ // TODO: implement
+ def ask(txt: String, data: JMap[String, AnyRef], usrId: String):
CompletableFuture[NCResult] = null
+ def askSync(txt: String, data: JMap[String, AnyRef], usrId: String):
NCResult = null
+ def clearConversation(usrId: String): Unit = ()
+ def clearDialog(usrId: String): Unit = ()
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
index 2c59def..59357e5 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
@@ -55,13 +55,14 @@ public class NCEnOpenNlpTokenParser implements
NCTokenParser {
* @param lemmaDicSrc Local filesystem path, resources file path or URL
for OpenNLP lemmatizer dictionary.
* @throws NCException
*/
- public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String
lemmaDicSrc) {
+ public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String
lemmaDicSrc, NCEnOpenNlpTokenParserStemmer stemmer) {
Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be
null.");
Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be
null.");
+ Objects.requireNonNull(stemmer, "Stemmer cannot be null.");
try {
- impl = new NCEnOpenNlpImpl(tokMdlSrc, posMdlSrc, lemmaDicSrc);
+ impl = new NCEnOpenNlpImpl(tokMdlSrc, posMdlSrc, lemmaDicSrc,
stemmer);
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserStemmer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserStemmer.java
new file mode 100644
index 0000000..57ab323
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserStemmer.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.token.parser.opennlp.en;
+
+public interface NCEnOpenNlpTokenParserStemmer {
+ String stem(String s);
+}
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
index f7714a3..60d2172 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
@@ -24,6 +24,7 @@ import opennlp.tools.stemmer.*
import opennlp.tools.tokenize.*
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParserStemmer
import java.io.*
import java.util
@@ -38,15 +39,13 @@ import scala.jdk.CollectionConverters.*
* @param posMdlSrc
* @param lemmaDicSrc
*/
-class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String)
extends NCTokenParser with LazyLogging:
- @volatile private var stemmer: PorterStemmer = _
+class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String,
stemmer: NCEnOpenNlpTokenParserStemmer) extends NCTokenParser with LazyLogging:
@volatile var tagger: POSTaggerME = _
@volatile var lemmatizer: DictionaryLemmatizer = _
@volatile var tokenizer: TokenizerME = _
override def start(cfg: NCModelConfig): Unit =
NCUtils.execPar(
- () => stemmer = new PorterStemmer,
() =>
tagger = new POSTaggerME(new
POSModel(NCUtils.getStream(posMdlSrc)))
logger.trace(s"Loaded resource: $posMdlSrc")
@@ -65,7 +64,6 @@ class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String,
lemmaDicSrc: String) e
lemmatizer = null
tagger = null
tokenizer = null
- stemmer = null
/**
*
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpTokenParserStemmerImpl.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpTokenParserStemmerImpl.java
new file mode 100644
index 0000000..2c8c9e8
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpTokenParserStemmerImpl.java
@@ -0,0 +1,13 @@
+package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl;
+
+import opennlp.tools.stemmer.PorterStemmer;
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParserStemmer;
+
+public class NCEnOpenNlpTokenParserStemmerImpl implements
NCEnOpenNlpTokenParserStemmer {
+ private PorterStemmer stemmer = new PorterStemmer();
+
+ @Override
+ public String stem(String s) {
+ return stemmer.stem(s);
+ }
+}
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
index c30f536..82f213d 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
@@ -19,6 +19,7 @@ package
org.apache.nlpcraft.nlp.benchmark.token.parser.opennlp;
import org.apache.nlpcraft.nlp.benchmark.NCBenchmarkAdapter;
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser;
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.NCEnOpenNlpTokenParserStemmerImpl;
import org.apache.nlpcraft.nlp.util.NCTestUtils;
import org.junit.jupiter.api.Disabled;
import org.openjdk.jmh.annotations.*;
@@ -54,7 +55,8 @@ public class NCEnOpenNlpTokenParserBenchmark extends
NCBenchmarkAdapter {
NCEnOpenNlpTokenParser p = new NCEnOpenNlpTokenParser(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
- "opennlp/en-lemmatizer.dict"
+ "opennlp/en-lemmatizer.dict",
+ new NCEnOpenNlpTokenParserStemmerImpl()
);
p.start(null); // TODO: fix it.
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
index 5e32ee1..4934c11 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
@@ -18,7 +18,8 @@
package org.apache.nlpcraft.nlp.util
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
+import org.apache.nlpcraft.nlp.token.parser.opennlp.en.*
+import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.*
import java.util.{Optional, ArrayList as JAList, List as JList}
@@ -31,7 +32,8 @@ object NCTestConfig:
new NCEnOpenNlpTokenParser(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
- "opennlp/en-lemmatizer.dict"
+ "opennlp/en-lemmatizer.dict",
+ new NCEnOpenNlpTokenParserStemmerImpl()
)
override def getTokenParser: NCTokenParser = p