This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-472 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push: new 480f0ae WIP. 480f0ae is described below commit 480f0aecd07a0b035bc6186fcbbea3de5be1db0b Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Tue Jan 11 23:03:34 2022 +0300 WIP. --- .../parser/stanford/NCStanfordEntityParser.java | 10 ++ .../stanford/NCStanfordEntityParserSpec.scala | 4 +- .../stanford/NCStanfordTokenParserSpec.scala | 2 +- .../nlpcraft/nlp/utils/NCStanfordTestConfig.scala | 4 +- .../org/apache/nlpcraft/NCEntityEnricher.java | 2 +- .../scala/org/apache/nlpcraft/NCEntityParser.java | 2 +- .../org/apache/nlpcraft/NCEntityValidator.java | 2 +- .../{NCTokenParser.java => NCLifecycle.java} | 21 ++-- .../main/scala/org/apache/nlpcraft/NCModel.java | 6 ++ .../scala/org/apache/nlpcraft/NCModelAdapter.java | 16 ++- .../scala/org/apache/nlpcraft/NCModelClient.java | 4 + .../scala/org/apache/nlpcraft/NCModelConfig.java | 78 +++++++-------- .../{NCTokenParser.java => NCModelPipeline.java} | 43 +++++++- ...figBuilder.java => NCModelPipelineBuilder.java} | 109 ++++----------------- .../scala/org/apache/nlpcraft/NCTokenEnricher.java | 2 +- .../scala/org/apache/nlpcraft/NCTokenParser.java | 1 - .../org/apache/nlpcraft/NCTokenValidator.java | 2 +- .../org/apache/nlpcraft/NCVariantValidator.java | 2 +- .../nlpcraft/internal/NCModelClientImpl.scala | 40 ++++++-- .../nlpcraft/internal/NCRequestProcessor.scala | 20 ++-- .../apache/nlpcraft/internal/util/NCUtils.scala | 16 ++- .../nlp/entity/parser/nlp/NCNlpEntityParser.java | 10 ++ .../parser/opennlp/NCOpenNlpEntityParser.java | 10 ++ .../opennlp/impl/NCOpenNlpEntityParserImpl.scala | 4 +- .../parser/semantic/NCSemanticEntityParser.java | 9 ++ .../semantic/impl/NCSemanticEntityParserImpl.scala | 4 +- .../token/enricher/en/NCBracketsTokenEnricher.java | 10 ++ .../enricher/en/NCDictionaryTokenEnricher.java | 10 ++ .../token/enricher/en/NCLanguageTokenEnricher.java | 10 ++ .../token/enricher/en/NCQuotesTokenEnricher.java | 10 ++ .../enricher/en/NCStopWordsTokenEnricher.java | 11 ++- .../en/N\320\241SwearWordsTokenEnricher.java" | 10 ++ .../token/enricher/en/impl/NCDictionaryImpl.scala | 4 +- .../token/enricher/en/impl/NCStopWordsImpl.scala | 4 +- .../nlpcraft/internal/NCRequestProcessorSpec.scala | 20 ++-- .../entity/parser/nlp/NCNlpEntityParserSpec.scala | 4 +- .../parser/opennlp/NCOpenNlpEntityParserSpec.scala | 4 +- .../semantic/NCSemanticEntityParserJsonSpec.scala | 6 +- .../semantic/NCSemanticEntityParserSpec.scala | 14 +-- .../semantic/NCSemanticEntityParserYamlSpec.scala | 6 +- .../enricher/en/NCBracketsTokenEnricherSpec.scala | 4 +- .../en/NCDictionaryTokenEnricherSpec.scala | 4 +- .../enricher/en/NCLanguageTokenEnricherSpec.scala | 2 +- .../enricher/en/NCQuotesTokenEnricherSpec.scala | 4 +- .../enricher/en/NCStopWordsEnricherSpec.scala | 4 +- .../en/NCSwearWordsTokenEnricherSpec.scala | 2 +- .../parser/opennlp/NCOpenNlpTokenParserSpec.scala | 4 +- .../apache/nlpcraft/nlp/util/NCTestConfig.scala | 14 ++- .../apache/nlpcraft/nlp/util/NCTestRequest.scala | 2 +- 49 files changed, 353 insertions(+), 233 deletions(-) diff --git a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java index 443cb8a..c676d37 100644 --- a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java +++ b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java @@ -74,4 +74,14 @@ public class NCStanfordEntityParser implements NCEntityParser { public List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { return impl.parse(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala index 8cd23f4..18c4b18 100644 --- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala @@ -36,10 +36,10 @@ class NCStanfordEntityParserSpec: def test(): Unit = val txt = "Los Angeles, 23 August, 23 and sergeyka...@apache.org" - val toks = NCStanfordTestConfig.EN.getTokenParser.tokenize(txt) + val toks = NCStanfordTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) NCTestUtils.printTokens(toks.asScala.toSeq) - val res = parser.parse(NCTestRequest(txt), NCStanfordTestConfig.EN, toks) + val res = parser.parse(NCTestRequest(txt), NCTestConfig.CFG, toks) NCTestUtils.printEntities(txt, res.asScala.toSeq) require(res.size() == 4) \ No newline at end of file diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala index e55272d..24fd34f 100644 --- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala @@ -32,7 +32,7 @@ class NCStanfordTokenParserSpec: @Test def test(): Unit = val toks = - NCStanfordTestConfig.EN.getTokenParser.tokenize("I had a lunch with brand names 'AAA'").asScala.toSeq + NCStanfordTestConfig.EN_PIPELINE.getTokenParser.tokenize("I had a lunch with brand names 'AAA'").asScala.toSeq require(toks.length > 1) NCTestUtils.printTokens(toks) diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/utils/NCStanfordTestConfig.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/utils/NCStanfordTestConfig.scala index ec145bf..937a339 100644 --- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/utils/NCStanfordTestConfig.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/utils/NCStanfordTestConfig.scala @@ -22,7 +22,7 @@ import org.apache.nlpcraft.* import org.apache.nlpcraft.nlp.entity.parser.stanford.NCStanfordEntityParser import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParser import org.apache.nlpcraft.nlp.token.parser.stanford.NCStanfordTokenParser -import org.apache.nlpcraft.nlp.util.NCTestConfig +import org.apache.nlpcraft.nlp.util.NCTestPipeline import java.util.{Optional, Properties, ArrayList as JAList, List as JList} import scala.jdk.CollectionConverters.* @@ -36,4 +36,4 @@ object NCStanfordTestConfig: props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner") new StanfordCoreNLP(props) - final val EN = NCTestConfig(new NCStanfordTokenParser(STANFORD)) \ No newline at end of file + final val EN_PIPELINE = NCTestPipeline(new NCStanfordTokenParser(STANFORD)) \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java index 943168d..06a8d64 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java @@ -22,7 +22,7 @@ import java.util.*; /** * */ -public interface NCEntityEnricher { +public interface NCEntityEnricher extends NCLifecycle { /** * * @param req diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java index 46f1b55..ce4d7e7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java @@ -22,7 +22,7 @@ import java.util.List; /** * */ -public interface NCEntityParser { +public interface NCEntityParser extends NCLifecycle { /** * * @param req diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java index 72c9f94..b671761 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java @@ -22,7 +22,7 @@ import java.util.List; /** * TODO: */ -public interface NCEntityValidator { +public interface NCEntityValidator extends NCLifecycle { /** * TODO: Checks parsed entities and throws exceptions, if necessary. * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCLifecycle.java similarity index 80% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCLifecycle.java index 225728d..d45a1a2 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCLifecycle.java @@ -17,18 +17,23 @@ package org.apache.nlpcraft; -import java.util.List; - /** - * + * TODO: */ -public interface NCTokenParser { +public interface NCLifecycle { + /** + * + * @param cfg + */ + default void onStart(NCModelConfig cfg) { + // No-op. + } + /** - * TODO: request and config. * * @param cfg - * @param text - * @return */ - List<NCToken> tokenize(String text); + default void onStop(NCModelConfig cfg) { + // No-op. + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java index 881a11b..cd82e99 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java @@ -31,6 +31,12 @@ public interface NCModel { /** * + * @return + */ + NCModelPipeline getPipeline(); + + /** + * * @param vrn * @return */ diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelAdapter.java index ac8a1b5..4fdfa4f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelAdapter.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelAdapter.java @@ -17,22 +17,36 @@ package org.apache.nlpcraft; +import java.util.Objects; + /** * */ public class NCModelAdapter implements NCModel { private final NCModelConfig cfg; + private final NCModelPipeline pipeline; /** * * @param cfg + * @param pipeline */ - public NCModelAdapter(NCModelConfig cfg) { + public NCModelAdapter(NCModelConfig cfg, NCModelPipeline pipeline) { + // TODO: error texts. + Objects.requireNonNull(cfg, "Config cannot be null."); + Objects.requireNonNull(pipeline, "Pipeline cannot be null."); + this.cfg = cfg; + this.pipeline = pipeline; } @Override public NCModelConfig getConfig() { return cfg; } + + @Override + public NCModelPipeline getPipeline() { + return pipeline; + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java index 87bbbff..c7ad0e7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java @@ -79,4 +79,8 @@ public class NCModelClient { public void clearDialog(String usrId) { impl.clearDialog(usrId); } + + public void close() { + impl.close(); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java index 330e4ec..bd24105 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java @@ -22,70 +22,60 @@ import java.util.*; /** * */ -public interface NCModelConfig extends NCPropertyMap { - /** - * - * @return - */ - NCTokenParser getTokenParser(); - - /** - * - * @return - */ - List<NCTokenEnricher> getTokenEnrichers(); +public class NCModelConfig extends NCPropertyMapAdapter { + private final String id, name, version; + private String desc, origin; /** - * - * @return + * TODO: + * @param id + * @param name + * @param version */ - List<NCEntityEnricher> getEntityEnrichers(); + public NCModelConfig(String id, String name, String version) { + // TODO: error texts. + Objects.requireNonNull(id, "Id cannot be null."); + Objects.requireNonNull(name, "Name cannot be null."); + Objects.requireNonNull(version, "Version cannot be null."); - /** - * - * @return - */ - List<NCEntityParser> getEntityParsers(); - - /** - * - * @return - */ - List<NCTokenValidator> getTokenValidators(); + this.id = id; + this.name = name; + this.version = version; + } - /** - * - * @return - */ - List<NCEntityValidator> getEntityValidators(); - - /** - * - * @return - */ - List<NCVariantValidator> getVariantValidators(); + public NCModelConfig(String id, String name, String version, String desc, String origin) { + this(id, name, version); + this.desc = desc; + this.origin = origin; + } /** * Gets unique, <i>immutable</i> ID of this model. * * @return Unique, <i>immutable</i> ID of this model. */ - String getId(); + public String getId() { + return id; + } /** * Gets descriptive name of this model. * * @return Descriptive name for this model. */ - String getName(); + public String getName() { + return name; + } /** * Gets the version of this model using semantic versioning. * * @return A version compatible with (<a href="http://www.semver.org">www.semver.org</a>) specification. */ - String getVersion(); + public String getVersion() { + return version; + } /** * Gets optional short model description. This can be displayed by the management tools. @@ -93,8 +83,8 @@ public interface NCModelConfig extends NCPropertyMap { * * @return Optional short model description. Can return <code>null</code>. */ - default String getDescription() { - return null; + public String getDescription() { + return desc; } /** @@ -103,7 +93,7 @@ public interface NCModelConfig extends NCPropertyMap { * * @return Origin of this model like name of the class, file path or URL. */ - default String getOrigin() { - return getClass().getCanonicalName(); + public String getOrigin() { + return origin != null ? origin : getClass().getCanonicalName(); } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipeline.java similarity index 60% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipeline.java index 225728d..968ecdd 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipeline.java @@ -22,13 +22,46 @@ import java.util.List; /** * */ -public interface NCTokenParser { +public interface NCModelPipeline { /** - * TODO: request and config. * - * @param cfg - * @param text * @return */ - List<NCToken> tokenize(String text); + NCTokenParser getTokenParser(); + + /** + * + * @return + */ + List<NCTokenEnricher> getTokenEnrichers(); + + /** + * + * @return + */ + List<NCEntityEnricher> getEntityEnrichers(); + + /** + * + * @return + */ + List<NCEntityParser> getEntityParsers(); + + /** + * + * @return + */ + List<NCTokenValidator> getTokenValidators(); + + /** + * + * @return + */ + List<NCEntityValidator> getEntityValidators(); + + /** + * + * @return + */ + List<NCVariantValidator> getVariantValidators(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java similarity index 66% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java index 7d6ec00..a57c7ea 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java @@ -24,11 +24,8 @@ import java.util.Objects; /** * TODO: */ -public class NCModelConfigBuilder { - private final String id, name, version; - - private String desc, origin; - private NCTokenParser tokParser; +public class NCModelPipelineBuilder { + private final NCTokenParser tokParser; private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>(); private final List<NCEntityEnricher> entEnrichers = new ArrayList<>(); @@ -44,55 +41,18 @@ public class NCModelConfigBuilder { * @param name * @param version */ - public NCModelConfigBuilder(String id, String name, String version) { - // TODO: error texts. - Objects.requireNonNull(id, "Id cannot be null."); - Objects.requireNonNull(name, "Name cannot be null."); - Objects.requireNonNull(version, "Version cannot be null."); - - this.id = id; - this.name = name; - this.version = version; - } - - /** - * @param desc - * @return - */ - public NCModelConfigBuilder withDescription(String desc) { - this.desc = desc; - - return this; - } - - /** - * @param origin - * @return - */ - public NCModelConfigBuilder withOrigin(String origin) { - this.origin = origin; - - return this; - } - - /** - * @param tokParser - * @return - */ - public NCModelConfigBuilder withTokenParser(NCTokenParser tokParser) { + public NCModelPipelineBuilder(NCTokenParser tokParser) { // TODO: error texts. - Objects.requireNonNull(tokParser, "Parser cannot be null."); + Objects.requireNonNull(tokParser, "Token parser cannot be null."); this.tokParser = tokParser; - - return this; } /** * @param tokEnrichers * @return */ - public NCModelConfigBuilder withTokenEnrichers(List<NCTokenEnricher> tokEnrichers) { + public NCModelPipelineBuilder withTokenEnrichers(List<NCTokenEnricher> tokEnrichers) { // TODO: error texts. Objects.requireNonNull(tokEnrichers, "Enrichers cannot be null."); tokEnrichers.forEach(p -> Objects.requireNonNull(p, "Enrichers cannot be null.")); @@ -106,7 +66,7 @@ public class NCModelConfigBuilder { * @param tokEnricher * @return */ - public NCModelConfigBuilder withTokenEnricher(NCTokenEnricher tokEnricher) { + public NCModelPipelineBuilder withTokenEnricher(NCTokenEnricher tokEnricher) { // TODO: error texts. Objects.requireNonNull(tokEnricher, "Enricher cannot be null."); @@ -119,7 +79,7 @@ public class NCModelConfigBuilder { * @param entEnrichers * @return */ - public NCModelConfigBuilder withEntityEnrichers(List<NCEntityEnricher> entEnrichers) { + public NCModelPipelineBuilder withEntityEnrichers(List<NCEntityEnricher> entEnrichers) { // TODO: error texts. Objects.requireNonNull(entEnrichers, "Enrichers cannot be null."); entEnrichers.forEach(p -> Objects.requireNonNull(p, "Enrichers cannot be null.")); @@ -133,7 +93,7 @@ public class NCModelConfigBuilder { * @param entEnricher * @return */ - public NCModelConfigBuilder withEntityEnricher(NCEntityEnricher entEnricher) { + public NCModelPipelineBuilder withEntityEnricher(NCEntityEnricher entEnricher) { // TODO: error texts. Objects.requireNonNull(entEnricher, "Enricher cannot be null."); @@ -146,7 +106,7 @@ public class NCModelConfigBuilder { * @param entParsers * @return */ - public NCModelConfigBuilder withEntityParsers(List<NCEntityParser> entParsers) { + public NCModelPipelineBuilder withEntityParsers(List<NCEntityParser> entParsers) { // TODO: error texts. Objects.requireNonNull(entParsers, "Parsers cannot be null."); entParsers.forEach(p -> Objects.requireNonNull(p, "Parsers cannot be null.")); @@ -160,7 +120,7 @@ public class NCModelConfigBuilder { * @param entParser * @return */ - public NCModelConfigBuilder withEntityParser(NCEntityParser entParser) { + public NCModelPipelineBuilder withEntityParser(NCEntityParser entParser) { // TODO: error texts. Objects.requireNonNull(entParser, "Parser cannot be null."); @@ -173,7 +133,7 @@ public class NCModelConfigBuilder { * @param tokenValidators * @return */ - public NCModelConfigBuilder withTokenValidators(List<NCTokenValidator> tokenValidators) { + public NCModelPipelineBuilder withTokenValidators(List<NCTokenValidator> tokenValidators) { // TODO: error texts. Objects.requireNonNull(tokenValidators, "Validators cannot be null."); tokenValidators.forEach(p -> Objects.requireNonNull(p, "Validators cannot be null.")); @@ -187,7 +147,7 @@ public class NCModelConfigBuilder { * @param tokenValidator * @return */ - public NCModelConfigBuilder withTokenValidator(NCTokenValidator tokenValidator) { + public NCModelPipelineBuilder withTokenValidator(NCTokenValidator tokenValidator) { // TODO: error texts. Objects.requireNonNull(tokenValidator, "Validator cannot be null."); @@ -200,7 +160,7 @@ public class NCModelConfigBuilder { * @param entityValidators * @return */ - public NCModelConfigBuilder withEntityValidators(List<NCEntityValidator> entityValidators) { + public NCModelPipelineBuilder withEntityValidators(List<NCEntityValidator> entityValidators) { // TODO: error texts. Objects.requireNonNull(entityValidators, "Validators cannot be null."); entityValidators.forEach(p -> Objects.requireNonNull(p, "Validators cannot be null.")); @@ -214,7 +174,7 @@ public class NCModelConfigBuilder { * @param entityValidator * @return */ - public NCModelConfigBuilder withEntityValidator(NCEntityValidator entityValidator) { + public NCModelPipelineBuilder withEntityValidator(NCEntityValidator entityValidator) { Objects.requireNonNull(entityValidator, "Validators cannot be null."); this.entityValidators.add(entityValidator); @@ -226,7 +186,7 @@ public class NCModelConfigBuilder { * @param variantValidators * @return */ - public NCModelConfigBuilder withVariantValidators(List<NCVariantValidator> variantValidators) { + public NCModelPipelineBuilder withVariantValidators(List<NCVariantValidator> variantValidators) { Objects.requireNonNull(variantValidators, "Validators cannot be null."); variantValidators.forEach(p -> Objects.requireNonNull(p, "Validators cannot be null.")); @@ -239,7 +199,7 @@ public class NCModelConfigBuilder { * @param variantValidator * @return */ - public NCModelConfigBuilder withVariantValidator(NCVariantValidator variantValidator) { + public NCModelPipelineBuilder withVariantValidator(NCVariantValidator variantValidator) { Objects.requireNonNull(variantValidator, "Validator cannot be null."); this.variantValidators.add(variantValidator); @@ -250,33 +210,12 @@ public class NCModelConfigBuilder { /** * @return */ - public NCModelConfig build() { + public NCModelPipeline build() { // TODO: error texts and exception type. - if (tokParser == null) - throw new IllegalArgumentException("Token parser must be defined."); - else if (entParsers.isEmpty()) + if (entParsers.isEmpty()) throw new IllegalArgumentException("At least one entity parser must be defined."); - abstract class NCModelConfigImpl extends NCPropertyMapAdapter implements NCModelConfig { - // No.op - } - - return new NCModelConfigImpl() { - @Override - public String getId() { - return id; - } - - @Override - public String getName() { - return name; - } - - @Override - public String getVersion() { - return version; - } - + return new NCModelPipeline() { @Override public NCTokenParser getTokenParser() { return tokParser; @@ -311,16 +250,6 @@ public class NCModelConfigBuilder { public List<NCVariantValidator> getVariantValidators() { return variantValidators; } - - @Override - public String getDescription() { - return desc != null ? desc : super.getDescription(); - } - - @Override - public String getOrigin() { - return origin != null ? origin : super.getOrigin(); - } }; } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java index 31e6d09..ab78f76 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java @@ -22,7 +22,7 @@ import java.util.*; /** * */ -public interface NCTokenEnricher { +public interface NCTokenEnricher extends NCLifecycle { /** * * @param req diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java index 225728d..66200a6 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java @@ -24,7 +24,6 @@ import java.util.List; */ public interface NCTokenParser { /** - * TODO: request and config. * * @param cfg * @param text diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java index c9fd134..7a336d8 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java @@ -22,7 +22,7 @@ import java.util.List; /** * TODO: */ -public interface NCTokenValidator { +public interface NCTokenValidator extends NCLifecycle { /** * TODO: Checks parsed tokens and throws exceptions, if necessary. * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java index ed1acdc..8f28ff5 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java @@ -22,7 +22,7 @@ import java.util.List; /** * TODO: */ -public interface NCVariantValidator { +public interface NCVariantValidator extends NCLifecycle { /** * TODO: Filters all found variants. * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala index 6f4c051..703fa6f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala @@ -26,6 +26,7 @@ import java.util.List as JList import java.util.Map as JMap import java.util.concurrent.atomic.AtomicReference import scala.concurrent.ExecutionContext +import scala.collection.mutable import scala.jdk.CollectionConverters.* // TODO: move it to right package. @@ -35,15 +36,18 @@ import scala.jdk.CollectionConverters.* * @param mdl */ class NCModelClientImpl(mdl: NCModel) extends LazyLogging: - verify(mdl.getConfig) + private var srvs: Seq[NCLifecycle] = _ - private val proc = NCRequestProcessor(mdl) + verifyAndStart(mdl.getConfig, mdl.getPipeline) + + private val reqProc = NCRequestProcessor(mdl) /** * - * @throws NCException + * @param cfg + * @param pipeline */ - private def verify(cfg: NCModelConfig): Unit = + private def verifyAndStart(cfg: NCModelConfig, pipeline: NCModelPipeline): Unit = // TODO: error texts. def check(obj: AnyRef, name: String): Unit = if obj == null then throw new NCException(s"Element cannot be null: '$name'") @@ -54,8 +58,19 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging: check(cfg.getId, "Id") check(cfg.getName, "Name") check(cfg.getVersion, "Version") - check(cfg.getTokenParser, "Token parser") - checkList(cfg.getEntityParsers, "Entity parsers") + check(pipeline.getTokenParser, "Token parser") + checkList(pipeline.getEntityParsers, "Entity parsers") + + val buf = mutable.ArrayBuffer.empty[NCLifecycle] ++ pipeline.getEntityParsers.asScala + if (pipeline.getTokenEnrichers != null) buf ++= pipeline.getTokenEnrichers.asScala + if (pipeline.getEntityEnrichers != null) buf ++= pipeline.getEntityEnrichers.asScala + if (pipeline.getTokenValidators != null) buf ++= pipeline.getTokenValidators.asScala + if (pipeline.getEntityValidators != null) buf ++= pipeline.getEntityValidators.asScala + if (pipeline.getVariantValidators != null) buf ++= pipeline.getVariantValidators.asScala + + srvs = buf.toSeq + + NCUtils.execPar(srvs.map(p => () => p.onStart(mdl.getConfig))*)(ExecutionContext.Implicits.global) /** * @@ -64,7 +79,7 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging: * @param usrId * @return */ - def ask(txt: String, data: JMap[String, AnyRef], usrId: String): CompletableFuture[NCResult] = proc.ask(txt, data, usrId) + def ask(txt: String, data: JMap[String, AnyRef], usrId: String): CompletableFuture[NCResult] = reqProc.ask(txt, data, usrId) /** * @@ -73,7 +88,7 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging: * @param usrId * @return */ - def askSync(txt: String, data: JMap[String, AnyRef], usrId: String): NCResult = proc.askSync(txt, data, usrId) + def askSync(txt: String, data: JMap[String, AnyRef], usrId: String): NCResult = reqProc.askSync(txt, data, usrId) // TODO: implement @@ -87,4 +102,11 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging: * * @param usrId */ - def clearDialog(usrId: String): Unit = () \ No newline at end of file + def clearDialog(usrId: String): Unit = () + + /** + * + */ + def close(): Unit = + reqProc.close() + NCUtils.execPar(srvs.map(p => () => p.onStop(mdl.getConfig))*)(ExecutionContext.Implicits.global) \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCRequestProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCRequestProcessor.scala index 263777e..8020db3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCRequestProcessor.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCRequestProcessor.scala @@ -49,6 +49,7 @@ class NCRequestProcessor(mdl: NCModel) extends LazyLogging : // TODO: shutdown. private val pool = new java.util.concurrent.ForkJoinPool() + private val pipeline = mdl.getPipeline private val cfg = mdl.getConfig private var tokParser: NCTokenParser = _ private var tokEnrichers: Seq[NCTokenEnricher] = _ @@ -65,13 +66,13 @@ class NCRequestProcessor(mdl: NCModel) extends LazyLogging : private def init(): Unit = def nvl[T](list: JList[T]): Seq[T] = if list == null then Seq.empty else list.asScala.toSeq - this.tokParser = cfg.getTokenParser - this.tokEnrichers = nvl(cfg.getTokenEnrichers) - this.entEnrichers = nvl(cfg.getEntityEnrichers) - this.entParsers = nvl(cfg.getEntityParsers) - this.tokenValidators = nvl(cfg.getTokenValidators) - this.entityValidators = nvl(cfg.getEntityValidators) - this.variantValidators = nvl(cfg.getVariantValidators) + this.tokParser = pipeline.getTokenParser + this.tokEnrichers = nvl(pipeline.getTokenEnrichers) + this.entEnrichers = nvl(pipeline.getEntityEnrichers) + this.entParsers = nvl(pipeline.getEntityParsers) + this.tokenValidators = nvl(pipeline.getTokenValidators) + this.entityValidators = nvl(pipeline.getEntityValidators) + this.variantValidators = nvl(pipeline.getVariantValidators) require(tokParser != null && entParsers.nonEmpty) @@ -184,3 +185,8 @@ class NCRequestProcessor(mdl: NCModel) extends LazyLogging : throw new NCException(txt) fut.completeAsync(() => matchAndExecute(prepare(txt, data, usrId, check))) + + /** + * + */ + def close(): Unit = NCUtils.shutdownPool(pool) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala index cb82486..653de1e 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala @@ -23,7 +23,7 @@ import org.apache.nlpcraft.internal.ansi.NCAnsi.* import java.io.* import java.net.* -import java.util.concurrent.CopyOnWriteArrayList +import java.util.concurrent.{CopyOnWriteArrayList, ExecutorService, TimeUnit} import java.util.regex.Pattern import java.util.zip.* import java.util.{Random, UUID} @@ -912,3 +912,17 @@ object NCUtils extends LazyLogging: if !errs.isEmpty then errs.forEach(e => logger.error("Error during service starting.", e)) // TODO: error message. throw new NCException("Some service cannot be started.") // TODO: error message. + + /** + * Shuts down executor service and waits for its finish. + * + * @param es Executor service. + */ + def shutdownPool(es: ExecutorService): Unit = + if es != null then + es.shutdown() + + try + es.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS) + catch + case _: InterruptedException => () // Safely ignore. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java index b3c52a2..53165e7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java @@ -43,4 +43,14 @@ public class NCNlpEntityParser implements NCEntityParser { public List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { return impl.parse(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java index b5b9663..465c941 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java @@ -67,4 +67,14 @@ public class NCOpenNlpEntityParser implements NCEntityParser { public List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { return impl.parse(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala index cd5228a..1fa9f28 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala @@ -41,11 +41,11 @@ class NCOpenNlpEntityParserImpl(resources: JList[String]) extends NCEntityParser private var finders: Seq[NameFinderME] = _ - start() + init() private case class Holder(start: Int, end: Int, name: String, probability: Double) - private def start(): Unit = + private def init(): Unit = val finders = mutable.ArrayBuffer.empty[NameFinderME] NCUtils.execPar( diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java index 00ed4f4..cb80fb0 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java @@ -88,4 +88,13 @@ public class NCSemanticEntityParser implements NCEntityParser { return impl.parse(req, cfg, toks); } + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala index 50d1118..e373199 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala @@ -159,9 +159,9 @@ class NCSemanticEntityParserImpl( private var synsHolder: NCSemanticSynonymsHolder = _ private var elemsMap: Map[String, NCSemanticElement] = _ - start() + init() - private def start(): Unit = + private def init(): Unit = val (macros, elements, elemsMap) = def toMap(elems: Seq[NCSemanticElement]): Map[String, NCSemanticElement] = elems.map(p => p.getId -> p).toMap diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java index 32d209d..4de1e7f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java @@ -36,4 +36,14 @@ public class NCBracketsTokenEnricher implements NCTokenEnricher { assert impl != null; impl.enrich(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java index f94f5fe..11d3bd7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java @@ -36,4 +36,14 @@ public class NCDictionaryTokenEnricher implements NCTokenEnricher { assert impl != null; impl.enrich(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java index ee9f3ce..e280521 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java @@ -36,4 +36,14 @@ public class NCLanguageTokenEnricher implements NCTokenEnricher { assert impl != null; impl.enrich(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java index 28ca259..9e30ae9 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java @@ -36,4 +36,14 @@ public class NCQuotesTokenEnricher implements NCTokenEnricher { assert impl != null; impl.enrich(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java index 6f20865..fedfbbd 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java @@ -43,10 +43,19 @@ public class NCStopWordsTokenEnricher implements NCTokenEnricher { impl = new NCStopWordsImpl(null, null); } - @Override public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { assert impl != null; impl.enrich(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" index 1befe90..544cb1c 100644 --- "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" +++ "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" @@ -48,4 +48,14 @@ public class NСSwearWordsTokenEnricher implements NCTokenEnricher { assert impl != null; impl.enrich(req, cfg, toks); } + + @Override + public void onStart(NCModelConfig cfg) { + impl.onStart(cfg); + } + + @Override + public void onStop(NCModelConfig cfg) { + impl.onStop(cfg); + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala index c4d910d..e581184 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala @@ -28,8 +28,8 @@ import java.util.List as JList class NCDictionaryImpl extends NCTokenEnricher: private var dict: Set[String] = _ - start() + init() - private def start(): Unit = dict = NCUtils.readResource("moby/354984si.ngl", "iso-8859-1").toSet + private def init(): Unit = dict = NCUtils.readResource("moby/354984si.ngl", "iso-8859-1").toSet override def enrich(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): Unit = toks.forEach(t => t.put("dict", dict.contains(t.getLemma))) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala index 13ef1d0..ec87e3f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala @@ -117,7 +117,7 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext private var stopWords: StopWordHolder = _ private var exceptions: StopWordHolder = _ - start() + init() private def read(path: String): Set[String] = NCUtils.readTextGzipResource(path, "UTF-8", logger).toSet @@ -281,7 +281,7 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext /** * */ - private def start(): Unit = + private def init(): Unit = addStems = if addStopsSet == null then Set.empty else addStopsSet.asScala.toSet.map(stem) exclStems = if exclStopsSet == null then Set.empty else exclStopsSet.asScala.toSet.map(stem) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCRequestProcessorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCRequestProcessorSpec.scala index 76aeddc..885a2c8 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCRequestProcessorSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCRequestProcessorSpec.scala @@ -38,13 +38,15 @@ class NCRequestProcessorSpec: @Test def test(): Unit = def test(txt: String, variantCnt: Int, elements: NCSemanticElement*): Unit = - val cfg = NCTestConfig.EN.clone() + val pipeline = NCTestConfig.EN_PIPELINE.clone() - val parser = new NCSemanticEntityParser(new NCEnPorterStemmer, cfg.getTokenParser, elements.asJava) - cfg.getEntityParsers.clear() - cfg.getEntityParsers.add(parser) + val parser = new NCSemanticEntityParser(new NCEnPorterStemmer, pipeline.getTokenParser, elements.asJava) + pipeline.getEntityParsers.clear() + pipeline.getEntityParsers.add(parser) - val res = new NCRequestProcessor(new NCModelAdapter(cfg)).prepare(txt, null, "userId", () => ()) + val res = new NCRequestProcessor( + new NCModelAdapter(NCTestConfig.CFG, pipeline) + ).prepare(txt, null, "userId", () => ()) println(s"Variants count: ${res.variants.size}") for ((v, idx) <- res.variants.zipWithIndex) @@ -57,9 +59,9 @@ class NCRequestProcessorSpec: test("t1 t2", 2, NCSemanticTestElement("t1", "t2"), NCSemanticTestElement("t2")) private def mkSlowModel(delayMs: Long, itersCnt: Int): NCModel = - val cfg = NCTestConfig.EN.clone() + val pipeline = NCTestConfig.EN_PIPELINE.clone() - cfg.getEntityParsers.clear() + pipeline.getEntityParsers.clear() def mkSlowParser(i: Int) = new NCEntityParser: @@ -68,9 +70,9 @@ class NCRequestProcessorSpec: Thread.sleep(delayMs) java.util.Collections.emptyList() - (0 until itersCnt).foreach(i => cfg.getEntityParsers.add(mkSlowParser(i))) + (0 until itersCnt).foreach(i => pipeline.getEntityParsers.add(mkSlowParser(i))) - new NCModelAdapter(cfg) + new NCModelAdapter(NCTestConfig.CFG, pipeline) @Test def testCancel(): Unit = diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala index 152fc08..b14e90d 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala @@ -38,8 +38,8 @@ class NCNlpEntityParserSpec: @Test def test(): Unit = val req = NCTestRequest("I had the lunch") - val toks = NCTestConfig.EN.getTokenParser.tokenize(req.txt) - val entities = parser.parse(req, NCTestConfig.EN, toks).asScala.toSeq + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(req.txt) + val entities = parser.parse(req, NCTestConfig.CFG, toks).asScala.toSeq NCTestUtils.printEntities(req.txt, entities) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala index 6e9d33d..d9f0035 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala @@ -46,8 +46,8 @@ class NCOpenNlpEntityParserSpec: private def check(txt: String, expected: String): Unit = val req = NCTestRequest(txt) - val toks = NCTestConfig.EN.getTokenParser.tokenize(txt) - val ents = parser.parse(req, NCTestConfig.EN, toks).asScala.toSeq + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) + val ents = parser.parse(req, NCTestConfig.CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala index 7da5b39..bffef2c 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala @@ -37,7 +37,7 @@ import scala.jdk.OptionConverters.RichOptional class NCSemanticEntityParserJsonSpec: private val parser = new NCSemanticEntityParser( new NCEnPorterStemmer(), - NCTestConfig.EN.getTokenParser, + NCTestConfig.EN_PIPELINE.getTokenParser, "models/alarm_model.json" ) @@ -45,8 +45,8 @@ class NCSemanticEntityParserJsonSpec: val req = NCTestRequest(txt) val ents = parser.parse( req, - NCTestConfig.EN, - NCTestConfig.EN.getTokenParser.tokenize(req.txt) + NCTestConfig.CFG, + NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(req.txt) ).asScala.toSeq NCTestUtils.printEntities(txt, ents) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala index b909816..93490eb 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala @@ -62,7 +62,7 @@ class NCSemanticEntityParserSpec: private val parser = new NCSemanticEntityParser( new NCEnPorterStemmer, - NCTestConfig.EN.getTokenParser, + NCTestConfig.EN_PIPELINE.getTokenParser, Seq( // Standard. NCSemanticTestElement("t1", synonyms = Set("t1")), @@ -95,13 +95,13 @@ class NCSemanticEntityParserSpec: txt: String, id: String, value: Option[String] = None, elemData: Option[Map[String, Any]] = None ): Unit = val req = NCTestRequest(txt) - val toks = NCTestConfig.EN.getTokenParser.tokenize(txt) + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) - stopWordsEnricher.enrich(req, NCTestConfig.EN, toks) + stopWordsEnricher.enrich(req, NCTestConfig.CFG, toks) NCTestUtils.printTokens(toks.asScala.toSeq) - val ents = parser.parse(req, NCTestConfig.EN, toks).asScala.toSeq + val ents = parser.parse(req, NCTestConfig.CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) require(ents.size == 1) @@ -123,13 +123,13 @@ class NCSemanticEntityParserSpec: */ private def checkMultiple(txt: String, ids: String*): Unit = val req = NCTestRequest(txt) - val toks = NCTestConfig.EN.getTokenParser.tokenize(txt) + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) - stopWordsEnricher.enrich(req, NCTestConfig.EN, toks) + stopWordsEnricher.enrich(req, NCTestConfig.CFG, toks) NCTestUtils.printTokens(toks.asScala.toSeq) - val ents = parser.parse(req, NCTestConfig.EN, toks).asScala.toSeq + val ents = parser.parse(req, NCTestConfig.CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) require(ents.size == ids.size) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala index 39981c8..f653a73 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala @@ -35,7 +35,7 @@ import scala.jdk.OptionConverters.RichOptional class NCSemanticEntityParserYamlSpec: private val parser = new NCSemanticEntityParser( new NCEnPorterStemmer, - NCTestConfig.EN.getTokenParser, + NCTestConfig.EN_PIPELINE.getTokenParser, "models/lightswitch_model.yaml" ) @@ -43,8 +43,8 @@ class NCSemanticEntityParserYamlSpec: val req = NCTestRequest(txt) val ents = parser.parse( req, - NCTestConfig.EN, - NCTestConfig.EN.getTokenParser.tokenize(req.txt) + NCTestConfig.CFG, + NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(req.txt) ).asScala.toSeq NCTestUtils.printEntities(txt, ents) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala index 2514a0a..3980c89 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala @@ -36,8 +36,8 @@ class NCBracketsTokenEnricherSpec: * @param brackets */ private def check(txt: String, brackets: Set[Integer]): Unit = - val toks = NCTestConfig.EN.getTokenParser.tokenize(txt) - enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toks) + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) + enricher.enrich(NCTestRequest(txt), NCTestConfig.CFG, toks) val seq = toks.asScala.toSeq NCTestUtils.printTokens(seq) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala index c5f5a2b..0ad6673 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala @@ -31,12 +31,12 @@ class NCDictionaryTokenEnricherSpec: @Test def test(): Unit = - val toks = NCTestConfig.EN.getTokenParser.tokenize("milk XYZ").asScala.toSeq + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize("milk XYZ").asScala.toSeq require(toks.head.getOpt[Boolean]("dict:en").isEmpty) require(toks.last.getOpt[Boolean]("dict:en").isEmpty) - enricher.enrich(null, NCTestConfig.EN, toks.asJava) + enricher.enrich(null, NCTestConfig.CFG, toks.asJava) NCTestUtils.printTokens(toks) require(toks.head.get[Boolean]("dict")) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala index 6cd94c6..4bc27f6 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala @@ -31,7 +31,7 @@ class NCLanguageTokenEnricherSpec: @Test def test(): Unit = - val toks = NCTestConfig.EN.getTokenParser.tokenize("english русский").asScala.toSeq + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize("english русский").asScala.toSeq require(toks.head.getOpt[Boolean]("lang:en").isEmpty) require(toks.last.getOpt[Boolean]("lang:en").isEmpty) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala index 1b35559..199c934 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala @@ -36,10 +36,10 @@ class NCQuotesTokenEnricherSpec: * @param quotes */ private def check(txt: String, quotes: Set[Integer]): Unit = - val toks = NCTestConfig.EN.getTokenParser.tokenize(txt) + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) val toksSeq = toks.asScala.toSeq - enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toks) + enricher.enrich(NCTestRequest(txt), NCTestConfig.CFG, toks) NCTestUtils.printTokens(toksSeq) toksSeq.foreach (tok => require(!(tok.get[Boolean]("quoted") ^ quotes.contains(tok.getIndex)))) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala index 16f9646..0f5c849 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala @@ -36,13 +36,13 @@ class NCStopWordsEnricherSpec: * @param boolVals */ private def test(enricher: NCStopWordsTokenEnricher, txt: String, boolVals: Boolean*): Unit = - val toksList = NCTestConfig.EN.getTokenParser.tokenize(txt) + val toksList = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) require(toksList.size == boolVals.size) val toks = toksList.asScala.toSeq toks.foreach(tok => require(tok.getOpt[Boolean]("stopword").isEmpty)) - enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toksList) + enricher.enrich(NCTestRequest(txt), NCTestConfig.CFG, toksList) NCTestUtils.printTokens(toks) toks.zip(boolVals).foreach { (tok, boolVal) => require(tok.get[Boolean]("stopword") == boolVal) } diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala index 598e815..0d3435b 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala @@ -32,7 +32,7 @@ class NCSwearWordsTokenEnricherSpec: @Test def test(): Unit = - val toks = NCTestConfig.EN.getTokenParser.tokenize("english ass").asScala.toSeq + val toks = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize("english ass").asScala.toSeq require(toks.head.getOpt[Boolean]("swear:en").isEmpty) require(toks.last.getOpt[Boolean]("swear:en").isEmpty) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala index 08bc05c..1b6b5ee 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala @@ -35,9 +35,9 @@ class NCOpenNlpTokenParserSpec: private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword") private def test(txt: String, validate: Seq[NCToken] => _): Unit = - val toksList = NCTestConfig.EN.getTokenParser.tokenize(txt) + val toksList = NCTestConfig.EN_PIPELINE.getTokenParser.tokenize(txt) - enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toksList) + enricher.enrich(NCTestRequest(txt), NCTestConfig.CFG, toksList) val toks = toksList.asScala.toSeq assert(toks.nonEmpty) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala index bde08b7..b35b15d 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala @@ -19,7 +19,7 @@ package org.apache.nlpcraft.nlp.util import org.apache.nlpcraft.* import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParser -import org.apache.nlpcraft.nlp.util.NCTestConfig.* +import org.apache.nlpcraft.nlp.util.NCTestPipeline.* import java.util.{Optional, ArrayList as JAList, List as JList} @@ -27,12 +27,9 @@ import java.util.{Optional, ArrayList as JAList, List as JList} * * @param tokParser */ -case class NCTestConfig(tokParser: NCTokenParser) extends NCPropertyMapAdapter with NCModelConfig with Cloneable: +case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter with NCModelPipeline with Cloneable: require(tokParser != null) - override val getId = "testId" - override val getName = "test" - override val getVersion = "1.0" override val getTokenParser: NCTokenParser = tokParser override val getTokenEnrichers = new JAList[NCTokenEnricher]() override val getEntityEnrichers = new JAList[NCEntityEnricher]() @@ -41,8 +38,8 @@ case class NCTestConfig(tokParser: NCTokenParser) extends NCPropertyMapAdapter w override val getEntityValidators = new JAList[NCEntityValidator]() override val getVariantValidators = new JAList[NCVariantValidator]() - override def clone(): NCTestConfig = - val copy = NCTestConfig(this.tokParser) + override def clone(): NCTestPipeline = + val copy = NCTestPipeline(this.tokParser) copy.getTokenEnrichers.addAll(this.getTokenEnrichers) copy.getEntityEnrichers.addAll(this.getEntityEnrichers) @@ -55,7 +52,8 @@ case class NCTestConfig(tokParser: NCTokenParser) extends NCPropertyMapAdapter w /** * */ object NCTestConfig: - final val EN = NCTestConfig( + final val CFG = new NCModelConfig("testId", "test", "1.0") + final val EN_PIPELINE = NCTestPipeline( new NCOpenNlpTokenParser( "opennlp/en-token.bin", "opennlp/en-pos-maxent.bin", diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestRequest.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestRequest.scala index 1819ee7..ac568fa 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestRequest.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestRequest.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.nlp.util import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.util.NCTestConfig.* +import org.apache.nlpcraft.nlp.util.NCTestPipeline.* import java.util import java.util.Map as JMap