This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new ebd6a60 Model builder refactoring.
ebd6a60 is described below
commit ebd6a60724a20e26d633a01fedaaed2fb8a612e8
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Mar 16 22:30:39 2022 +0300
Model builder refactoring.
---
.../apache/nlpcraft/examples/time/EchoModel.java | 2 +-
.../lightswitch/LightSwitchGroovyModel.groovy | 2 +-
.../examples/lightswitch/LightSwitchJavaModel.java | 2 +-
.../examples/lightswitch/LightSwitchKotlinModel.kt | 2 +-
.../lightswitch/LightSwitchScalaModel.scala | 2 +-
.../apache/nlpcraft/examples/time/TimeModel.java | 2 +-
.../apache/nlpcraft/NCModelPipelineBuilder.java | 107 ++++++++++-----------
.../apache/nlpcraft/nlp/NCEntityEnricherSpec.scala | 4 +-
.../nlpcraft/nlp/NCEntityValidatorSpec.scala | 4 +-
.../apache/nlpcraft/nlp/NCTokenEnricherSpec.scala | 4 +-
.../apache/nlpcraft/nlp/NCTokenValidatorSpec.scala | 4 +-
.../apache/nlpcraft/nlp/NCVariantFilterSpec.scala | 4 +-
12 files changed, 74 insertions(+), 65 deletions(-)
diff --git
a/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
b/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
index a514717..9715f70 100644
---
a/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
+++
b/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
@@ -39,7 +39,7 @@ public class EchoModel extends NCModelAdapter {
public EchoModel() {
super(
new NCModelConfig("nlpcraft.echo.ex", "Echo Example Model", "1.0"),
- new NCModelPipelineBuilder().withLanguage("EN").build()
+ new NCModelPipelineBuilder().build()
);
}
diff --git
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
index 27d3999..722d2c1 100644
---
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
+++
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
@@ -35,7 +35,7 @@ class LightSwitchGroovyModel extends NCModelAdapter {
LightSwitchGroovyModel() {
super(
new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch
Example Model", "1.0"),
- new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN",
"lightswitch_model.yaml").build()
+ new NCModelPipelineBuilder().withSemantic("EN",
"lightswitch_model.yaml").build()
)
}
diff --git
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
index b3058b8..eb050a8 100644
---
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
+++
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
@@ -38,7 +38,7 @@ public class LightSwitchJavaModel extends NCModelAdapter {
public LightSwitchJavaModel() {
super(
new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch
Example Model", "1.0"),
- new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN",
"lightswitch_model.yaml").build()
+ new NCModelPipelineBuilder().withSemantic("EN",
"lightswitch_model.yaml").build()
);
}
diff --git
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
index 72a4e58..0082944 100644
---
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
+++
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
@@ -37,7 +37,7 @@ import java.util.stream.Collectors
*/
class LightSwitchKotlinModel : NCModelAdapter(
NCModelConfig("nlpcraft.lightswitch.kotlin.ex", "LightSwitch Example
Model", "1.0"),
- NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN",
"lightswitch_model.yaml").build()
+ NCModelPipelineBuilder().withSemantic("EN",
"lightswitch_model.yaml").build()
) {
/**
* Intent and its on-match callback.
diff --git
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
index 451dc4d..0852975 100644
---
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
+++
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
@@ -40,7 +40,7 @@ import
org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
class LightSwitchScalaModel extends NCModelAdapter(
new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example
Model", "1.0"),
- new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN",
"lightswitch_model.yaml").build()
+ new NCModelPipelineBuilder().withSemantic("EN",
"lightswitch_model.yaml").build()
):
/**
* Intent and its on-match callback.
diff --git
a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
index 365f503..127c464 100644
---
a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
+++
b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
@@ -75,7 +75,7 @@ public class TimeModel extends NCModelAdapter {
public TimeModel() {
super(
new NCModelConfig("nlpcraft.time.ex", "Time Example Model", "1.0"),
- new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN",
"time_model.yaml").build()
+ new NCModelPipelineBuilder().withSemantic("EN",
"time_model.yaml").build()
);
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index b8c08fa..1fd8500 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -49,6 +49,30 @@ public class NCModelPipelineBuilder {
private Optional<NCVariantFilter> varFilter = Optional.empty();
/**
+ *
+ * @return
+ */
+ private static NCSemanticStemmer mkEnStemmer() {
+ return new NCSemanticStemmer() {
+ private final PorterStemmer ps = new PorterStemmer();
+
+ @Override
+ public synchronized String stem(String txt) {
+ return ps.stem(txt.toLowerCase()); // TODO:
+ }
+ };
+ }
+
+ /**
+ *
+ * @return
+ */
+ private NCOpenNLPTokenParser mkEnOpenNlpTokenParser() {
+ return new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"));
+ }
+
+
+ /**
* @param tokEnrichers
* @return This instance for call chaining.
*/
@@ -196,31 +220,21 @@ public class NCModelPipelineBuilder {
return this;
}
- public NCModelPipelineBuilder withLanguage(String lang) {
- Objects.requireNonNull(lang, "Language cannot be null.");
-
- switch (lang.toUpperCase()) {
- case "EN":
- tokParser = new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"));
-
- tokEnrichers.add(new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- ));
- tokEnrichers.add(new NCEnStopWordsTokenEnricher());
- tokEnrichers.add(new
NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
- tokEnrichers.add(new NCEnQuotesTokenEnricher());
- tokEnrichers.add(new NCEnDictionaryTokenEnricher());
- tokEnrichers.add(new NCEnBracketsTokenEnricher());
-
- this.entParsers.addAll(entParsers);
-
- break;
- default:
- throw new IllegalArgumentException("Unsupported language: " +
lang);
- }
-
- return this;
+ /**
+ *
+ */
+ private void setEnComponents() {
+ tokParser = mkEnOpenNlpTokenParser();
+
+ tokEnrichers.add(new NCOpenNLPLemmaPosTokenEnricher(
+ NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+ NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
+ ));
+ tokEnrichers.add(new NCEnStopWordsTokenEnricher());
+ tokEnrichers.add(new
NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
+ tokEnrichers.add(new NCEnQuotesTokenEnricher());
+ tokEnrichers.add(new NCEnDictionaryTokenEnricher());
+ tokEnrichers.add(new NCEnBracketsTokenEnricher());
}
/**
@@ -231,23 +245,15 @@ public class NCModelPipelineBuilder {
* @return
*/
public NCModelPipelineBuilder withSemantic(String lang, Map<String,
String> macros, List<NCSemanticElement> elms) {
+ Objects.requireNonNull(lang, "Language cannot be null.");
+ Objects.requireNonNull(elms, "Model elements cannot be null.");
+ if (elms.isEmpty()) throw new IllegalArgumentException("Model elements
cannot be empty.");
+
switch (lang.toUpperCase()) {
case "EN":
- this.entParsers.add(
- new NCSemanticEntityParser(
- new NCSemanticStemmer() {
- private final PorterStemmer ps = new
PorterStemmer();
-
- @Override
- public synchronized String stem(String txt) {
- return ps.stem(txt.toLowerCase()); // TODO:
- }
- },
- new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
- macros,
- elms
- )
- );
+ setEnComponents();
+
+ this.entParsers.add(new NCSemanticEntityParser(mkEnStemmer(),
mkEnOpenNlpTokenParser(), macros, elms));
break;
@@ -275,22 +281,14 @@ public class NCModelPipelineBuilder {
* @return
*/
public NCModelPipelineBuilder withSemantic(String lang, String src) {
+ Objects.requireNonNull(lang, "Language cannot be null.");
+ Objects.requireNonNull(src, "Model source cannot be null.");
+
switch (lang.toUpperCase()) {
case "EN":
- this.entParsers.add(
- new NCSemanticEntityParser(
- new NCSemanticStemmer() {
- private final PorterStemmer ps = new
PorterStemmer();
-
- @Override
- public synchronized String stem(String txt) {
- return ps.stem(txt.toLowerCase()); // TODO:
- }
- },
- new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
- src
- )
- );
+ setEnComponents();
+
+ this.entParsers.add(new NCSemanticEntityParser(mkEnStemmer(),
mkEnOpenNlpTokenParser(), src));
break;
@@ -301,6 +299,7 @@ public class NCModelPipelineBuilder {
return this;
}
+
/**
* @return
*/
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
index f5414e4..8f05dde 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
@@ -18,7 +18,9 @@
package org.apache.nlpcraft.nlp
import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.util.NCTestUtils
import org.junit.jupiter.api.Test
@@ -38,7 +40,7 @@ class NCEntityEnricherSpec:
private def mkBuilder(): NCModelPipelineBuilder =
new NCModelPipelineBuilder().
- withLanguage("EN").
+ withTokenParser(new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))).
// For intents matching, we have to add at least one entity
parser.
withEntityParser(new NCNLPEntityParser)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
index de6070f..0b40526 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
@@ -18,6 +18,8 @@
package org.apache.nlpcraft.nlp
import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.util.NCTestUtils
import org.junit.jupiter.api.Test
@@ -34,7 +36,7 @@ class NCEntityValidatorSpec:
NCTestUtils.askSomething(mdl, ok)
- private def mkBuilder(): NCModelPipelineBuilder = new
NCModelPipelineBuilder().withLanguage("EN")
+ private def mkBuilder(): NCModelPipelineBuilder = new
NCModelPipelineBuilder().withTokenParser(new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")))
private def mkPipeline(apply: NCModelPipelineBuilder =>
NCModelPipelineBuilder): NCModelPipeline = apply(mkBuilder()).build()
@Test
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
index 485369d..802742c 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
@@ -18,7 +18,9 @@
package org.apache.nlpcraft.nlp
import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.util.NCTestUtils
import org.junit.jupiter.api.Test
@@ -38,7 +40,7 @@ class NCTokenEnricherSpec:
private def mkBuilder(): NCModelPipelineBuilder =
new NCModelPipelineBuilder().
- withLanguage("EN").
+ withTokenParser(new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))).
// For intents matching, we have to add at least one entity
parser.
withEntityParser(new NCNLPEntityParser)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
index e7b0a4a..e9c938c 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
@@ -18,6 +18,8 @@
package org.apache.nlpcraft.nlp
import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.util.NCTestUtils
import org.junit.jupiter.api.Test
@@ -34,7 +36,7 @@ class NCTokenValidatorSpec:
NCTestUtils.askSomething(mdl, ok)
- private def mkBuilder(): NCModelPipelineBuilder = new
NCModelPipelineBuilder().withLanguage("EN")
+ private def mkBuilder(): NCModelPipelineBuilder = new
NCModelPipelineBuilder().withTokenParser(new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")))
private def mkPipeline(apply: NCModelPipelineBuilder =>
NCModelPipelineBuilder): NCModelPipeline = apply(mkBuilder()).build()
@Test
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
index a9274be..d5b7142 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
@@ -18,7 +18,9 @@
package org.apache.nlpcraft.nlp
import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.util.NCTestUtils
import org.junit.jupiter.api.Test
@@ -39,7 +41,7 @@ class NCVariantFilterSpec:
private def mkBuilder(): NCModelPipelineBuilder =
new NCModelPipelineBuilder().
- withLanguage("EN").
+ withTokenParser(new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))).
// For intents matching, we have to add at least one entity
parser.
withEntityParser(new NCNLPEntityParser)