This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new c28f449 Tests infrastructure improved.
c28f449 is described below
commit c28f44965492ae2d7dd37c7b78ed727ea6f3b09e
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Mar 17 11:20:23 2022 +0300
Tests infrastructure improved.
---
.../parser/stanford/NCStanfordNLPTokenParser.java | 3 ++-
.../stanford/NCStanfordNLPEntityParserSpec.scala | 2 +-
.../stanford/NCStanfordNLPTokenParserSpec.scala | 3 +--
.../nlpcraft/nlp/util/stanford/NCTestConfig.scala | 5 +----
.../nlpcraft/internal/impl/NCModelClientSpec.scala | 4 +---
.../internal/impl/NCModelPingPongSpec.scala | 4 +---
.../internal/impl/NCModelPipelineManagerSpec.scala | 6 ++---
.../internal/impl/scan/NCTestModelJava.java | 2 +-
.../nlp/entity/parser/NCNLPEntityParserSpec.scala | 2 +-
.../entity/parser/NCOpenNLPEntityParserSpec.scala | 26 +++++++++++++---------
.../semantic/NCSemanticEntityParserJsonSpec.scala | 8 ++-----
.../semantic/NCSemanticEntityParserLemmaSpec.scala | 14 ++++--------
.../semantic/NCSemanticEntityParserSpec.scala | 24 ++++++++------------
.../semantic/NCSemanticEntityParserYamlSpec.scala | 8 ++-----
.../enricher/NCBracketsTokenEnricherSpec.scala | 2 +-
.../enricher/NCDictionaryTokenEnricherSpec.scala | 10 ++-------
.../token/enricher/NCQuotesTokenEnricherSpec.scala | 9 ++------
.../token/enricher/NCStopWordsEnricherSpec.scala | 9 ++------
.../enricher/NCSwearWordsTokenEnricherSpec.scala | 2 +-
.../token/enricher/impl/NCStopWordsImplSpec.scala | 2 +-
.../token/parser/NCOpenNLPTokenParserSpec.scala | 13 +++--------
.../apache/nlpcraft/nlp/util/NCTestConfig.scala | 12 ++++------
.../apache/nlpcraft/nlp/util/NCTestConfigJava.java | 16 +++++++------
.../nlpcraft/nlp/util/NCTestModelAdapter.scala | 4 ++--
.../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 4 ++--
25 files changed, 72 insertions(+), 122 deletions(-)
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
index 747ac3e..78cd92e 100644
---
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
@@ -26,7 +26,8 @@ import java.util.List;
import java.util.Objects;
/**
- *
+ * TODO:
+ * Note that this tokenizer also adds 2 properties into parsed token: lemma
and pos
*/
public class NCStanfordNLPTokenParser implements NCTokenParser {
private final NCStanfordNLPTokenParserImpl impl;
diff --git
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
index 49caefa..13eeabd 100644
---
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
+++
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
@@ -34,7 +34,7 @@ class NCStanfordNLPEntityParserSpec:
def test(): Unit =
val txt = "Los Angeles, 23 August, 23 and [email protected],
tomorrow"
- val toks = EN_STANFORD_PIPELINE.getTokenParser.tokenize(txt)
+ val toks = TOK_STANFORD_PARSER.tokenize(txt)
NCTestUtils.printTokens(toks.asScala.toSeq)
val res = parser.parse(NCTestRequest(txt), CFG, toks)
diff --git
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
index 204e0c5..157d7d2 100644
---
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
+++
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
@@ -31,8 +31,7 @@ import scala.jdk.CollectionConverters.*
class NCStanfordNLPTokenParserSpec:
@Test
def test(): Unit =
- val toks =
- EN_STANFORD_PIPELINE.getTokenParser.tokenize("I had a lunch with
brand names 'AAA'").asScala.toSeq
+ val toks = TOK_STANFORD_PARSER.tokenize("I had a lunch with brand
names 'AAA'").asScala.toSeq
require(toks.sizeIs > 1)
NCTestUtils.printTokens(toks)
diff --git
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
index 4e5f644..ea80fdc 100644
---
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
+++
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
@@ -17,7 +17,4 @@ final val STANFORD =
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner")
new StanfordCoreNLP(props)
-/**
- *
- */
-final val EN_STANFORD_PIPELINE = NCTestPipeline(new
NCStanfordNLPTokenParser(STANFORD))
+final val TOK_STANFORD_PARSER = new NCStanfordNLPTokenParser(STANFORD)
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
index eb3df69..5123c84 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
@@ -28,9 +28,7 @@ import scala.util.Using
class NCModelClientSpec:
private def test0(mdl: NCTestModelAdapter): Unit =
- mdl.getPipeline.getEntityParsers.add(
- NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")
- )
+
mdl.getPipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml"))
Using.resource(new NCModelClient(mdl)) { client =>
val res = client.ask("Lights on at second floor kitchen", null,
"userId")
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
index 2650993..31e4cf5 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
@@ -59,9 +59,7 @@ class NCModelPingPongSpec:
def onOther(im: NCIntentMatch, @NCIntentTerm("other") other:
NCEntity): NCResult =
R(ASK_RESULT, s"Some request by: ${other.mkText()}")
- MDL.getPipeline.getEntityParsers.add(
- NCTestUtils.mkENSemanticParser(Seq(STE("command"), STE("confirm"),
STE("other")).asJava)
- )
+
MDL.getPipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser(Seq(STE("command"),
STE("confirm"), STE("other")).asJava))
@BeforeEach
def setUp(): Unit = client = new NCModelClient(MDL)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
index 86ff1b5..bb78d4d 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
@@ -41,11 +41,9 @@ class NCModelPipelineManagerSpec:
@Test
def test(): Unit =
def test(txt: String, variantCnt: Int, elements: NCSemanticElement*):
Unit =
- val pipeline = EN_PIPELINE.clone()
+ val pipeline = mkEnPipeline
- val parser = NCTestUtils.mkENSemanticParser(elements.asJava)
- pipeline.getEntityParsers.clear()
- pipeline.getEntityParsers.add(parser)
+
pipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser(elements.asJava))
val res = new NCModelPipelineManager(CFG, pipeline).prepare(txt,
null, "userId")
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
index 42dc72f..2289709 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
@@ -40,7 +40,7 @@ public class NCTestModelJava {
*/
public static NCModel mkModel() {
return
- new NCModelAdapter(NCTestConfigJava.CFG,
NCTestConfigJava.EN_PIPELINE) {
+ new NCModelAdapter(NCTestConfigJava.CFG,
NCTestConfigJava.mkEnPipeline()) {
@NCIntent(
"intent=locInt term(single)~{# == 'id1'} term(list)~{# ==
'id2'}[0,10] term(opt)~{# == 'id3'}?"
)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
index 4ace4ac..b652b9b 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
@@ -41,7 +41,7 @@ class NCNLPEntityParserSpec:
@Test
def test(): Unit =
val req = NCTestRequest("I had the lunch")
- val toks = EN_PIPELINE.getTokenParser.tokenize(req.txt)
+ val toks = EN_TOK_PARSER.tokenize(req.txt)
val entities = parser.parse(req, CFG, toks).asScala.toSeq
NCTestUtils.printEntities(req.txt, entities)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
index 1ab256c..cc15bca 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
@@ -33,16 +33,20 @@ import scala.jdk.OptionConverters.RichOptional
*
*/
class NCOpenNLPEntityParserSpec:
- private val parser = new NCOpenNLPEntityParser(
- Seq(
- "opennlp/en-ner-location.bin",
- "opennlp/en-ner-money.bin",
- "opennlp/en-ner-person.bin",
- "opennlp/en-ner-organization.bin",
- "opennlp/en-ner-date.bin",
- "opennlp/en-ner-percentage.bin"
- ).map(NCResourceReader.getPath).asJava
- )
+ private val parser =
+ val list = new java.util.concurrent.CopyOnWriteArrayList[String]()
+
+ NCUtils.execPar(
+ Seq(
+ "opennlp/en-ner-location.bin",
+ "opennlp/en-ner-money.bin",
+ "opennlp/en-ner-person.bin",
+ "opennlp/en-ner-organization.bin",
+ "opennlp/en-ner-date.bin",
+ "opennlp/en-ner-percentage.bin"
+ ).map(p => () =>
list.add(NCResourceReader.getPath(p)))*)(ExecutionContext.Implicits.global)
+
+ new NCOpenNLPEntityParser(list)
/**
*
@@ -51,7 +55,7 @@ class NCOpenNLPEntityParserSpec:
*/
private def check(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toks = EN_TOK_PARSER.tokenize(txt)
val ents = parser.parse(req, CFG, toks).asScala.toSeq
NCTestUtils.printEntities(txt, ents)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
index e588f4a..b11bcf5 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
@@ -34,7 +34,7 @@ import scala.jdk.OptionConverters.RichOptional
*
*/
class NCSemanticEntityParserJsonSpec:
- private val parser =
NCTestUtils.mkENSemanticParser("models/alarm_model.json")
+ private val semParser =
NCTestUtils.mkENSemanticParser("models/alarm_model.json")
/**
*
@@ -44,11 +44,7 @@ class NCSemanticEntityParserJsonSpec:
*/
private def check(txt: String, id: String, elemData: Option[Map[String,
Any]] = None): Unit =
val req = NCTestRequest(txt)
- val ents = parser.parse(
- req,
- CFG,
- EN_PIPELINE.getTokenParser.tokenize(req.txt)
- ).asScala.toSeq
+ val ents = semParser.parse(req, CFG,
EN_TOK_PARSER.tokenize(req.txt)).asScala.toSeq
NCTestUtils.printEntities(txt, ents)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
index d3e15b2..73f0fd8 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
@@ -35,12 +35,6 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCSemanticEntityParserLemmaSpec:
- private val lemmaTokEnricher = new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- )
- private val swTokEnricher = new NCEnStopWordsTokenEnricher
- private val tokParser = new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))
private val lemmaStemmer =
new NCSemanticStemmer():
override def stem(txt: String): String = if wrapped(txt) then
unwrap(txt) else UUID.randomUUID().toString
@@ -64,15 +58,15 @@ class NCSemanticEntityParserLemmaSpec:
val mgr = new NCModelPipelineManager(
CFG,
new NCModelPipelineBuilder().
- withTokenParser(tokParser).
- withTokenEnricher(lemmaTokEnricher).
- withTokenEnricher(swTokEnricher).
+ withTokenParser(EN_TOK_PARSER).
+ withTokenEnricher(EN_TOK_LEMMA_POS_ENRICHER).
+ withTokenEnricher(EN_TOK_STOP_ENRICHER).
// 1. Wraps lemmas.
withTokenEnricher((req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]) =>
toks.forEach(t => t.put("lemma",
wrap(t.get[String]("lemma"))))
).
// 2. Semantic parser with fixed stemmer which stems only
lemmas.
- withEntityParser(new NCSemanticEntityParser(lemmaStemmer,
tokParser, elems.asJava)).
+ withEntityParser(new NCSemanticEntityParser(lemmaStemmer,
EN_TOK_PARSER, elems.asJava)).
build()
)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index 4c97e1f..d298673 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -35,7 +35,7 @@ import scala.jdk.OptionConverters.RichOptional
*/
class NCSemanticEntityParserSpec:
import NCSemanticTestElement as E
- private val parser =
+ private val semParser =
NCTestUtils.mkENSemanticParser(
Seq(
// Standard.
@@ -55,12 +55,6 @@ class NCSemanticEntityParserSpec:
).asJava
)
- private val stopWordsEnricher = new NCEnStopWordsTokenEnricher()
- private val lemmaPosEnricher = new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- )
-
/**
*
* @param txt
@@ -70,14 +64,14 @@ class NCSemanticEntityParserSpec:
*/
private def check(txt: String, id: String, value: Option[String] = None,
elemData: Option[Map[String, Any]] = None): Unit =
val req = NCTestRequest(txt)
- val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toks = EN_TOK_PARSER.tokenize(txt)
- lemmaPosEnricher.enrich(req, CFG, toks)
- stopWordsEnricher.enrich(req, CFG, toks)
+ EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
+ EN_TOK_STOP_ENRICHER.enrich(req, CFG, toks)
NCTestUtils.printTokens(toks.asScala.toSeq)
- val ents = parser.parse(req, CFG, toks).asScala.toSeq
+ val ents = semParser.parse(req, CFG, toks).asScala.toSeq
NCTestUtils.printEntities(txt, ents)
require(ents.sizeIs == 1)
@@ -99,14 +93,14 @@ class NCSemanticEntityParserSpec:
*/
private def checkMultiple(txt: String, ids: String*): Unit =
val req = NCTestRequest(txt)
- val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toks = EN_TOK_PARSER.tokenize(txt)
- lemmaPosEnricher.enrich(req, CFG, toks)
- stopWordsEnricher.enrich(req, CFG, toks)
+ EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
+ EN_TOK_STOP_ENRICHER.enrich(req, CFG, toks)
NCTestUtils.printTokens(toks.asScala.toSeq)
- val ents = parser.parse(req, CFG, toks).asScala.toSeq
+ val ents = semParser.parse(req, CFG, toks).asScala.toSeq
NCTestUtils.printEntities(txt, ents)
require(ents.sizeIs == ids.size)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
index 9a1df76..3a4702c 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
@@ -32,7 +32,7 @@ import scala.jdk.OptionConverters.RichOptional
*
*/
class NCSemanticEntityParserYamlSpec:
- private val parser =
NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")
+ private val semParser =
NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")
/**
*
@@ -41,11 +41,7 @@ class NCSemanticEntityParserYamlSpec:
*/
private def check(txt: String, id: String): Unit =
val req = NCTestRequest(txt)
- val ents = parser.parse(
- req,
- CFG,
- EN_PIPELINE.getTokenParser.tokenize(req.txt)
- ).asScala.toSeq
+ val ents = semParser.parse(req, CFG,
EN_TOK_PARSER.tokenize(req.txt)).asScala.toSeq
NCTestUtils.printEntities(txt, ents)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
index e74c656..9b07783 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
@@ -36,7 +36,7 @@ class NCBracketsTokenEnricherSpec:
* @param brackets
*/
private def check(txt: String, brackets: Set[Integer]): Unit =
- val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toks = EN_TOK_PARSER.tokenize(txt)
enricher.enrich(NCTestRequest(txt), CFG, toks)
val seq = toks.asScala.toSeq
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
index 244690b..43c52e7 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
@@ -30,23 +30,17 @@ import scala.jdk.CollectionConverters.*
class NCDictionaryTokenEnricherSpec:
private val dictEnricher = new NCEnDictionaryTokenEnricher()
- private val lemmaPosEnricher =
- new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- )
-
@Test
def test(): Unit =
val txt = "milk XYZ"
- val toks = EN_PIPELINE.getTokenParser.tokenize(txt).asScala.toSeq
+ val toks = EN_TOK_PARSER.tokenize(txt).asScala.toSeq
require(toks.head.getOpt[Boolean]("dict:en").isEmpty)
require(toks.last.getOpt[Boolean]("dict:en").isEmpty)
val req = NCTestRequest(txt)
- lemmaPosEnricher.enrich(req, CFG, toks.asJava)
+ EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks.asJava)
dictEnricher.enrich(req, CFG, toks.asJava)
NCTestUtils.printTokens(toks)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
index 700bfa7..0f6dbe6 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
@@ -29,11 +29,6 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCQuotesTokenEnricherSpec:
- private val lemmaPosEnricher =
- new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- )
private val quoteEnricher = new NCEnQuotesTokenEnricher
/**
@@ -42,11 +37,11 @@ class NCQuotesTokenEnricherSpec:
* @param quotes
*/
private def check(txt: String, quotes: Set[Integer]): Unit =
- val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toks = EN_TOK_PARSER.tokenize(txt)
val toksSeq = toks.asScala.toSeq
val req = NCTestRequest(txt)
- lemmaPosEnricher.enrich(req, CFG, toks)
+ EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
quoteEnricher.enrich(req, CFG, toks)
NCTestUtils.printTokens(toksSeq)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
index 517e30a..8bdd11a 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
@@ -30,11 +30,6 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCStopWordsEnricherSpec:
- private val lemmaPosEnricher = new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- )
-
/**
*
* @param stopEnricher
@@ -42,7 +37,7 @@ class NCStopWordsEnricherSpec:
* @param boolVals
*/
private def test(stopEnricher: NCEnStopWordsTokenEnricher, txt: String,
boolVals: Boolean*): Unit =
- val toksList = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toksList = EN_TOK_PARSER.tokenize(txt)
require(toksList.size == boolVals.size)
val toks = toksList.asScala.toSeq
@@ -50,7 +45,7 @@ class NCStopWordsEnricherSpec:
val req = NCTestRequest(txt)
- lemmaPosEnricher.enrich(req, CFG, toksList)
+ EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toksList)
stopEnricher.enrich(req, CFG, toksList)
NCTestUtils.printTokens(toks)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
index 3493dcc..35ada43 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
@@ -33,7 +33,7 @@ class NCSwearWordsTokenEnricherSpec:
@Test
def test(): Unit =
- val toks = EN_PIPELINE.getTokenParser.tokenize("english
ass").asScala.toSeq
+ val toks = EN_TOK_PARSER.tokenize("english ass").asScala.toSeq
require(toks.head.getOpt[Boolean]("swear:en").isEmpty)
require(toks.last.getOpt[Boolean]("swear:en").isEmpty)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
index 7bdf7c4..50b7dfb 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
@@ -37,7 +37,7 @@ class NCStopWordsImplSpec:
* @param expected
*/
private def test0(words: Seq[W], expected: Seq[String]): Unit =
- val toksList =
EN_PIPELINE.getTokenParser.tokenize(words.map(_.text).mkString(" "))
+ val toksList = EN_TOK_PARSER.tokenize(words.map(_.text).mkString(" "))
require(toksList.size == words.size)
val toks = toksList.asScala
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
index d022350..9470db7 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
@@ -31,22 +31,15 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCOpenNLPTokenParserSpec:
- private val lemmaPosEnricher =
- new NCOpenNLPLemmaPosTokenEnricher(
- NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
- NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
- )
- private val stopEnricher = new NCEnStopWordsTokenEnricher(null, null)
-
private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
private def test(txt: String, validate: Seq[NCToken] => _): Unit =
- val toksList = EN_PIPELINE.getTokenParser.tokenize(txt)
+ val toksList = EN_TOK_PARSER.tokenize(txt)
val req = NCTestRequest(txt)
- lemmaPosEnricher.enrich(req, CFG, toksList)
- stopEnricher.enrich(req, CFG, toksList)
+ EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toksList)
+ EN_TOK_STOP_ENRICHER.enrich(req, CFG, toksList)
val toks = toksList.asScala.toSeq
assert(toks.nonEmpty)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
index bb0dfdf..bd919b3 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
@@ -21,12 +21,8 @@ import org.apache.nlpcraft.NCModelConfig
import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.util.*
-/**
- *
- */
final val CFG = NCTestConfigJava.CFG
-
-/**
- *
- */
-final val EN_PIPELINE = NCTestConfigJava.EN_PIPELINE
+final val EN_TOK_PARSER = NCTestConfigJava.EN_TOK_PARSER
+final val EN_TOK_STOP_ENRICHER = NCTestConfigJava.EN_TOK_STOP_ENRICHER
+final val EN_TOK_LEMMA_POS_ENRICHER =
NCTestConfigJava.EN_TOK_LEMMA_POS_ENRICHER
+final def mkEnPipeline = NCTestConfigJava.mkEnPipeline()
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
index c75545d..bc9f79a 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
@@ -20,18 +20,20 @@ package org.apache.nlpcraft.nlp.util;
import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.internal.util.NCResourceReader;
import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser;
+import org.apache.nlpcraft.nlp.token.enricher.*;
/**
*
*/
public class NCTestConfigJava {
- /**
- *
- */
public static final NCModelConfig CFG = new NCModelConfig("testId",
"test", "1.0", "Test description", "Test origin");
-
- /** */
- public static final NCTestPipeline EN_PIPELINE = new NCTestPipeline(
- new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))
+ public static final NCOpenNLPTokenParser EN_TOK_PARSER = new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"));
+ public static final NCEnStopWordsTokenEnricher EN_TOK_STOP_ENRICHER = new
NCEnStopWordsTokenEnricher();
+ public static final NCOpenNLPLemmaPosTokenEnricher
EN_TOK_LEMMA_POS_ENRICHER = new NCOpenNLPLemmaPosTokenEnricher(
+ NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+ NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
);
+ public static final NCTestPipeline mkEnPipeline() {
+ return new NCTestPipeline(EN_TOK_PARSER);
+ }
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
index f088130..8393aef 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
@@ -24,5 +24,5 @@ import org.apache.nlpcraft.nlp.util.*
*
*/
abstract class NCTestModelAdapter extends NCModel:
- override def getConfig: NCModelConfig = CFG
- override def getPipeline: NCModelPipeline = EN_PIPELINE
\ No newline at end of file
+ override val getConfig: NCModelConfig = CFG
+ override val getPipeline: NCModelPipeline = mkEnPipeline
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index cf5aed4..f8ada63 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -134,7 +134,7 @@ object NCTestUtils:
new NCSemanticStemmer():
override def stem(txt: String): String =
s.stem(txt.toLowerCase)
,
- new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
+ EN_TOK_PARSER,
macros,
elms
)
@@ -151,6 +151,6 @@ object NCTestUtils:
new NCSemanticStemmer():
override def stem(txt: String): String =
s.stem(txt.toLowerCase)
,
- new
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
+ EN_TOK_PARSER,
src
)