This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 698f2e5 WIP.
698f2e5 is described below
commit 698f2e5a5d075626bd9364d1af0042c356c7716d
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 29 21:51:23 2021 +0300
WIP.
---
.../scala/org/apache/nlpcraft/NCModelClient.java | 4 +++
.../scala/org/apache/nlpcraft/NCModelConfig.java | 6 ++++
.../org/apache/nlpcraft/NCModelConfigAdapter.java | 14 +++++++--
.../main/scala/org/apache/nlpcraft/NCToken.java | 24 +---------------
.../nlp/entity/parser/semantic/NCStemmer.java | 3 ++
.../parser/opennlp/en/NCEnOpenNlpTokenParser.java | 10 +++----
.../parser/opennlp/impl/en/NCEnOpenNlpImpl.scala | 33 ++++++++--------------
.../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 6 ++--
.../parser/opennlp/NCOpenNlpEntityParserSpec.scala | 17 +++++------
.../semantic/NCSemanticEntityParserSpec.scala | 10 +++----
.../en/NCEnBracketsTokenEnricherSpec.scala | 7 ++---
.../en/NCEnDictionaryTokenEnricherSpec.scala | 3 +-
.../enricher/en/NCEnQuotesTokenEnricherSpec.scala | 13 ++++-----
.../opennlp/en/NCEnOpenNlpTokenParserSpec.scala | 8 ++----
.../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 13 ++-------
15 files changed, 70 insertions(+), 101 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
index 85f272b..a5482ff 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
@@ -74,6 +74,8 @@ public class NCModelClient implements NCLifecycle {
public void start(NCModelConfig cfg) {
verify();
+ cfg.getTokenizer().start(cfg);
+
ExecutorService s = getExecutorService();
try {
@@ -101,6 +103,8 @@ public class NCModelClient implements NCLifecycle {
finally {
stopExecutorService(s);
}
+
+ cfg.getTokenizer().stop();
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
index 77116de..02e2883 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
@@ -52,6 +52,12 @@ public interface NCModelConfig extends NCPropertyMap {
*
* @return
*/
+ NCTokenizer getTokenizer();
+
+ /**
+ *
+ * @return
+ */
List<NCTokenParser> getTokenParsers();
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
index 943ce76..fe5ca2a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
@@ -27,7 +27,7 @@ public class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCMode
private final String id;
private final String name;
private final String version;
-
+ private final NCTokenizer tokenizer;
private final List<NCTokenParser> tokParsers = new ArrayList<>();
private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>();
private final List<NCEntityEnricher> entEnrichers = new ArrayList<>();
@@ -40,13 +40,18 @@ public class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCMode
* @param version
* @param tokParser
*/
- public NCModelConfigAdapter(String id, String name, String version, NCTokenParser tokParser, NCEntityParser entParser) {
+ public NCModelConfigAdapter(String id, String name, String version, NCTokenizer tokenizer, NCTokenParser tokParser, NCEntityParser entParser) {
+ Objects.requireNonNull(id, "ID cannot be null.");
+ Objects.requireNonNull(name, "Name cannot be null.");
+ Objects.requireNonNull(version, "Version cannot be null.");
+ Objects.requireNonNull(tokenizer, "Tokenizer cannot be null.");
Objects.requireNonNull(tokParser, "Token parser cannot be null.");
Objects.requireNonNull(entParser, "Entity parser cannot be null.");
this.id = id;
this.name = name;
this.version = version;
+ this.tokenizer = tokenizer;
tokParsers.add(tokParser);
entParsers.add(entParser);
@@ -126,4 +131,9 @@ public class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCMode
public List<NCEntityParser> getEntityParsers() {
return entParsers;
}
+
+ @Override
+ public NCTokenizer getTokenizer() {
+ return tokenizer;
+ }
}
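The adapter constructor now takes an NCTokenizer in addition to the token and entity parsers. A minimal construction sketch is shown below; the NCOpenNlpTokenizer no-arg constructor, the model ID/name/version values and the NER resource are illustrative assumptions only, not part of this commit.

    // Sketch only: tokenizer/parser arguments are assumptions for illustration.
    NCModelConfig cfg = new NCModelConfigAdapter(
        "my.model.id",                // model ID (example value)
        "My Model",                   // model name (example value)
        "1.0",                        // model version (example value)
        new NCOpenNlpTokenizer(),     // assumed tokenizer implementation
        new NCEnOpenNlpTokenParser(
            "opennlp/en-pos-maxent.bin",
            "opennlp/en-lemmatizer.dict"
        ),
        new NCOpenNlpEntityParser("opennlp/en-ner-location.bin")
    );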
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
index ff4ca24..d3fe623 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
@@ -20,11 +20,7 @@ package org.apache.nlpcraft;
/**
*
*/
-public interface NCToken extends NCPropertyMap {
- /**
- *
- * @return
- */
+public interface NCToken extends NCWord, NCPropertyMap {
String getText();
/**
@@ -55,23 +51,5 @@ public interface NCToken extends NCPropertyMap {
*
* @return
*/
- int getStartCharIndex();
-
- /**
- *
- * @return
- */
- int getEndCharIndex();
-
- /**
- *
- * @return
- */
- int getLength();
-
- /**
- *
- * @return
- */
int getIndex();
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCStemmer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCStemmer.java
index 11d5e8c..bffc30d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCStemmer.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCStemmer.java
@@ -17,6 +17,9 @@
package org.apache.nlpcraft.nlp.entity.parser.semantic;
+/**
+ *
+ */
public interface NCStemmer {
String stem(String word);
}
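NCStemmer is a single-method interface (String stem(String word)). A minimal implementation sketch backed by OpenNLP's PorterStemmer, the same stemmer NCEnOpenNlpImpl uses further down, might look like this; the class name is illustrative only.

    import opennlp.tools.stemmer.PorterStemmer;
    import org.apache.nlpcraft.nlp.entity.parser.semantic.NCStemmer;

    // Illustrative NCStemmer backed by OpenNLP's Porter stemmer.
    public class PorterNCStemmer implements NCStemmer {
        private final PorterStemmer porter = new PorterStemmer();

        @Override
        public String stem(String word) {
            // Lower-case before stemming, mirroring how the token parser stems below.
            return porter.stem(word.toLowerCase());
        }
    }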
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
index b290516..7c8ee68 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
@@ -53,13 +53,12 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser {
* @param lemmaDic
* @throws NCException
*/
- public NCEnOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
- Objects.requireNonNull(tokMdl, "Tokenizer model file cannot be null.");
+ public NCEnOpenNlpTokenParser(File posMdl, File lemmaDic) {
Objects.requireNonNull(posMdl, "POS model file cannot be null.");
Objects.requireNonNull(lemmaDic, "Lemmatizer model file cannot be null.");
try {
- impl = NCEnOpenNlpImpl.apply(tokMdl, posMdl, lemmaDic);
+ impl = NCEnOpenNlpImpl.apply(posMdl, lemmaDic);
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
@@ -73,13 +72,12 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser {
* @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
* @throws NCException
*/
- public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
- Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be null.");
+ public NCEnOpenNlpTokenParser(String posMdlSrc, String lemmaDicSrc) {
Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be null.");
try {
- impl = NCEnOpenNlpImpl.apply(tokMdlSrc, posMdlSrc, lemmaDicSrc);
+ impl = NCEnOpenNlpImpl.apply(posMdlSrc, lemmaDicSrc);
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
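With tokenization factored out of this parser, it is now constructed from just the POS model and the lemmatizer dictionary, e.g. (same resource paths the benchmark below uses):

    // Two-argument form after this change.
    NCEnOpenNlpTokenParser parser = new NCEnOpenNlpTokenParser(
        "opennlp/en-pos-maxent.bin",   // POS tagger model
        "opennlp/en-lemmatizer.dict"   // lemmatizer dictionary
    );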
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCEnOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCEnOpenNlpImpl.scala
index e68ebff..9e818f4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCEnOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCEnOpenNlpImpl.scala
@@ -35,25 +35,23 @@ import scala.jdk.CollectionConverters.*
object NCEnOpenNlpImpl:
/**
*
- * @param tokMdlSrc Local filesystem path, resources file path or URL for OpenNLP tokenizer model.
* @param posMdlSrc Local filesystem path, resources file path or URL for OpenNLP tagger model.
* @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
* @return
*/
- def apply(tokMdlSrc: String, posMdlSrc: String, lemmaDicSrc: String): NCEnOpenNlpImpl =
- new NCEnOpenNlpImpl(NCUtils.getStream(tokMdlSrc), NCUtils.getStream(posMdlSrc), NCUtils.getStream(lemmaDicSrc))
+ def apply(posMdlSrc: String, lemmaDicSrc: String): NCEnOpenNlpImpl =
+ new NCEnOpenNlpImpl(NCUtils.getStream(posMdlSrc), NCUtils.getStream(lemmaDicSrc))
/**
*
- * @param tokMdlFile Local file for OpenNLP tokenizer model.
* @param posMdlFile Local file for OpenNLP tagger model.
* @param lemmaDicFile Local file for OpenNLP lemmatizer dictionary.
* @return
*/
- def apply(tokMdlFile: File, posMdlFile: File, lemmaDicFile: File): NCEnOpenNlpImpl =
+ def apply(posMdlFile: File, lemmaDicFile: File): NCEnOpenNlpImpl =
def toStream(f: File) = new BufferedInputStream(new FileInputStream(f))
- new NCEnOpenNlpImpl(toStream(tokMdlFile), toStream(posMdlFile), toStream(lemmaDicFile))
+ new NCEnOpenNlpImpl(toStream(posMdlFile), toStream(lemmaDicFile))
/**
*
@@ -62,13 +60,11 @@ object NCEnOpenNlpImpl:
* @param lemmaDicIn
*/
class NCEnOpenNlpImpl(
- tokMdlIn: InputStream,
posMdlIn: InputStream,
lemmaDicIn: InputStream
) extends NCTokenParser :
private val stemmer = new PorterStemmer
- @volatile var tokenizer: TokenizerME = _
@volatile var tagger: POSTaggerME = _
@volatile var lemmatizer: DictionaryLemmatizer = _
@volatile var swFinder: NCEnStopWordsFinder = _
@@ -78,7 +74,6 @@ class NCEnOpenNlpImpl(
override def start(cfg: NCModelConfig): Unit =
NCUtils.execPar(
- () => tokenizer = new TokenizerME(new TokenizerModel(tokMdlIn)),
() => tagger = new POSTaggerME(new POSModel(posMdlIn)),
() => lemmatizer = new DictionaryLemmatizer(lemmaDicIn),
() => swFinder = new NCEnStopWordsFinder(stem(addStopWords), stem(exclStopWords))
@@ -88,7 +83,6 @@ class NCEnOpenNlpImpl(
swFinder = null
lemmatizer = null
tagger = null
- lemmatizer = null
/**
*
@@ -126,14 +120,9 @@ class NCEnOpenNlpImpl(
this.synchronized {
val sen = req.getText
- case class TokenHolder(origin: String, normalized: String, start: Int, end: Int, length: Int)
-
- val holders = tokenizer.tokenizePos(sen).map( t => {
- val txt = t.getCoveredText(sen).toString
- TokenHolder(txt, txt.toLowerCase, t.getStart, t.getEnd, t.length)
- })
+ val holders = cfg.getTokenizer.tokenize(cfg, sen).asScala
- val words = holders.map(_.origin)
+ val words = holders.map(_.getText).toArray
val posTags = tagger.tag(words)
var lemmas = lemmatizer.lemmatize(words, posTags).toSeq
@@ -159,14 +148,14 @@ class NCEnOpenNlpImpl(
val res: Seq[NCToken] = holders.zip(posTags).zip(lemmas).toIndexedSeq.zipWithIndex.map { case (((h, pos), lemma), idx) =>
new NCPropertyMapAdapter with NCToken:
- override def getText: String = h.origin
+ override def getText: String = h.getText
override def getLemma: String = lemma
- override def getStem: String = stemmer.stem(h.normalized)
+ override def getStem: String = stemmer.stem(h.getText.toLowerCase)
override def getPos: String = pos
override def isStopWord: Boolean = false
- override def getStartCharIndex: Int = h.start
- override def getEndCharIndex: Int = h.end
- override def getLength: Int = h.length
+ override def getStartCharIndex: Int = h.getStartCharIndex
+ override def getEndCharIndex: Int = h.getEndCharIndex
+ override def getLength: Int = h.getLength
override def getIndex: Int = idx
}
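Token boundaries now come from the configured tokenizer via cfg.getTokenizer.tokenize(cfg, sen); this parser only needs the returned elements' getText/getStartCharIndex/getEndCharIndex/getLength accessors. A whitespace-based sketch of such a tokenizer is below. The exact NCTokenizer and NCWord signatures are not part of this diff, so the method names, return type and element type here are assumptions inferred from the calls above.

    // Sketch only: interface shape assumed (start/stop/tokenize, List<NCWord> return).
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import org.apache.nlpcraft.*;

    public class WhitespaceTokenizer implements NCTokenizer {
        @Override public void start(NCModelConfig cfg) { /* nothing to acquire */ }
        @Override public void stop() { /* nothing to release */ }

        @Override
        public List<NCWord> tokenize(NCModelConfig cfg, String text) {
            List<NCWord> words = new ArrayList<>();
            Matcher m = Pattern.compile("\\S+").matcher(text);

            while (m.find()) {
                String txt = m.group();
                int start = m.start();
                int end = m.end();

                // Anonymous NCWord; if the real interface declares more members,
                // they would need to be implemented as well.
                words.add(new NCWord() {
                    @Override public String getText() { return txt; }
                    @Override public int getStartCharIndex() { return start; }
                    @Override public int getEndCharIndex() { return end; }
                    @Override public int getLength() { return end - start; }
                });
            }

            return words;
        }
    }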
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
index 4a7764e..f345dde 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
@@ -48,11 +48,13 @@ public class NCEnOpenNlpTokenParserBenchmark extends NCBenchmarkAdapter {
}
/**
- *
* @return
*/
private static NCEnOpenNlpTokenParser prepareParser() {
- NCEnOpenNlpTokenParser p = NCTestUtils.mkEnParser();
+ NCEnOpenNlpTokenParser p = new NCEnOpenNlpTokenParser(
+ "opennlp/en-pos-maxent.bin",
+ "opennlp/en-lemmatizer.dict"
+ );
p.start(null); // TODO: fix it.
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
index 8e1e78b..f705e44 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
@@ -17,11 +17,12 @@
package org.apache.nlpcraft.nlp.entity.parser.opennlp
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNlpEntityParser
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.*
-import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.nlp.util.NCTestDefaultConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -34,15 +35,11 @@ import scala.jdk.OptionConverters.RichOptional
*
*/
class NCOpenNlpEntityParserSpec:
- private val eParsers = scala.collection.mutable.ArrayBuffer.empty[NCOpenNlpEntityParser]
- private var tParser: NCEnOpenNlpTokenParser = _
+ private val parsers = scala.collection.mutable.ArrayBuffer.empty[NCOpenNlpEntityParser]
@BeforeEach
def start(): Unit =
- tParser = NCTestUtils.makeAndStart(NCTestUtils.mkEnParser)
-
- def add(res: String): Unit =
- eParsers += NCTestUtils.makeAndStart(new NCOpenNlpEntityParser(s"opennlp/$res"))
+ def add(res: String): Unit = parsers += NCTestUtils.makeAndStart(new NCOpenNlpEntityParser(s"opennlp/$res"))
NCUtils.execPar(
// en-ner-time.bin is skipped. I can't find any working example.
@@ -56,8 +53,8 @@ class NCOpenNlpEntityParserSpec:
private def checkSingleEntity(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val toks = tParser.parse(req, null)
- val resSeq = eParsers.map(_.parse(req, null, toks).asScala.toSeq).filter(_.size == 1)
+ val toks = EN_PARSER.parse(req, EN_MDL_CFG)
+ val resSeq = parsers.map(_.parse(req, EN_MDL_CFG, toks).asScala.toSeq).filter(_.size == 1)
require(resSeq.size == 1)
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index 4126693..309ec6c 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -22,6 +22,7 @@ import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNlpEntityParser
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
+import org.apache.nlpcraft.nlp.util.NCTestDefaultConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -34,14 +35,11 @@ import scala.jdk.OptionConverters.RichOptional
*
*/
class NCSemanticEntityParserSpec:
- private var tParser: NCEnOpenNlpTokenParser = _
- private var sParser: NCSemanticEntityParser = _
-
+ private var parser: NCSemanticEntityParser = _
@BeforeEach
def start(): Unit =
- tParser = NCTestUtils.makeAndStart(NCTestUtils.mkEnParser)
- sParser =
+ parser =
NCTestUtils.makeAndStart(
new NCSemanticEntityParser(
new NCEnStemmer,
@@ -56,7 +54,7 @@ class NCSemanticEntityParserSpec:
private def checkSingleEntity(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val res = sParser.parse(req, null, tParser.parse(req, null)).asScala.toSeq
+ val res = parser.parse(req, EN_MDL_CFG, EN_PARSER.parse(req, EN_MDL_CFG)).asScala.toSeq
require(res.size == 1)
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
index 6981788..6fd2210 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.nlp.token.enricher.en.NCEnBracketsTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
+import org.apache.nlpcraft.nlp.util.NCTestDefaultConfig.*
import org.junit.jupiter.api.*
import scala.jdk.CollectionConverters.*
@@ -29,12 +30,10 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCEnBracketsTokenEnricherSpec:
- private var parser: NCEnOpenNlpTokenParser = _
private var enricher: NCEnBracketsTokenEnricher = _
@BeforeEach
def start(): Unit =
- parser = NCTestUtils.makeAndStart(NCTestUtils.mkEnParser)
enricher = NCTestUtils.makeAndStart(new NCEnBracketsTokenEnricher())
/**
@@ -43,8 +42,8 @@ class NCEnBracketsTokenEnricherSpec:
* @param brackets
*/
private def check(txt: String, brackets: Set[Integer]): Unit =
- val toks = parser.parse(NCTestRequest(txt), null)
- enricher.enrich(NCTestRequest(txt), null, toks)
+ val toks = EN_PARSER.parse(NCTestRequest(txt), EN_MDL_CFG)
+ enricher.enrich(NCTestRequest(txt), EN_MDL_CFG, toks)
val seq = toks.asScala.toSeq
NCTestUtils.printTokens(seq)
seq.foreach (tok =>
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
index fe4703e..1bceab7 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.nlp.token.enricher.en.NCEnDictionaryTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
+import org.apache.nlpcraft.nlp.util.NCTestDefaultConfig.*
import org.junit.jupiter.api.*
import scala.jdk.CollectionConverters.SeqHasAsJava
@@ -43,7 +44,7 @@ class NCEnDictionaryTokenEnricherSpec:
require(toks.head.getOpt[Boolean]("dict:en").isEmpty)
require(toks.last.getOpt[Boolean]("dict:en").isEmpty)
- enricher.enrich(null, null, toks.asJava)
+ enricher.enrich(null, EN_MDL_CFG, toks.asJava)
NCTestUtils.printTokens(toks)
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
index 98a9837..e3c6bd1 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
@@ -20,10 +20,9 @@ package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.NCToken
import org.apache.nlpcraft.nlp.token.enricher.en.NCEnQuotesTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
-import org.apache.nlpcraft.nlp.util.NCTestUtils
-import org.apache.nlpcraft.nlp.util.{NCTestRequest, NCTestUtils}
-import org.apache.nlpcraft.nlp.util.NCTestRequest
-import org.junit.jupiter.api.{BeforeEach, Test}
+import org.apache.nlpcraft.nlp.util.NCTestDefaultConfig.*
+import org.apache.nlpcraft.nlp.util.*
+import org.junit.jupiter.api.*
import scala.jdk.CollectionConverters.*
@@ -31,12 +30,10 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCEnQuotesTokenEnricherSpec:
- private var parser: NCEnOpenNlpTokenParser = _
private var enricher: NCEnQuotesTokenEnricher = _
@BeforeEach
def start(): Unit =
- parser = NCTestUtils.makeAndStart(NCTestUtils.mkEnParser)
enricher = NCTestUtils.makeAndStart(new NCEnQuotesTokenEnricher)
/**
@@ -45,9 +42,9 @@ class NCEnQuotesTokenEnricherSpec:
* @param quotes
*/
private def check(txt: String, quotes: Set[Integer]): Unit =
- val toks = parser.parse(NCTestRequest(txt), null)
+ val toks = EN_PARSER.parse(NCTestRequest(txt), EN_MDL_CFG)
val toksSeq = toks.asScala.toSeq
- enricher.enrich(NCTestRequest(txt), null, toks)
+ enricher.enrich(NCTestRequest(txt), EN_MDL_CFG, toks)
NCTestUtils.printTokens(toksSeq)
toksSeq.foreach (tok =>
require(!(tok.get[Boolean]("quoted:en") ^ quotes.contains(tok.getIndex)))
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
index 5bfc288..e41f2ac 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
+import org.apache.nlpcraft.nlp.util.NCTestDefaultConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -30,13 +31,8 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCEnOpenNlpTokenParserSpec:
- private var parser: NCEnOpenNlpTokenParser = _
-
- @BeforeEach
- def start(): Unit = parser = NCTestUtils.makeAndStart(NCTestUtils.mkEnParser)
-
private def test(txt: String, validate: Seq[NCToken] => _): Unit =
- val toks = parser.parse(nlp.util.NCTestRequest(txt), null).asScala.toSeq
+ val toks = EN_PARSER.parse(nlp.util.NCTestRequest(txt), EN_MDL_CFG).asScala.toSeq
assert(toks.nonEmpty)
NCTestUtils.printTokens(toks)
validate(toks)
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index 933d889..526d650 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.nlp.util
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
+import org.apache.nlpcraft.nlp.tokenizer.NCOpenNlpTokenizer
import scala.jdk.CollectionConverters.*
@@ -81,14 +82,4 @@ object NCTestUtils:
t.start(null) // TODO: fix it.
println(s"'${t.getClass.getSimpleName}' created in ${started -
start}ms and started in ${now() - started}ms.")
- t
-
- /**
- *
- * @return
- */
- def mkEnParser: NCEnOpenNlpTokenParser = new NCEnOpenNlpTokenParser(
- "opennlp/en-token.bin",
- "opennlp/en-pos-maxent.bin",
- "opennlp/en-lemmatizer.dict"
- )
\ No newline at end of file
+ t
\ No newline at end of file