This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
new 54b9751 WIP.
54b9751 is described below
commit 54b97517727e2746f189d244b7f4eed505bfab79
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 24 16:31:53 2021 +0300
WIP.
---
.../enricher/NCEnBracketsTokenEnricherSpec.scala | 25 ++++++++++++++++------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnBracketsTokenEnricherSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnBracketsTokenEnricherSpec.scala
index 12044c9..d735331 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnBracketsTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnBracketsTokenEnricherSpec.scala
@@ -22,25 +22,36 @@ import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.NCEnOpenNlpTokenPar
import org.apache.nlpcraft.internal.nlp.util.{NCTestRequest, NCTestToken, NCTestUtils}
import org.junit.jupiter.api.{BeforeEach, Test}
-import scala.jdk.CollectionConverters.SeqHasAsJava
+import scala.jdk.CollectionConverters.*
+
/**
*
*/
-
class NCEnBracketsTokenEnricherSpec:
+ private var parser: NCEnOpenNlpTokenParser = _
private var enricher: NCEnBracketsTokenEnricher = _
@BeforeEach
- def start(): Unit = enricher = NCTestUtils.makeAndStart(new NCEnBracketsTokenEnricher())
+ def start(): Unit = enricher =
+ parser = NCTestUtils.makeAndStart(
+ new NCEnOpenNlpTokenParser(
+ "opennlp/en-token.bin",
+ "opennlp/en-pos-maxent.bin",
+ "opennlp/en-lemmatizer.dict"
+ )
+ )
+ NCTestUtils.makeAndStart(new NCEnBracketsTokenEnricher())
private def check(txt: String, brackets: Set[Integer]): Unit =
- val toks = txt.split(" ").toIndexedSeq.map(_.strip).filter(_.nonEmpty).map(NCTestToken(_))
+ val toks = parser.parse(NCTestRequest(txt))
+
+ enricher.enrich(NCTestRequest(txt), null, toks)
- enricher.enrich(NCTestRequest(txt), null, toks.asJava)
+ val seq = toks.asScala.toSeq
- NCTestUtils.printTokens(toks, "brackets:en")
+ NCTestUtils.printTokens(seq, "brackets:en")
- toks.zipWithIndex.foreach { case (tok, idx) =>
+ seq.zipWithIndex.foreach { case (tok, idx) =>
require(!(tok.get[Boolean]("brackets:en") ^ brackets.contains(idx)))
}