[incubator-nlpcraft] 03/03: WIP.

sergeykamov Mon, 19 Dec 2022 00:34:07 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


commit 8f8e9cf487f250ae9b2ec5d3b47d0cbc8a38c165
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Dec 19 12:34:14 2022 +0400

    WIP.
---
 .../nlp/enrichers/impl/NCEnStopWordGenerator.scala |  4 +-
 .../nlp/enrichers/NCStopWordsEnricherSpec.scala    | 57 ++++++++++++++--------
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/impl/NCEnStopWordGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/impl/NCEnStopWordGenerator.scala
index b90e0567..3295738a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/impl/NCEnStopWordGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/impl/NCEnStopWordGenerator.scala
@@ -156,7 +156,7 @@ import org.apache.nlpcraft.nlp.enrichers.impl.NCEnStopWordGenerator.*
   */
 private[enrichers] class NCEnStopWordGenerator(stemmer: NCStemmer):
     def mkNounWords(): Set[String] =
-        val buf = new mutable.HashSet[String]()
+        val buf = new mutable.ArrayBuffer[String]()
 
         for (w1 <- NOUN_WORDS)
             buf += s"$w1"
@@ -167,7 +167,7 @@ private[enrichers] class NCEnStopWordGenerator(stemmer: NCStemmer):
         buf.map(stem).toSet
 
     def mkFirstWords(): Set[String] =
-        val buf = new mutable.HashSet[String]()
+        val buf = new mutable.ArrayBuffer[String]()
 
         // is there
         for (w1 <- QWORDS2)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCStopWordsEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCStopWordsEnricherSpec.scala
index b81ee116..b5b0ee25 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCStopWordsEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCStopWordsEnricherSpec.scala
@@ -18,11 +18,13 @@
 package org.apache.nlpcraft.nlp.enrichers
 
 import org.apache.nlpcraft.*
-import internal.util.NCResourceReader
 import nlp.util.*
 import nlp.enrichers.NCEnStopWordsTokenEnricher
-import org.apache.nlpcraft.nlp.stemmer.NCStemmer
 import org.scalatest.funsuite.AnyFunSuite
+import org.apache.nlpcraft.internal.util.NCUtils
+
+import scala.collection.*
+import scala.concurrent.ExecutionContext
 
 /**
   *
@@ -34,73 +36,90 @@ class NCStopWordsEnricherSpec extends AnyFunSuite:
       * @param txt
       * @param boolVals
       */
-    private def test(stopEnricher: NCEnStopWordsTokenEnricher, txt: String, boolVals: Boolean*): Unit =
-        val toks = EN_TOK_PARSER.tokenize(txt)
-        require(toks.size == boolVals.size)
+    private def add(stopEnricher: => NCEnStopWordsTokenEnricher, txt: String, boolVals: Boolean*)
+        (using bodies: mutable.ArrayBuffer[() => Unit], errs: mutable.ArrayBuffer[Throwable]): Unit =
+        val body: () => Unit = () =>
+            try
+                val toks = EN_TOK_PARSER.tokenize(txt)
+                require(toks.size == boolVals.size)
+
+                toks.foreach(tok => require(tok.get[Boolean]("stopword").isEmpty))
 
-        toks.foreach(tok => require(tok.get[Boolean]("stopword").isEmpty))
+                val req = NCTestRequest(txt)
 
-        val req = NCTestRequest(txt)
+                EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
+                stopEnricher.enrich(req, CFG, toks)
 
-        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
-        stopEnricher.enrich(req, CFG, toks)
+                NCTestUtils.printTokens(toks)
+                toks.zip(boolVals).foreach { (tok, boolVal) => require(tok[Boolean]("stopword") == boolVal) }
+            catch
+                case e: Throwable => errs.synchronized { errs += e }
 
-        NCTestUtils.printTokens(toks)
-        toks.zip(boolVals).foreach { (tok, boolVal) => require(tok[Boolean]("stopword") == boolVal) }
+        bodies += body
 
     test("test") {
-        test(
+        val errs = mutable.ArrayBuffer.empty[Throwable]
+        val bodies = mutable.ArrayBuffer.empty[() => Unit]
+
+        given mutable.ArrayBuffer[Throwable] = errs
+        given mutable.ArrayBuffer[() => Unit] = bodies
+
+        add(
             EN_TOK_STOP_ENRICHER,
             "the test",
             true,
             false
         )
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(addSet = Set("test"), exclSet = Set("the")),
             "the test",
             false,
             true
         )
         // The synonym is defined as lemma => all kind of input words should be found.
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(addSet = Set("woman")),
             "woman women",
             true,
             true
         )
         // The synonym is defined in some form => only in the same form input words should be found.
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(addSet = Set("women")),
             "woman women",
             false,
             true
         )
         // The synonym is defined in some form, but stemmer is very rough =>  all kind of input words should be found.
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(addSet = Set("women"), stemmer = _.take(3)),
             "woman women",
             true,
             true
         )
         // The synonym is defined as lemma => all kind of input words should be found, but excluded set is defined.
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(addSet = Set("woman"), exclSet = Set("women")),
             "woman women",
             true,
             false
         )
         // Very rough stemmers defined.
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(addSet = Set("women"), stemmer = _.head.toString),
             "weather windows noun",
             true,
             true,
             false
         )
-        test(
+        add(
             new NCEnStopWordsTokenEnricher(stemmer = _ => ""),
             "weather noun",
             true,
             true
         )
+
+        NCUtils.execPar(bodies)(ExecutionContext.Implicits.global)
+        errs.foreach(_.printStackTrace)
+        require(errs.isEmpty)
     }
\ No newline at end of file

[incubator-nlpcraft] 03/03: WIP.

Reply via email to