[incubator-nlpcraft] branch NLPCRAFT-469 updated: WIP.

sergeykamov Tue, 21 Dec 2021 08:07:48 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
     new d132952  WIP.
d132952 is described below

commit d13295297c1921578ed3810033b075b01055ec95
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Dec 21 19:05:58 2021 +0300

    WIP.
---
 .../parser/opennlp/NCOpenNlpTokenParserSpec.scala  | 85 +++++++++++++---------
 1 file changed, 49 insertions(+), 36 deletions(-)

diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
index 59d2184..61ccfeb 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
@@ -18,6 +18,7 @@
 package org.apache.nlpcraft.internal.nlp.token.parser.opennlp
 
 import com.google.gson.GsonBuilder
+import org.apache.nlpcraft.internal.ascii.NCAsciiTable
 import org.apache.nlpcraft.{NCRequest, NCToken}
 import org.junit.jupiter.api.{BeforeEach, Test}
 
@@ -25,7 +26,7 @@ import scala.jdk.CollectionConverters.*
 import java.util
 
 /**
-  * 
+  *
   */
 class NCOpenNlpTokenParserSpec:
     private var parser: NCOpenNlpTokenParser = _
@@ -40,9 +41,11 @@ class NCOpenNlpTokenParserSpec:
                 new NCEnStopWordsFinder()
             )
 
-        parser.start
+        parser.start()
+
+    private def test(txt: String, validate: Seq[NCToken] => _) =
+        val t = System.currentTimeMillis()
 
-    private def request(txt: String): Seq[NCToken] =
         val toks = parser.parse(
             new NCRequest:
                 override def getUserId: String = null
@@ -54,51 +57,61 @@ class NCOpenNlpTokenParserSpec:
                 override def getRequestData: util.Map[String, AnyRef] = null
         )
 
+        println(s"Request: $txt, processed: ${System.currentTimeMillis() - t} ms.")
+
         assert(toks != null)
         assert(!toks.isEmpty)
 
         val res = toks.asScala.toSeq
 
-        println(s"Request: $txt")
+        val tbl = new NCAsciiTable()
+
+        tbl #= ("Text", "Normalized", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword")
 
         res.foreach(t =>
-            println(
-                s"Text: ${t.getOriginalText}" +
-                    s", normalized: ${t.getNormalizedText}" +
-                    s", pos: ${t.getPos}" +
-                    s", stem: ${t.getStem}" +
-                    s", start: ${t.getStartCharIndex}" +
-                    s", end: ${t.getEndCharIndex}" +
-                    s", length: ${t.getLength}" +
-                    s", isStop: ${t.isStopWord}"
+            tbl += (
+                t.getOriginalText,
+                t.getNormalizedText,
+                t.getPos,
+                t.getStem,
+                t.getLemma,
+                t.getStartCharIndex,
+                t.getEndCharIndex,
+                t.getLength,
+                t.isStopWord
             )
         )
 
+        println(tbl.toString)
         println
 
-        res
+        validate(res)
 
     @Test
     def test(): Unit =
-        var toks = request("Test requests!")
-
-        require(toks.length == 3)
-        require(!toks.head.isStopWord)
-        require(toks.last.isStopWord)
-
-        toks = request("Test requests !")
-
-        require(toks.length == 3)
-        require(!toks.head.isStopWord)
-        require(toks.last.isStopWord)
-
-        // First and last are stop words,
-        // Third and fourth are not because quoted.
-        // Note that "A ` A A` A" parsed as 5 tokens ("A", "`", ""A, "A`", "A") because OpenNLP tokenizer logic,
-        // So we use spaces around quotes to simplify test.
-        toks = request("A ` A A ` A")
-
-        require(toks.length == 6)
-        require(toks.head.isStopWord)
-        require(toks.last.isStopWord)
-        require(toks.drop(1).reverse.drop(1).forall(!_.isStopWord))
\ No newline at end of file
+        test(
+            "Test requests!",
+            toks =>
+                require(toks.length == 3);
+                require(!toks.head.isStopWord);
+                require(toks.last.isStopWord)
+        )
+        test(
+            "Test requests !",
+            toks =>
+                require(toks.length == 3);
+                require(!toks.head.isStopWord);
+                require(toks.last.isStopWord)
+        )
+        test(
+            // First and last are stop words,
+            // Third and fourth are not because quoted.
+            // Note that "A ` A A` A" parsed as 5 tokens ("A", "`", ""A, "A`", "A") because OpenNLP tokenizer logic,
+            // So we use spaces around quotes to simplify test.
+            "A ` A A ` A",
+            toks =>
+                require(toks.length == 6);
+                require(toks.head.isStopWord);
+                require(toks.last.isStopWord);
+                require(toks.drop(1).reverse.drop(1).forall(!_.isStopWord))
+        )
\ No newline at end of file

[incubator-nlpcraft] branch NLPCRAFT-469 updated: WIP.

Reply via email to