This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
     new c38ee35  WIP.
c38ee35 is described below

commit c38ee35b234fa3cc99c63546105793320c7eac7e
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Dec 21 16:17:03 2021 +0300

    WIP.
---
 .../parser/opennlp/NCOpenNlpTokenParserSpec.scala     | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git 
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
 
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
index 714c9b2..1c1ee2c 100644
--- 
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
+++ 
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
@@ -40,21 +40,6 @@ class NCOpenNlpTokenParserSpec:
         parser.start()
     }
 
-    def pprint(obj: Any, depth: Int = 0, paramName: Option[String] = None): 
Unit =
-        val indent = "  " * depth
-        val prettyName = paramName.fold("")(x => s"$x: ")
-        val ptype = obj match { case _: Iterable[Any] => "" case obj: Product 
=> obj.productPrefix case _ => obj.toString }
-
-        println(s"$indent$prettyName$ptype")
-
-        obj match
-            case seq: Iterable[Any] =>
-                seq.foreach(pprint(_, depth + 1))
-            case obj: Product =>
-                obj.productIterator.zip(obj.productElementNames).foreach { 
case (subObj, paramName) =>
-                    pprint(subObj, depth + 1, Some(paramName))
-                }
-            case _ =>
 
     private def request(txt: String): Seq[NCToken] =
         val toks = parser.parse(
@@ -106,6 +91,10 @@ class NCOpenNlpTokenParserSpec:
         require(!toks.head.isStopWord)
         require(toks.last.isStopWord)
 
+        // First and last are stop words.
+        // Third and fourth are not, because they are quoted.
+        // Note that "A ` A A` A" is parsed as 5 tokens ("A", "`", "A", "A`", 
"A") because of OpenNLP tokenizer logic,
+        // so we use spaces around quotes to simplify the test.
         toks = request("A ` A A ` A")
 
         require(toks.length == 6)

Reply via email to