This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
new c38ee35 WIP.
c38ee35 is described below
commit c38ee35b234fa3cc99c63546105793320c7eac7e
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Dec 21 16:17:03 2021 +0300
WIP.
---
.../parser/opennlp/NCOpenNlpTokenParserSpec.scala | 19 ++++---------------
1 file changed, 4 insertions(+), 15 deletions(-)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
index 714c9b2..1c1ee2c 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
@@ -40,21 +40,6 @@ class NCOpenNlpTokenParserSpec:
parser.start()
}
- def pprint(obj: Any, depth: Int = 0, paramName: Option[String] = None):
Unit =
- val indent = " " * depth
- val prettyName = paramName.fold("")(x => s"$x: ")
- val ptype = obj match { case _: Iterable[Any] => "" case obj: Product
=> obj.productPrefix case _ => obj.toString }
-
- println(s"$indent$prettyName$ptype")
-
- obj match
- case seq: Iterable[Any] =>
- seq.foreach(pprint(_, depth + 1))
- case obj: Product =>
- obj.productIterator.zip(obj.productElementNames).foreach {
case (subObj, paramName) =>
- pprint(subObj, depth + 1, Some(paramName))
- }
- case _ =>
private def request(txt: String): Seq[NCToken] =
val toks = parser.parse(
@@ -106,6 +91,10 @@ class NCOpenNlpTokenParserSpec:
require(!toks.head.isStopWord)
require(toks.last.isStopWord)
+ // First and last are stop words,
+ // third and fourth are not because they are quoted.
+ // Note that "A ` A A` A" is parsed as 5 tokens ("A", "`", "A", "A`",
"A") because of the OpenNLP tokenizer logic,
+ // so we use spaces around the quotes to simplify the test.
toks = request("A ` A A ` A")
require(toks.length == 6)