This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
new d132952 WIP.
d132952 is described below
commit d13295297c1921578ed3810033b075b01055ec95
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Dec 21 19:05:58 2021 +0300
WIP.
---
.../parser/opennlp/NCOpenNlpTokenParserSpec.scala | 85 +++++++++++++---------
1 file changed, 49 insertions(+), 36 deletions(-)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
index 59d2184..61ccfeb 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
@@ -18,6 +18,7 @@
package org.apache.nlpcraft.internal.nlp.token.parser.opennlp
import com.google.gson.GsonBuilder
+import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import org.apache.nlpcraft.{NCRequest, NCToken}
import org.junit.jupiter.api.{BeforeEach, Test}
@@ -25,7 +26,7 @@ import scala.jdk.CollectionConverters.*
import java.util
/**
- *
+ *
*/
class NCOpenNlpTokenParserSpec:
private var parser: NCOpenNlpTokenParser = _
@@ -40,9 +41,11 @@ class NCOpenNlpTokenParserSpec:
new NCEnStopWordsFinder()
)
- parser.start
+ parser.start()
+
+ private def test(txt: String, validate: Seq[NCToken] => _) =
+ val t = System.currentTimeMillis()
- private def request(txt: String): Seq[NCToken] =
val toks = parser.parse(
new NCRequest:
override def getUserId: String = null
@@ -54,51 +57,61 @@ class NCOpenNlpTokenParserSpec:
override def getRequestData: util.Map[String, AnyRef] = null
)
+ println(s"Request: $txt, processed: ${System.currentTimeMillis() - t}
ms.")
+
assert(toks != null)
assert(!toks.isEmpty)
val res = toks.asScala.toSeq
- println(s"Request: $txt")
+ val tbl = new NCAsciiTable()
+
+ tbl #= ("Text", "Normalized", "POS", "Stem", "Lemma", "Start", "End",
"Length", "Stopword")
res.foreach(t =>
- println(
- s"Text: ${t.getOriginalText}" +
- s", normalized: ${t.getNormalizedText}" +
- s", pos: ${t.getPos}" +
- s", stem: ${t.getStem}" +
- s", start: ${t.getStartCharIndex}" +
- s", end: ${t.getEndCharIndex}" +
- s", length: ${t.getLength}" +
- s", isStop: ${t.isStopWord}"
+ tbl += (
+ t.getOriginalText,
+ t.getNormalizedText,
+ t.getPos,
+ t.getStem,
+ t.getLemma,
+ t.getStartCharIndex,
+ t.getEndCharIndex,
+ t.getLength,
+ t.isStopWord
)
)
+ println(tbl.toString)
println
- res
+ validate(res)
@Test
def test(): Unit =
- var toks = request("Test requests!")
-
- require(toks.length == 3)
- require(!toks.head.isStopWord)
- require(toks.last.isStopWord)
-
- toks = request("Test requests !")
-
- require(toks.length == 3)
- require(!toks.head.isStopWord)
- require(toks.last.isStopWord)
-
- // First and last are stop words,
- // Third and fourth are not because quoted.
- // Note that "A ` A A` A" is parsed as 5 tokens ("A", "`", "A", "A`", "A") because of OpenNLP tokenizer logic,
- // So we use spaces around quotes to simplify test.
- toks = request("A ` A A ` A")
-
- require(toks.length == 6)
- require(toks.head.isStopWord)
- require(toks.last.isStopWord)
- require(toks.drop(1).reverse.drop(1).forall(!_.isStopWord))
\ No newline at end of file
+ test(
+ "Test requests!",
+ toks =>
+ require(toks.length == 3);
+ require(!toks.head.isStopWord);
+ require(toks.last.isStopWord)
+ )
+ test(
+ "Test requests !",
+ toks =>
+ require(toks.length == 3);
+ require(!toks.head.isStopWord);
+ require(toks.last.isStopWord)
+ )
+ test(
+ // First and last are stop words,
+ // Third and fourth are not because quoted.
+ // Note that "A ` A A` A" is parsed as 5 tokens ("A", "`", "A", "A`", "A") because of OpenNLP tokenizer logic,
+ // So we use spaces around quotes to simplify test.
+ "A ` A A ` A",
+ toks =>
+ require(toks.length == 6);
+ require(toks.head.isStopWord);
+ require(toks.last.isStopWord);
+ require(toks.drop(1).reverse.drop(1).forall(!_.isStopWord))
+ )
\ No newline at end of file