This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new 9153ff9c WIP.
9153ff9c is described below

commit 9153ff9ca282c44da37d4d5dac8a3197eafc4e0c
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 16 16:58:00 2022 +0400

    WIP.
---
 .../scala/org/apache/nlpcraft/internal/util/NCUtils.scala    | 12 ++++++++++++
 .../nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala   |  3 +--
 .../nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala  |  4 ++--
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index 8ce01492..ca2b7635 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -776,3 +776,15 @@ object NCUtils extends LazyLogging:
                 es.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS)
             catch
                 case _: InterruptedException => () // Safely ignore.
+
+    /**
+      *
+      * @param tok
+      * @param name
+      * @tparam T
+      * @return
+      */
+    def getProperty[T](tok: NCToken, name: String): T =
+        tok.get(name).getOrElse(throw new NCException(
+            s"'$name' property not found in token [index=${tok.getIndex}, text=${tok.getText}, properties=${tok.keysSet}]")
+        )
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
index c0155e45..413e296d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
@@ -42,8 +42,7 @@ class NCDictionaryTokenEnricher(dictRes: String) extends NCTokenEnricher:
     init()
 
     private def init(): Unit = dict = NCUtils.readResource(dictRes).toSet
-    private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw new NCException("'lemma' property not found in token."))
 
     /** @inheritdoc */
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
-        toks.foreach(t => t.put("dict", dict.contains(getLemma(t))))
+        toks.foreach(t => t.put("dict", dict.contains(NCUtils.getProperty(t, "lemma"))))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index fc63db37..0b66b52b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -101,8 +101,8 @@ private object NCEnStopWordsTokenEnricher extends LazyLogging:
     )
 
     private def read(path: String): Set[String] = NCUtils.readTextGzipResource(path, "UTF-8", logger).toSet
-    private def getPos(t: NCToken): String = t.get("pos").getOrElse(throw new NCException(s"'pos' property not found in token: ${t.keysSet}"))
-    private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw new NCException(s"'lemma' property not found in token: ${t.keysSet}"))
+    private def getPos(t: NCToken): String = NCUtils.getProperty(t, "pos")
+    private def getLemma(t: NCToken): String = NCUtils.getProperty(t, "lemma")
     private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
     private def toLemmaKey(toks: Seq[NCToken]): String = toks.map(getLemma).mkString(" ")
     private def toOriginalKey(toks: Seq[NCToken]): String = toks.map(_.getText).mkString(" ")

Reply via email to