This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 9153ff9c WIP.
9153ff9c is described below
commit 9153ff9ca282c44da37d4d5dac8a3197eafc4e0c
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 16 16:58:00 2022 +0400
WIP.
---
.../scala/org/apache/nlpcraft/internal/util/NCUtils.scala | 12 ++++++++++++
.../nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala | 3 +--
.../nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala | 4 ++--
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index 8ce01492..ca2b7635 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -776,3 +776,15 @@ object NCUtils extends LazyLogging:
es.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS)
catch
case _: InterruptedException => () // Safely ignore.
+
+ /**
+ *
+ * @param tok
+ * @param name
+ * @tparam T
+ * @return
+ */
+ def getProperty[T](tok: NCToken, name: String): T =
+ tok.get(name).getOrElse(throw new NCException(
+ s"'$name' property not found in token [index=${tok.getIndex}, text=${tok.getText}, properties=${tok.keysSet}]")
+ )
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
index c0155e45..413e296d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
@@ -42,8 +42,7 @@ class NCDictionaryTokenEnricher(dictRes: String) extends NCTokenEnricher:
init()
private def init(): Unit = dict = NCUtils.readResource(dictRes).toSet
- private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw
new NCException("'lemma' property not found in token."))
/** @inheritdoc */
override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
List[NCToken]): Unit =
- toks.foreach(t => t.put("dict", dict.contains(getLemma(t))))
+ toks.foreach(t => t.put("dict", dict.contains(NCUtils.getProperty(t,
"lemma"))))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index fc63db37..0b66b52b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -101,8 +101,8 @@ private object NCEnStopWordsTokenEnricher extends LazyLogging:
)
private def read(path: String): Set[String] =
NCUtils.readTextGzipResource(path, "UTF-8", logger).toSet
- private def getPos(t: NCToken): String = t.get("pos").getOrElse(throw new
NCException(s"'pos' property not found in token: ${t.keysSet}"))
- private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw
new NCException(s"'lemma' property not found in token: ${t.keysSet}"))
+ private def getPos(t: NCToken): String = NCUtils.getProperty(t, "pos")
+ private def getLemma(t: NCToken): String = NCUtils.getProperty(t, "lemma")
private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
private def toLemmaKey(toks: Seq[NCToken]): String =
toks.map(getLemma).mkString(" ")
private def toOriginalKey(toks: Seq[NCToken]): String =
toks.map(_.getText).mkString(" ")