This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
     new 2d7677a  WIP.
2d7677a is described below

commit 2d7677aeae84bd0be0e1e0e44a401fd920df6af4
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Jul 6 18:15:00 2021 +0300

    WIP.
---
 .../ctxword/NCContextWordCategoriesEnricher.scala  | 39 +++++++++++-----------
 .../nlpcraft/model/ctxword/NCContextWordSpec.scala |  2 +-
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
index 8307863..9a385d3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
@@ -328,10 +328,10 @@ object NCContextWordCategoriesEnricher extends NCServerEnricher {
       */
     @throws[NCE]
    private def askSamples(cfg: NCCtxWordCategoriesConfigMdo, parent: Span = null):
-    Map[/** Element ID */String, ElementData] = {
+        Map[/** Element ID */String, ElementData] = {
         val corpusSeq = cfg.corpus.toSeq
-        val corpusWords = corpusSeq.map(parser.parse(_).map(_.word))
         val nlpWords = corpusSeq.map(s => parser.parse(s))
+        val corpusWords = nlpWords.map(_.map(_.word))
 
         val corpusWordsStems = corpusWords.map(_.map(stem))
         val corpusWordsNorm = corpusWords.map(_.map(normCase))
@@ -358,14 +358,16 @@ object NCContextWordCategoriesEnricher extends NCServerEnricher {
                 map { case (elemId, m) => elemId -> m.map(_._2) }
 
        if (recs.nonEmpty) {
-            val resps = syncExec(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2).toSeq, parent = parent))
+            val respsSeq: Seq[(NCSuggestionRequest, Seq[NCWordSuggestion])] =
+                syncExec(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2).toSeq, parent = parent)).
+                    toSeq.sortBy(p => (p._1.words.mkString, p._1.index))
 
             if (DEBUG_MODE) {
                 val t = NCAsciiTable()
 
                 t #= ("Request", "Responses")
 
-                for ((req, resp) <- resps) {
+                for ((req, resp) <- respsSeq) {
                    t += (
                        req,
                        s"${resp.map(p => s"${p.word}=${FMT.format(normalizeConfidence(p.score))}").mkString(", ")}"
@@ -376,7 +378,6 @@ object NCContextWordCategoriesEnricher extends NCServerEnricher {
             }
 
            val req2Elem = recs.flatMap { case (elemId, recs) => recs.map(p => p -> elemId) }
-            val respsSeq: Seq[(NCSuggestionRequest, Seq[NCWordSuggestion])] = resps.toSeq
 
            def mkMap(convert: (NCSuggestionRequest, NCWordSuggestion) => String):
                Map[/** Element ID */ String, /** Word key */ Map[String, /** Confidences */ Seq[Double]]] = {
@@ -508,40 +509,38 @@ object NCContextWordCategoriesEnricher extends NCServerEnricher {
                         if (DEBUG_MODE)
                             logger.info(
                                 s"Model loaded [" +
-                                    s"key=$key, elements: " +
-                                    s"${cfg.elements.mkString(" ,")}, " +
-                                    s"values data=$vd]"
+                                s"key=$key, elements: " +
+                                s"${cfg.elements.mkString(", ")}, " +
+                                s"values data=$vd]"
                             )
 
                        def get(m: Map[String, Set[String]], key: String): Set[String] = m.getOrElse(key, Set.empty)
 
                         for (
                            n <- nouns;
-                                elemId <- get(vNorms, n.normText) ++ get(vNorms, normCase(n.lemma)) ++ get(vStems, n.stem)
+                            elemId <- get(vNorms, n.normText) ++ get(vNorms, normCase(n.lemma)) ++ get(vStems, n.stem)
                         )
                            add(n, elemId, Confidence(INCL_MAX_CONFIDENCE))
 
-                        // 2. Via examples.
-                        val mdlCorpusData: Map[String, ElementData] = getCorpusData(cfg, key, parent)
+                        // 2. Via corpus.
+                        val corpusData = getCorpusData(cfg, key, parent)
 
                         for (
                            nounTok <- nouns;
-                                (elemId, elemData) <- mdlCorpusData;
-                                confOpt = elemData.get(nounTok.normText, nounTok.stem, nounTok.lemma)
-                                if confOpt.isDefined && confOpt.get >= cfg.elements(elemId)
+                            (elemId, elemData) <- corpusData;
+                            confOpt = elemData.get(nounTok.normText, nounTok.stem, nounTok.lemma)
+                            if confOpt.isDefined && confOpt.get >= cfg.elements(elemId)
                         )
                            add(nounTok, elemId, Confidence(confOpt.get))
 
-                        // 3. Ask for sentence.
-                        val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N")) Some(p.index)
-                        else None).toSeq
+                        // 3. Ask for sentence (via co-references)
+                        val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N")) Some(p.index) else None).toSeq
                        val reqs = idxs.map(idx => NCSuggestionRequest(ns.tokens.map(_.origText).toSeq, idx))
 
                         val resps: Map[NCWordSuggestion, NCSuggestionRequest] =
                             syncExec(
                                NCSuggestSynonymManager.suggestWords(reqs, parent = parent)).
-                                flatMap { case (req, suggs) => suggs.map(_ -> req)
-                                }
+                                flatMap { case (req, suggs) => suggs.map(_ -> req) }
 
                         if (DEBUG_MODE) {
                             val t = NCAsciiTable()
@@ -574,7 +573,7 @@ object NCContextWordCategoriesEnricher extends NCServerEnricher {
                            // separated by space, and Suggestion Manager uses space tokenizer.
                             (sugg, req) <- resps.toSeq.sortBy(_._2.index);
                                 suggConf = normalizeConfidence(sugg.score);
-                                (elemId, elemData) <- mdlCorpusData;
+                                (elemId, elemData) <- corpusData;
                                 elemConf = cfg.elements(elemId);
                                corpConfOpt = elemData.get(normCase(sugg.word), stem(sugg.word), getLemma(req, sugg))
                                 if corpConfOpt.isDefined;
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index f72cf63..cc28e6f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -71,7 +71,7 @@ class NCContextWordSpecModel extends NCModel {
                         "BMW drivers have the highest loyalty",
 
                         "A wild cat is very dangerous",
-                        "A fox eats hens",
+                        "A fox eat hens",
                         "The fox was already in your chicken house",
 
                         "What is the local temperature?",

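For readers following the first hunk above, here is a minimal, runnable sketch of the parsing change in askSamples (ParsedWord, parse and CorpusParseSketch below are hypothetical stand-ins for illustration, not NLPCraft API): the corpus is now parsed once, and the plain word forms are derived from the already-parsed tokens instead of parsing each sentence a second time.

// Illustrative sketch only: ParsedWord and parse() stand in for the parser used by the enricher.
final case class ParsedWord(word: String, lemma: String)

object CorpusParseSketch extends App {
    // Hypothetical stand-in for parser.parse(_): simple whitespace tokenization.
    def parse(sentence: String): Seq[ParsedWord] =
        sentence.split("\\s+").toSeq.map(w => ParsedWord(w, w.toLowerCase))

    val corpusSeq = Seq("A fox eat hens", "A wild cat is very dangerous")

    // Parse each corpus sentence once, then derive the plain words from the parsed tokens
    // (previously the corpus was parsed once per derived collection).
    val nlpWords = corpusSeq.map(s => parse(s))
    val corpusWords = nlpWords.map(_.map(_.word))

    corpusWords.foreach(ws => println(ws.mkString(" ")))
}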