This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new 6dda404 WIP.
6dda404 is described below
commit 6dda404fedccfe0d836ea8daf6733b5b078c3e01
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 15 14:11:18 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 101 ++++++++++-----------
1 file changed, 50 insertions(+), 51 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 120d8d0..4df51c3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, NCNlpSentenceNote ⇒ NlpNote, NCNlpSentence ⇒ Sentence}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, _}
@@ -160,7 +160,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param metaOpt
*/
private def mark(
- ns: NCNlpSentence,
+ ns: Sentence,
elem: NCElement,
toks: Seq[NlpToken],
direct: Boolean,
@@ -203,7 +203,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val idxs = toks.map(_.index).sorted
- val note = NCNlpSentenceNote(idxs, elem.getId, params: _*)
+ val note = NlpNote(idxs, elem.getId, params: _*)
toks.foreach(_.add(note))
@@ -218,16 +218,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param span
* @param req
*/
- private def processParsers(mdl: NCProbeModel, ns: NCNlpSentence, span: Span, req: NCRequestImpl): Unit = {
- val parsers = mdl.model.getParsers
-
- for (parser ← parsers.asScala) {
+ private def processParsers(mdl: NCProbeModel, ns: Sentence, span: Span, req: NCRequestImpl): Unit = {
+ for (parser ← mdl.model.getParsers.asScala) {
parser.onInit()
startScopedSpan("customParser", span,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
- "txt" → ns.text) { _ ⇒
+ "txt" → ns.text
+ ) { _ ⇒
def to(t: NlpToken): NCCustomWord =
new NCCustomWord {
override def getNormalizedText: String = t.normText
@@ -246,7 +245,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
override def isEnglish: Boolean = t.isEnglish
}
- val parsingRes = parser.parse(
+ val res = parser.parse(
req,
mdl.model,
ns.map(to).asJava,
@@ -256,15 +255,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val md = n.asMetadata()
new NCCustomElement() {
- override def getElementId: String = noteId
+ override def getElementId: String = n.noteType
override def getWords: JList[NCCustomWord] = words
override def getMetadata: JavaMeta = md.map(p ⇒ p._1 → p._2.asInstanceOf[AnyRef]).asJava
}
}).asJava
)
- if (parsingRes != null)
- parsingRes.asScala.foreach(e ⇒ {
+ if (res != null)
+ res.asScala.foreach(e ⇒ {
val elemId = e.getElementId
val words = e.getWords
@@ -333,7 +332,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param tows
* @param ns
*/
- private def toTokens(tows: Seq[NCIdlContent], ns: NCNlpSentence): Seq[NlpToken] =
+ private def toTokens(tows: Seq[NCIdlContent], ns: Sentence): Seq[NlpToken] =
(
tows.filter(_.isRight).map(_.right.get) ++
tows.filter(_.isLeft).map(_.left.get).
@@ -364,7 +363,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param mdl
* @param ns
*/
- private def mkComplexes(mdl: NCProbeModel, ns: NCNlpSentence): ComplexHolder = {
+ private def mkComplexes(mdl: NCProbeModel, ns: Sentence): ComplexHolder = {
val complexesWords = ns.map(Complex(_))
val complexes =
@@ -424,42 +423,40 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
private def add(
dbgType: String,
- ns: NCNlpSentence,
+ ns: Sentence,
contCache: Cache,
- elm: NCElement,
- res: Seq[NlpToken],
- allToksIdxs: Seq[Int],
+ elem: NCElement,
+ elemToks: Seq[NlpToken],
+ sliceToksIdxs: Seq[Int],
syn: Synonym,
parts: Seq[TokType] = Seq.empty)
: Unit = {
- val resIdxs = res.map(_.index)
+ val resIdxs = elemToks.map(_.index)
+ val resIdxsSorted = resIdxs.sorted
- if (U.isContinuous(resIdxs.sorted) && resIdxs == allToksIdxs)
- contCache(elm.getId) += allToksIdxs
+ if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted))
+ contCache(elem.getId) += sliceToksIdxs
- val ok = !alreadyMarked(ns, elm.getId, res, allToksIdxs)
+ val ok = !alreadyMarked(ns, elem.getId, elemToks, sliceToksIdxs)
- if (ok) {
- val direct = syn.isDirect && U.isIncreased(resIdxs)
-
- mark(ns, elm, res, direct, syn = Some(syn), parts)
- }
+ if (ok)
+ mark(ns, elem, elemToks, direct = syn.isDirect && U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
if (DEEP_DEBUG)
logger.trace(
s"${if (ok) "Added" else "Skipped"} element [" +
- s"id=${elm.getId}, " +
+ s"id=${elem.getId}, " +
s"type=$dbgType, " +
- s"text='${res.map(_.origText).mkString(" ")}', " +
+ s"text='${elemToks.map(_.origText).mkString(" ")}', " +
s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
- s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")},
" +
+ s"allTokensIndexes=${sliceToksIdxs.mkString("[", ",",
"]")}, " +
s"synonym=$syn" +
s"]"
)
}
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ override def enrich(mdl: NCProbeModel, ns: Sentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
require(isStarted)
startScopedSpan(
@@ -537,25 +534,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
// 2. IDL.
if (idlEnabled) {
- if (mdl.hasSparseSynonyms)
- for (s ← get(mdl.idlSynonyms, eId); comb ← mkCombinations(ch, toks, idlCache.toSet))
- s.sparseMatch(comb.map(_.data), req) match {
- case Some(res) ⇒
- val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
-
- add(typ, ns, contCache, e, toTokens(res, ns), idxs, s, toParts(res, s))
-
- idlCache += comb
- case None ⇒ // No-op.
- }
- else {
- else {
+ // 2.1 Continuous.
+ if (!mdl.hasSparseSynonyms) {
var found = false
for (
s ← get(mdl.idlSynonyms, eId);
- comb ← mkCombinations(ch, toks, idlCache.toSet);
- data = comb.map(_.data)
- if !found
+ comb ← mkCombinations(ch, toks, idlCache.toSet);
+ data = comb.map(_.data)
+ if !found
)
if (s.isMatch(data, req)) {
add("IDL continuous", ns, contCache,
e, toks, idxs, s, toParts(data, s))
@@ -563,8 +550,20 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
idlCache += comb
found = true
- }
+ }
}
+ else
+ // 2.2 Sparse.
+ for (s ← get(mdl.idlSynonyms, eId); comb ← mkCombinations(ch, toks, idlCache.toSet))
+ s.sparseMatch(comb.map(_.data), req) match {
+ case Some(res) ⇒
+ val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+
+ add(typ, ns, contCache, e, toTokens(res, ns), idxs, s, toParts(res, s))
+
+ idlCache += comb
+ case None ⇒ // No-op.
+ }
}
}
}
@@ -588,17 +587,17 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
*
* @param elemId
* @param toks
- * @param allSortedSliceIdxs
+ * @param sliceToksIdxsSorted
*/
- private def alreadyMarked(ns: NCNlpSentence, elemId: String, toks: Seq[NlpToken], allSortedSliceIdxs: Seq[Int]): Boolean = {
+ private def alreadyMarked(ns: Sentence, elemId: String, toks: Seq[NlpToken], sliceToksIdxsSorted: Seq[Int]): Boolean = {
lazy val toksIdxsSorted = toks.map(_.index).sorted
- allSortedSliceIdxs.map(ns).forall(_.exists(n ⇒ n.noteType == elemId && n.isContiguous)) ||
+ sliceToksIdxsSorted.map(ns).forall(_.exists(n ⇒ n.noteType == elemId && n.isContiguous)) ||
toks.exists(_.exists(n ⇒
n.noteType == elemId &&
(
(n.isContiguous &&
- (allSortedSliceIdxs.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
+ (sliceToksIdxsSorted.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
)
||
(
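Reviewer note: a small self-contained sketch of the index predicates this change leans on. The semantics assumed below for U.isContinuous (sorted indexes form a gap-free run) and U.isIncreased (strictly ascending, i.e. the synonym matched tokens in their original sentence order) are inferred from their usage in add(); the actual org.apache.nlpcraft.common.U utility is not part of this diff. The containsSlice assertion mirrors the shape of the dedup test in alreadyMarked.

object IndexChecksSketch {
    // Assumed semantics of U.isContinuous: sorted indexes with no gaps,
    // e.g. Seq(3, 4, 5) -> true, Seq(3, 5) -> false.
    def isContinuous(sortedIdxs: Seq[Int]): Boolean =
        sortedIdxs.zip(sortedIdxs.tail).forall { case (a, b) ⇒ b == a + 1 }

    // Assumed semantics of U.isIncreased: strictly ascending indexes,
    // i.e. the match preserved the original token order ("direct" match).
    def isIncreased(idxs: Seq[Int]): Boolean =
        idxs.zip(idxs.tail).forall { case (a, b) ⇒ b > a }

    def main(args: Array[String]): Unit = {
        assert(isContinuous(Seq(3, 4, 5)))
        assert(!isContinuous(Seq(3, 5)))
        assert(isIncreased(Seq(1, 4, 7)))   // Direct order.
        assert(!isIncreased(Seq(4, 1, 7)))  // Permuted match: not direct.
        // Dedup shape used in alreadyMarked: a note whose token indexes
        // cover (or are covered by) the candidate slice is already marked.
        assert(Seq(1, 2, 3, 4).containsSlice(Seq(2, 3)))
    }
}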