This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new dd63a0a WIP.
dd63a0a is described below
commit dd63a0a68100ae464ae3b12e30357d7c81944946
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 25 20:15:05 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 114 ++++++++-------
.../probe/mgrs/synonyms/NCSynonymsManager.scala | 162 +++++++++++++--------
2 files changed, 164 insertions(+), 112 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index bd7804b..ded7928 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -508,9 +508,9 @@ object NCModelEnricher extends NCProbeEnricher {
(toks, toksExt) <- combosTokens(ns.toSeq);
idxs = toks.map(_.index);
e <- mdl.elements.values;
- eId = e.getId;
+ elemId = e.getId;
greedy = e.isGreedy.orElse(mdl.model.isGreedy)
- if !greedy || !alreadyMarked(ns, eId, toks, idxs)
+ if !greedy || !alreadyMarked(ns, elemId, toks, idxs)
) {
def add(
dbgType: String,
@@ -521,16 +521,16 @@ object NCModelEnricher extends NCProbeEnricher {
val resIdxs = elemToks.map(_.index)
val ok =
-                (!greedy || !alreadyMarked(ns, eId, elemToks, idxs)) &&
-                ( parts.isEmpty || !parts.exists { case (t, _) => t.getId == eId })
+                (!greedy || !alreadyMarked(ns, elemId, elemToks, idxs)) &&
+                ( parts.isEmpty || !parts.exists { case (t, _) => t.getId == elemId })
if (ok)
-                mark(ns, eId, elemToks, direct = syn.isDirect && U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
+                mark(ns, elemId, elemToks, direct = syn.isDirect && U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
if (DEEP_DEBUG)
logger.trace(
s"${if (ok) "Added" else "Skipped"}
element [" +
- s"id=$eId, " +
+ s"id=$elemId, " +
s"type=$dbgType, " +
s"text='${elemToks.map(_.origText).mkString(" ")}', " +
s"indexes=${resIdxs.mkString("[", ",",
"]")}, " +
@@ -541,14 +541,14 @@ object NCModelEnricher extends NCProbeEnricher {
}
// 1. SIMPLE.
-            if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) {
+            if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(elemId) else !mdl.hasIdlSynonyms(elemId))) {
lazy val tokStems = toks.map(_.stem).mkString(" ")
// 1.1 Continuous.
var found = false
if (mdl.hasContinuousSynonyms)
-                    fastAccess(mdl.continuousSynonyms, eId, toks.length) match {
+                    fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
case Some(h) =>
                            def tryMap(syns: Map[String, Synonym], notFound: () => Unit): Unit =
syns.get(tokStems) match {
@@ -559,16 +559,17 @@ object NCModelEnricher extends NCProbeEnricher {
}
def tryScan(syns: Seq[Synonym]): Unit =
-                            for (
-                                s <- syns
-                                if
-                                    !found &&
-                                    NCSynonymsManager.isUnprocessedTokens(ns.srvReqId, eId, s, idxs)
-                            )
-                                if (NCSynonymsManager.isMatch(s, toks)) {
-                                    found = true
-                                    add("simple continuous scan", toksExt, s)
-                                }
+                            for (syn <- syns if !found)
+                                NCSynonymsManager.onMatch(
+                                    ns.srvReqId,
+                                    elemId,
+                                    syn,
+                                    toks,
+                                    _ => {
+                                        found = true
+                                        add("simple continuous scan", toksExt, syn)
+                                    }
+                                )
tryMap(
h.txtDirectSynonyms,
@@ -584,57 +585,60 @@ object NCModelEnricher extends NCProbeEnricher {
// 1.2 Sparse.
if (!found && mdl.hasSparseSynonyms)
-                    for (
-                        s <- get(mdl.sparseSynonyms, eId)
-                        if NCSynonymsManager.isUnprocessedTokens(ns.srvReqId, eId, s, idxs)
-                    )
-                        NCSynonymsManager.sparseMatch(s, toks) match {
-                            case Some(res) =>
-                                add("simple sparse", getSparsedTokens(res, toks), s)
-                            case None => // No-op.
-                        }
+                    for (syn <- get(mdl.sparseSynonyms, elemId))
+                        NCSynonymsManager.onSparseMatchTokens(
+                            ns.srvReqId,
+                            elemId,
+                            syn,
+                            toks,
+                            res => add("simple sparse", getSparsedTokens(res, toks), syn)
+                        )
}
// 2. IDL.
if (idlEnabled) {
-                val allSyns = get(mdl.idlSynonyms, eId)
+                val allSyns = get(mdl.idlSynonyms, elemId)
lazy val allCombs = mkCombinations(ch, toks)
// 2.1 Continuous.
if (!mdl.hasSparseSynonyms) {
var found = false
-                    for (
-                        s <- allSyns;
-                        comb <- allCombs;
-                        data = comb.map(_.data)
-                        if !found && NCSynonymsManager.isUnprocessedIdl(ns.srvReqId, eId, s, data)
-                    )
-                        if (NCSynonymsManager.isMatch(s, data, req, variantsToks)) {
-                            val parts = toParts(mdl, ns.srvReqId, data, s)
-
-                            add("IDL continuous", toksExt, s, parts)
-
-                            found = true
-                        }
+                    for (syn <- allSyns; comb <- allCombs; data = comb.map(_.data) if !found)
+                        NCSynonymsManager.onMatch(
+                            ns.srvReqId,
+                            elemId,
+                            syn,
+                            data,
+                            req,
+                            variantsToks,
+                            _ => {
+                                val parts = toParts(mdl, ns.srvReqId, data, syn)
+
+                                add("IDL continuous", toksExt, syn, parts)
+
+                                found = true
+                            }
+                        )
}
else
// 2.2 Sparse.
-                    for (
-                        s <- allSyns;
-                        comb <- allCombs;
-                        data = comb.map(_.data)
-                        if NCSynonymsManager.isUnprocessedIdl(ns.srvReqId, eId, s, data)
-                    )
-                        NCSynonymsManager.sparseMatch(s, data, req, variantsToks) match {
-                            case Some(res) =>
+                    for (syn <- allSyns; comb <- allCombs)
+                        NCSynonymsManager.onSparseMatch(
+                            ns.srvReqId,
+                            elemId,
+                            syn,
+                            comb.map(_.data),
+                            req,
+                            variantsToks,
+                            res => {
                                val toks = getSparsedTokens(toTokens(res, ns), toTokens(comb.map(_.data), ns))
-                                val parts = toParts(mdl, ns.srvReqId, res, s)
-                                val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+                                val parts = toParts(mdl, ns.srvReqId, res, syn)
+                                val typ = if (syn.sparse) "IDL sparse" else "IDL continuous"
-                                add(typ, toks, s, parts)
-                            case None => // No-op.
-                        }
+                                add(typ, toks, syn, parts)
+                            }
+                        )
}
}
}
@@ -652,7 +656,7 @@ object NCModelEnricher extends NCProbeEnricher {
processParsers(mdl, ns, span, req)
}
- NCSynonymsManager.clearRequestIterationData(ns.srvReqId)
+ NCSynonymsManager.clearIteration(ns.srvReqId)
normalize(ns)
}
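
The enricher-side change above replaces the old call pattern (guard with
NCSynonymsManager.isUnprocessedTokens/isUnprocessedIdl, test a boolean
isMatch/sparseMatch, then act on the result) with callback-style
onMatch/onSparseMatch calls that fold the dedup guard into the manager
itself. A minimal sketch of that control-flow shift, using hypothetical
stand-in types (Tok, Syn) and a plain equality match instead of the real
NCNlpSentenceToken/Synonym machinery:

    object CallbackMatchSketch extends App {
        type Tok = String
        type Syn = Seq[String]

        // Stand-in for the per-request cache that isUnprocessedTokens
        // maintains internally; add() returns false on a repeated pair.
        private val processed = scala.collection.mutable.HashSet.empty[(Syn, Seq[Tok])]

        private def isUnprocessed(syn: Syn, toks: Seq[Tok]): Boolean =
            processed.add((syn, toks))

        // Simplified match test standing in for the real chunk-by-chunk check.
        private def isMatch(syn: Syn, toks: Seq[Tok]): Boolean =
            syn.length == toks.length && syn.zip(toks).forall { case (s, t) => s == t }

        // New style: dedup check and match test live together; the caller
        // only supplies a callback that fires on a hit.
        def onMatch(syn: Syn, toks: Seq[Tok], callback: Unit => Unit): Unit =
            if (isUnprocessed(syn, toks) && isMatch(syn, toks))
                callback(())

        var found = false
        onMatch(Seq("big", "city"), Seq("big", "city"), _ => { found = true; println("matched") })
        // A repeated call with the same pair is skipped by the cache, which is
        // what lets the enricher drop its explicit isUnprocessed* guards.
        onMatch(Seq("big", "city"), Seq("big", "city"), _ => println("never printed"))
        println(s"found=$found")
    }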
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index 7996510..315f380 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.probe.mgrs.synonyms
import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceNote, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceNote => NlpNote, NCNlpSentenceToken => NlpToken}
import org.apache.nlpcraft.common.{NCService, U}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
@@ -35,7 +35,7 @@ import scala.jdk.CollectionConverters.ListHasAsScala
*
*/
object NCSynonymsManager extends NCService {
- class CacheHolder[T] {
+ private class CacheHolder[T] {
private lazy val cache =
            mutable.HashMap.empty[String, mutable.HashMap[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]]
@@ -56,9 +56,9 @@ object NCSynonymsManager extends NCService {
}
}
-    case class SavedIdlKey(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef] = Map.empty)
+    private case class SavedIdlKey(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef] = Map.empty)
- object SavedIdlKey {
+ private object SavedIdlKey {
def apply(t: NCToken): SavedIdlKey =
if (t.isUserDefined)
SavedIdlKey(t.getId, t.getStartCharIndex, t.getEndCharIndex)
@@ -67,14 +67,14 @@ object NCSynonymsManager extends NCService {
t.getId,
t.getStartCharIndex,
t.getEndCharIndex,
-                NCNlpSentenceNote.getBuiltProperties(t.getId).flatMap(p => t.metaOpt(p).asScala match {
+                NlpNote.getBuiltProperties(t.getId).flatMap(p => t.metaOpt(p).asScala match {
case Some(v) => Some(p -> v)
case None => None
}).toMap
)
}
-    case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
+    private case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
override def toString: String = variants.toString()
}
@@ -100,7 +100,7 @@ object NCSynonymsManager extends NCService {
* @param tok
* @param chunk
*/
-    private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk): Boolean =
+    private def isMatch(tok: NlpToken, chunk: NCProbeSynonymChunk): Boolean =
chunk.kind match {
case TEXT => chunk.wordStem == tok.stem
case REGEX =>
@@ -210,7 +210,7 @@ object NCSynonymsManager extends NCService {
getOrElseUpdate(
(tow, chunk),
{
-                def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
+                def get0[T](fromToken: NCToken => T, fromWord: NlpToken => T): T =
if (tow.isLeft) fromToken(tow.swap.toOption.get)
else fromWord(tow.toOption.get)
@@ -242,74 +242,118 @@ object NCSynonymsManager extends NCService {
/**
*
- * @param s
+ * @param srvReqId
+ * @param elemId
+ * @param syn
* @param toks
+ * @param callback
*/
-    def isMatch(s: Synonym, toks: Seq[NCNlpSentenceToken]): Boolean = {
-        require(toks != null)
-        require(!s.sparse && !s.hasIdl)
-
-        if (toks.length == s.length) {
-            if (s.isTextOnly)
-                toks.zip(s).forall(p => p._1.stem == p._2.wordStem)
-            else
-                toks.zip(s).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
+    def onMatch(srvReqId: String, elemId: String, syn: Synonym, toks: Seq[NlpToken], callback: Unit => Unit): Unit =
+        if (isUnprocessedTokens(srvReqId, elemId, syn, toks.map(_.index))) {
+            require(toks != null)
+            require(!syn.sparse && !syn.hasIdl)
+
+            if (
+                toks.length == syn.length && {
+                    if (syn.isTextOnly)
+                        toks.zip(syn).forall(p => p._1.stem == p._2.wordStem)
+                    else
+                        toks.zip(syn).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
+                }
+            )
+                callback()
        }
-        else
-            false
-    }
/**
*
+ * @param srvReqId
+ * @param elemId
* @param s
* @param tows
* @param req
* @param variantsToks
+ * @param callback
*/
-    def isMatch(s: Synonym, tows: Seq[NCIdlToken], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean = {
-        require(tows != null)
-
-        if (tows.length == s.length && tows.count(_.isLeft) >= s.idlChunks)
-            tows.zip(s).sortBy(p => getSort(p._2.kind)).forall {
-                case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
-            }
-        else
-            false
-    }
+    def onMatch(
+        srvReqId: String,
+        elemId: String,
+        s: Synonym,
+        tows: Seq[NCIdlToken],
+        req: NCRequest,
+        variantsToks: Seq[Seq[NCToken]],
+        callback: Unit => Unit
+    ): Unit =
+        if (isUnprocessedIdl(srvReqId, elemId, s, tows)) {
+            require(tows != null)
+
+            if (
+                tows.length == s.length &&
+                tows.count(_.isLeft) >= s.idlChunks && {
+                    tows.zip(s).sortBy(p => getSort(p._2.kind)).forall {
+                        case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
+                    }
+                }
+            )
+                callback()
+        }
/**
*
- * @param s
+ * @param srvReqId
+ * @param elemId
+ * @param syn
* @param toks
+ * @param callback
*/
-    def sparseMatch(s: Synonym, toks: Seq[NCNlpSentenceToken]): Option[Seq[NCNlpSentenceToken]] = {
-        require(toks != null)
-        require(s.sparse && !s.hasIdl)
-
-        sparseMatch0(s, toks, isMatch, (t: NCNlpSentenceToken) => t.startCharIndex, shouldBeNeighbors = false)
-    }
+    def onSparseMatchTokens(
+        srvReqId: String, elemId: String, syn: Synonym, toks: Seq[NlpToken], callback: Seq[NlpToken] => Unit
+    ): Unit =
+        if (isUnprocessedTokens(srvReqId, elemId, syn, toks.map(_.index))) {
+            require(toks != null)
+            require(syn.sparse && !syn.hasIdl)
+
+            sparseMatch0(syn, toks, isMatch, (t: NlpToken) => t.startCharIndex, shouldBeNeighbors = false) match {
+                case Some(res) => callback(res)
+                case None => // No-op.
+            }
+        }
/**
*
- * @param s
+ * @param srvReqId
+ * @param elemId
+ * @param syn
* @param tows
* @param req
* @param variantsToks
+ * @param callback
*/
-    def sparseMatch(s: Synonym, tows: Seq[NCIdlToken], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Option[Seq[NCIdlToken]] = {
-        require(tows != null)
-        require(req != null)
-        require(s.hasIdl)
-
-        sparseMatch0(
-            s,
-            tows,
-            (t: NCIdlToken, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
-            (t: NCIdlToken) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex
-                else t.toOption.get.startCharIndex,
-            shouldBeNeighbors = !s.sparse
-        )
-    }
+    def onSparseMatch(
+        srvReqId: String,
+        elemId: String,
+        syn: Synonym,
+        tows: Seq[NCIdlToken],
+        req: NCRequest,
+        variantsToks: Seq[Seq[NCToken]],
+        callback: Seq[NCIdlToken] => Unit
+    ): Unit =
+        if (isUnprocessedIdl(srvReqId, elemId, syn, tows)) {
+            require(tows != null)
+            require(req != null)
+            require(syn.hasIdl)
+
+            sparseMatch0(
+                syn,
+                tows,
+                (t: NCIdlToken, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
+                (t: NCIdlToken) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex
+                    else t.toOption.get.startCharIndex,
+                shouldBeNeighbors = !syn.sparse
+            ) match {
+                case Some(res) => callback(res)
+                case None => // No-op.
+            }
+        }
/**
*
@@ -361,7 +405,7 @@ object NCSynonymsManager extends NCService {
* @param s
* @param tokens
*/
-    def isUnprocessedTokens(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[Int]): Boolean =
+    private def isUnprocessedTokens(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[Int]): Boolean =
        tokCaches.getOrElseUpdate(srvReqId, new CacheHolder[Int]).isUnprocessed(elemId, s, tokens)
/**
@@ -371,20 +415,24 @@ object NCSynonymsManager extends NCService {
* @param s
* @param tokens
*/
-    def isUnprocessedIdl(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[NCIdlToken]): Boolean =
+    private def isUnprocessedIdl(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[NCIdlToken]): Boolean =
        idlCaches.getOrElseUpdate(srvReqId, new CacheHolder[NCIdlToken]).isUnprocessed(elemId, s, tokens)
/**
*
* @param srvReqId
*/
- def clearRequestData(srvReqId: String): Unit = savedIdl -= srvReqId
+ def clearRequestData(srvReqId: String): Unit = {
+ clearIteration(srvReqId)
+
+ savedIdl -= srvReqId
+ }
/**
*
* @param srvReqId
*/
- def clearRequestIterationData(srvReqId: String): Unit = {
+ def clearIteration(srvReqId: String): Unit = {
idlChunksCache -= srvReqId
idlCaches -= srvReqId
tokCaches -= srvReqId
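
The last hunks also tighten the cleanup contract: clearRequestData now
delegates to the renamed clearIteration before dropping the request's saved
IDL state, so iteration-scoped caches can never outlive their request. A
minimal sketch of that two-level lifecycle, with hypothetical map names
standing in for idlChunksCache/idlCaches/tokCaches and savedIdl:

    object RequestLifecycleSketch extends App {
        import scala.collection.mutable

        // Iteration-scoped state, dropped after every enrichment pass.
        private val iterationCaches = mutable.HashMap.empty[String, mutable.HashSet[String]]
        // Request-scoped state, kept until the request completes.
        private val savedData = mutable.HashMap.empty[String, String]

        // Mirrors clearIteration(srvReqId): only iteration state goes.
        def clearIteration(srvReqId: String): Unit = iterationCaches -= srvReqId

        // Mirrors the updated clearRequestData(srvReqId): clear iteration
        // state first, then drop the request-scoped data itself.
        def clearRequestData(srvReqId: String): Unit = {
            clearIteration(srvReqId)
            savedData -= srvReqId
        }

        val req = "req-1"
        iterationCaches.getOrElseUpdate(req, mutable.HashSet("syn-1"))
        savedData(req) = "saved IDL predicates"

        clearIteration(req) // between iterations: request data survives
        println(s"${iterationCaches.get(req)} / ${savedData.get(req)}") // None / Some(...)

        clearRequestData(req) // request done: everything for req-1 is gone
        println(savedData.get(req)) // None
    }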