[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

sergeykamov Sat, 25 Sep 2021 00:10:15 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
     new f5ed7c9  WIP.
f5ed7c9 is described below

commit f5ed7c9ee839e594daad7564b547cb275df6c88b
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 25 10:10:02 2021 +0300

    WIP.
---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala      | 17 +++++++++++------
 .../probe/mgrs/sentence/NCSentenceManager.scala         | 11 +++++++++--
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e079aff..0197f2e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -129,11 +129,16 @@ object NCModelEnricher extends NCProbeEnricher {
     case class ComplexHolder(complexesWords: Seq[Complex], complexes: Seq[ComplexSeq])
 
     class CacheHolder[T] {
-        private lazy val cache = mutable.HashMap.empty[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]
+        private lazy val cache =
+            mutable.HashMap.empty[String, mutable.HashMap[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]]
 
-        def add(s: Synonym, tokens: Seq[T]): Boolean = {
+        def isUnprocessed(elemId: String, s: Synonym, tokens: Seq[T]): Boolean = {
             cache.
                 getOrElseUpdate(
+                    elemId,
+                    mutable.HashMap.empty[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]
+                ).
+                getOrElseUpdate(
                     tokens.length,
                     mutable.HashMap.empty[Seq[T], mutable.HashSet[Synonym]]
                 ).
@@ -577,7 +582,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                             }
 
                                         def tryScan(syns: Seq[Synonym]): Unit =
-                                            for (s <- syns if !found && tokCache.add(s, idxs))
+                                            for (s <- syns if !found && tokCache.isUnprocessed(eId, s, idxs))
                                                 if (NCSynonymsManager.isMatch(s, toks)) {
                                                     found = true
                                                     add("simple continuous scan", toksExt, s)
@@ -597,7 +602,7 @@ object NCModelEnricher extends NCProbeEnricher {
 
                             // 1.2 Sparse.
                             if (!found && mdl.hasSparseSynonyms)
-                                for (s <- get(mdl.sparseSynonyms, eId) if tokCache.add(s, idxs))
+                                for (s <- get(mdl.sparseSynonyms, eId) if tokCache.isUnprocessed(eId, s, idxs))
                                     NCSynonymsManager.sparseMatch(s, toks) match {
                                         case Some(res) =>
                                             add("simple sparse", getSparsedTokens(res, toks), s)
@@ -618,7 +623,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     s <- allSyns;
                                     comb <- allCombs;
                                     data = comb.map(_.data)
-                                    if !found && idlCache.add(s, data)
+                                    if !found && idlCache.isUnprocessed(eId, s, data)
                                 )
                                     if (NCSynonymsManager.isMatch(s, data, req, variantsToks)) {
                                         val parts = toParts(mdl, ns.srvReqId, data, s)
@@ -634,7 +639,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     s <- allSyns;
                                     comb <- allCombs;
                                     data = comb.map(_.data)
-                                    if idlCache.add(s, data)
+                                    if idlCache.isUnprocessed(eId, s, data)
                                 )
                                     NCSynonymsManager.sparseMatch(s, data, req, variantsToks) match {
                                         case Some(res) =>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 9d52fa5..09904d2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -674,8 +674,15 @@ object NCSentenceManager extends NCService {
                         map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
                         toSeq.sortBy(-_.size)
 
-                def findCombinations(): Seq[Seq[NCNlpSentenceNote]] =
-                    NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq)
+                def findCombinations(): Seq[Seq[NCNlpSentenceNote]] = {
+                    val x = System.currentTimeMillis()
+                    println("!findCombinations start = "+toksByIdx.map(_.size) + ", all=" + toksByIdx.map(_.size).sum)
+                    val res = NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq)
+
+                    println("findCombinations=" + (System.currentTimeMillis() -x))
+
+                    res
+                }
 
                 val seqSens =
                     combCache.

[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

Reply via email to