This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new f5ed7c9 WIP.
f5ed7c9 is described below
commit f5ed7c9ee839e594daad7564b547cb275df6c88b
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 25 10:10:02 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 17 +++++++++++------
.../probe/mgrs/sentence/NCSentenceManager.scala | 11 +++++++++--
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e079aff..0197f2e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -129,11 +129,16 @@ object NCModelEnricher extends NCProbeEnricher {
case class ComplexHolder(complexesWords: Seq[Complex], complexes:
Seq[ComplexSeq])
class CacheHolder[T] {
- private lazy val cache = mutable.HashMap.empty[Int,
mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]
+ private lazy val cache =
+ mutable.HashMap.empty[String, mutable.HashMap[Int,
mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]]
- def add(s: Synonym, tokens: Seq[T]): Boolean = {
+ def isUnprocessed(elemId: String, s: Synonym, tokens: Seq[T]): Boolean
= {
cache.
getOrElseUpdate(
+ elemId,
+ mutable.HashMap.empty[Int, mutable.HashMap[Seq[T],
mutable.HashSet[Synonym]]]
+ ).
+ getOrElseUpdate(
tokens.length,
mutable.HashMap.empty[Seq[T], mutable.HashSet[Synonym]]
).
@@ -577,7 +582,7 @@ object NCModelEnricher extends NCProbeEnricher {
}
def tryScan(syns: Seq[Synonym]): Unit =
- for (s <- syns if !found &&
tokCache.add(s, idxs))
+ for (s <- syns if !found &&
tokCache.isUnprocessed(eId, s, idxs))
if
(NCSynonymsManager.isMatch(s, toks)) {
found = true
add("simple continuous
scan", toksExt, s)
@@ -597,7 +602,7 @@ object NCModelEnricher extends NCProbeEnricher {
// 1.2 Sparse.
if (!found && mdl.hasSparseSynonyms)
- for (s <- get(mdl.sparseSynonyms, eId) if
tokCache.add(s, idxs))
+ for (s <- get(mdl.sparseSynonyms, eId) if
tokCache.isUnprocessed(eId, s, idxs))
NCSynonymsManager.sparseMatch(s, toks)
match {
case Some(res) =>
add("simple sparse",
getSparsedTokens(res, toks), s)
@@ -618,7 +623,7 @@ object NCModelEnricher extends NCProbeEnricher {
s <- allSyns;
comb <- allCombs;
data = comb.map(_.data)
- if !found && idlCache.add(s, data)
+ if !found && idlCache.isUnprocessed(eId,
s, data)
)
if (NCSynonymsManager.isMatch(s, data,
req, variantsToks)) {
val parts = toParts(mdl, ns.srvReqId,
data, s)
@@ -634,7 +639,7 @@ object NCModelEnricher extends NCProbeEnricher {
s <- allSyns;
comb <- allCombs;
data = comb.map(_.data)
- if idlCache.add(s, data)
+ if idlCache.isUnprocessed(eId, s, data)
)
NCSynonymsManager.sparseMatch(s, data,
req, variantsToks) match {
case Some(res) =>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 9d52fa5..09904d2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -674,8 +674,15 @@ object NCSentenceManager extends NCService {
map { case (_, seq) => seq.map { case (_, note) =>
note }.toSet }.
toSeq.sortBy(-_.size)
- def findCombinations(): Seq[Seq[NCNlpSentenceNote]] =
-
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava,
pool).asScala.map(_.asScala.toSeq)
+ def findCombinations(): Seq[Seq[NCNlpSentenceNote]] = {
+ val x = System.currentTimeMillis()
+ println("!findCombinations start = "+toksByIdx.map(_.size)
+ ", all=" + toksByIdx.map(_.size).sum)
+ val res =
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava,
pool).asScala.map(_.asScala.toSeq)
+
+ println("findCombinations=" + (System.currentTimeMillis()
-x))
+
+ res
+ }
val seqSens =
combCache.