This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-287 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 6c23bec681b90813cfda7d34c4dd6ccb1fe7465a Author: Sergey Kamov <[email protected]> AuthorDate: Mon Apr 5 22:44:46 2021 +0300 WIP. --- .../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 40 ++++++++++++---------- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 16 +++++---- .../model/NCEnricherNestedModelSpec4.scala | 3 ++ 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala index fffd476..5324304 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala @@ -92,35 +92,37 @@ class NCProbeSynonym( require(toks != null) require(toks.nonEmpty) - lazy val buf = mutable.ArrayBuffer.empty[T] + lazy val res = mutable.ArrayBuffer.empty[T] + lazy val all = mutable.HashSet.empty[T] + var state = 0 - for (chunk ← this if state != -1) - toks.find(t ⇒ { + for (chunk ← this if state != -1) { + val seq = if (state == 0) { state = 1 - isMatch(t, chunk) && !buf.contains(t) + toks.filter(t ⇒ isMatch(t, chunk)) } else - !buf.contains(t) && isMatch(t, chunk) - }) match { - case Some(t) ⇒ - if (!perm && buf.nonEmpty && getIndex(t) <= getIndex(buf.last)) - state = -1 - else - buf += t - case None ⇒ state = -1 - } + toks.filter(t ⇒ !res.contains(t) && isMatch(t, chunk)) - if (state != -1 && - { - val remained = toks.filter(t ⇒ !buf.contains(t)) + if (seq.nonEmpty) { + val head = seq.head - !this.exists(chunk ⇒ remained.exists(t ⇒ isMatch(t, chunk))) + if (!perm && res.nonEmpty && getIndex(head) <= getIndex(res.last)) + state = -1 + else { + res += head + all ++= seq + } } - ) - Some(buf) + else + state = -1 + } + + if (state != -1 && all.size == res.size) + Some(res) else None } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index fec01e5..46506fd 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -272,7 +272,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { // * @param toks // * @param elemId // */ -// private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId)) + private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId)) /** * @@ -435,6 +435,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { foundNotSparse = true } + // 1. Simple, sparse. if (firstPhase && sparseEnabled) for (syn ← mdl.sparseSynonyms.getOrElse(elemId, Seq.empty) if !foundSparse) @@ -479,16 +480,19 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { // 3. DSL, sparse. if (sparseEnabled) - for (syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty); (_, seq) ← dslCombs; comb ← seq if !foundSparse) { + for ( + (_, seq) ← dslCombs; + syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty); + comb ← seq if !foundSparse + ) { syn.trySparseMatch(comb.map(_.data), req) match { - case Some(towsRes) ⇒ - addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn)) + case Some(towsRes) ⇒ addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn)) case None ⇒ // No-op. } } + // 4. DSL, non sparse. if (notSparseEnabled) { - // 4. DSL, non sparse. for ( (len, seq) ← dslCombs; syn ← fastAccess(mdl.nonSparseSynonymsDsl, elemId, len).getOrElse(Seq.empty); @@ -608,7 +612,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { ) // TODO: - //if (!alreadyMarked(matchedToks, elemId)) + if (!alreadyMarked(matchedToks, elemId)) mark( ns, elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")), diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala index 43320e7..680086d 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala @@ -41,6 +41,9 @@ class NCNestedTestModel4 extends NCModelAdapter( @NCIntent("intent=onE2 term(t1)={tok_id() == 'e2'}[8, 100]") def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK") + + override def isPermutateSynonyms: Boolean = false + override def isSparse: Boolean = false } /**
