This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new 6dda404 WIP.
6dda404 is described below
commit 6dda404fedccfe0d836ea8daf6733b5b078c3e01
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 15 14:11:18 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 101 ++++++++++-----------
1 file changed, 50 insertions(+), 51 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 120d8d0..4df51c3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, NCNlpSentenceNote ⇒ NlpNote, NCNlpSentence ⇒ Sentence}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, _}
@@ -160,7 +160,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param metaOpt
*/
private def mark(
- ns: NCNlpSentence,
+ ns: Sentence,
elem: NCElement,
toks: Seq[NlpToken],
direct: Boolean,
@@ -203,7 +203,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val idxs = toks.map(_.index).sorted
- val note = NCNlpSentenceNote(idxs, elem.getId, params: _*)
+ val note = NlpNote(idxs, elem.getId, params: _*)
toks.foreach(_.add(note))
@@ -218,16 +218,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param span
* @param req
*/
- private def processParsers(mdl: NCProbeModel, ns: NCNlpSentence, span: Span, req: NCRequestImpl): Unit = {
- val parsers = mdl.model.getParsers
-
- for (parser ← parsers.asScala) {
+ private def processParsers(mdl: NCProbeModel, ns: Sentence, span: Span, req: NCRequestImpl): Unit = {
+ for (parser ← mdl.model.getParsers.asScala) {
parser.onInit()
startScopedSpan("customParser", span,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
- "txt" → ns.text) { _ ⇒
+ "txt" → ns.text
+ ) { _ ⇒
def to(t: NlpToken): NCCustomWord =
new NCCustomWord {
override def getNormalizedText: String = t.normText
@@ -246,7 +245,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
override def isEnglish: Boolean = t.isEnglish
}
- val parsingRes = parser.parse(
+ val res = parser.parse(
req,
mdl.model,
ns.map(to).asJava,
@@ -256,15 +255,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val md = n.asMetadata()
new NCCustomElement() {
- override def getElementId: String = noteId
+ override def getElementId: String = n.noteType
override def getWords: JList[NCCustomWord] = words
override def getMetadata: JavaMeta = md.map(p ⇒ p._1 → p._2.asInstanceOf[AnyRef]).asJava
}
}).asJava
)
- if (parsingRes != null)
- parsingRes.asScala.foreach(e ⇒ {
+ if (res != null)
+ res.asScala.foreach(e ⇒ {
val elemId = e.getElementId
val words = e.getWords
@@ -333,7 +332,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param tows
* @param ns
*/
- private def toTokens(tows: Seq[NCIdlContent], ns: NCNlpSentence): Seq[NlpToken] =
+ private def toTokens(tows: Seq[NCIdlContent], ns: Sentence): Seq[NlpToken] =
(
tows.filter(_.isRight).map(_.right.get) ++
tows.filter(_.isLeft).map(_.left.get).
@@ -364,7 +363,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param mdl
* @param ns
*/
- private def mkComplexes(mdl: NCProbeModel, ns: NCNlpSentence): ComplexHolder = {
+ private def mkComplexes(mdl: NCProbeModel, ns: Sentence): ComplexHolder = {
val complexesWords = ns.map(Complex(_))
val complexes =
@@ -424,42 +423,40 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
private def add(
dbgType: String,
- ns: NCNlpSentence,
+ ns: Sentence,
contCache: Cache,
- elm: NCElement,
- res: Seq[NlpToken],
- allToksIdxs: Seq[Int],
+ elem: NCElement,
+ elemToks: Seq[NlpToken],
+ sliceToksIdxs: Seq[Int],
syn: Synonym,
parts: Seq[TokType] = Seq.empty)
: Unit = {
- val resIdxs = res.map(_.index)
+ val resIdxs = elemToks.map(_.index)
+ val resIdxsSorted = resIdxs.sorted
- if (U.isContinuous(resIdxs.sorted) && resIdxs == allToksIdxs)
- contCache(elm.getId) += allToksIdxs
+ if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted))
+ contCache(elem.getId) += sliceToksIdxs
- val ok = !alreadyMarked(ns, elm.getId, res, allToksIdxs)
+ val ok = !alreadyMarked(ns, elem.getId, elemToks, sliceToksIdxs)
- if (ok) {
- val direct = syn.isDirect && U.isIncreased(resIdxs)
-
- mark(ns, elm, res, direct, syn = Some(syn), parts)
- }
+ if (ok)
+ mark(ns, elem, elemToks, direct = syn.isDirect && U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
if (DEEP_DEBUG)
logger.trace(
s"${if (ok) "Added" else "Skipped"} element [" +
- s"id=${elm.getId}, " +
+ s"id=${elem.getId}, " +
s"type=$dbgType, " +
- s"text='${res.map(_.origText).mkString(" ")}', " +
+ s"text='${elemToks.map(_.origText).mkString(" ")}', " +
s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
- s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")},
" +
+ s"allTokensIndexes=${sliceToksIdxs.mkString("[", ",",
"]")}, " +
s"synonym=$syn" +
s"]"
)
}
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ override def enrich(mdl: NCProbeModel, ns: Sentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
require(isStarted)
startScopedSpan(
@@ -537,25 +534,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
// 2. IDL.
if (idlEnabled) {
- if (mdl.hasSparseSynonyms)
- for (s ← get(mdl.idlSynonyms, eId); comb ← mkCombinations(ch, toks, idlCache.toSet))
- s.sparseMatch(comb.map(_.data), req) match {
- case Some(res) ⇒
- val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
-
- add(typ, ns, contCache, e, toTokens(res, ns), idxs, s, toParts(res, s))
-
- idlCache += comb
- case None ⇒ // No-op.
- }
- else {
- else {
+ // 2.1 Continuous.
+ if (!mdl.hasSparseSynonyms) {
var found = false
for (
s ← get(mdl.idlSynonyms, eId);
- comb ← mkCombinations(ch, toks, idlCache.toSet);
- data = comb.map(_.data)
- if !found
+ comb ← mkCombinations(ch, toks, idlCache.toSet);
+ data = comb.map(_.data)
+ if !found
)
if (s.isMatch(data, req)) {
add("IDL continuous", ns, contCache,
e, toks, idxs, s, toParts(data, s))
@@ -563,8 +550,20 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
idlCache += comb
found = true
- }
+ }
}
+ else
+ // 2.2 Sparse.
+ for (s ← get(mdl.idlSynonyms, eId); comb ← mkCombinations(ch, toks, idlCache.toSet))
+ s.sparseMatch(comb.map(_.data), req) match {
+ case Some(res) ⇒
+ val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+
+ add(typ, ns, contCache, e, toTokens(res, ns), idxs, s, toParts(res, s))
+
+ idlCache += comb
+ case None ⇒ // No-op.
+ }
}
}
}
@@ -588,17 +587,17 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
*
* @param elemId
* @param toks
- * @param allSortedSliceIdxs
+ * @param sliceToksIdxsSorted
*/
- private def alreadyMarked(ns: NCNlpSentence, elemId: String, toks: Seq[NlpToken], allSortedSliceIdxs: Seq[Int]): Boolean = {
+ private def alreadyMarked(ns: Sentence, elemId: String, toks: Seq[NlpToken], sliceToksIdxsSorted: Seq[Int]): Boolean = {
lazy val toksIdxsSorted = toks.map(_.index).sorted
- allSortedSliceIdxs.map(ns).forall(_.exists(n ⇒ n.noteType == elemId && n.isContiguous)) ||
+ sliceToksIdxsSorted.map(ns).forall(_.exists(n ⇒ n.noteType == elemId && n.isContiguous)) ||
toks.exists(_.exists(n ⇒
n.noteType == elemId &&
(
(n.isContiguous &&
- (allSortedSliceIdxs.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
+ (sliceToksIdxsSorted.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
)
||
(
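Reviewer note: a small self-contained sketch of the index predicates this change leans on. The semantics assumed below for U.isContinuous (sorted indexes form a gap-free run) and U.isIncreased (strictly ascending, i.e. the synonym matched tokens in their original sentence order) are inferred from their usage in add(); the actual org.apache.nlpcraft.common.U utility is not part of this diff. The containsSlice assertion mirrors the shape of the dedup test in alreadyMarked.

object IndexChecksSketch {
    // Assumed semantics of U.isContinuous: sorted indexes with no gaps,
    // e.g. Seq(3, 4, 5) -> true, Seq(3, 5) -> false.
    def isContinuous(sortedIdxs: Seq[Int]): Boolean =
        sortedIdxs.zip(sortedIdxs.tail).forall { case (a, b) ⇒ b == a + 1 }

    // Assumed semantics of U.isIncreased: strictly ascending indexes,
    // i.e. the match preserved the original token order ("direct" match).
    def isIncreased(idxs: Seq[Int]): Boolean =
        idxs.zip(idxs.tail).forall { case (a, b) ⇒ b > a }

    def main(args: Array[String]): Unit = {
        assert(isContinuous(Seq(3, 4, 5)))
        assert(!isContinuous(Seq(3, 5)))
        assert(isIncreased(Seq(1, 4, 7)))   // Direct order.
        assert(!isIncreased(Seq(4, 1, 7)))  // Permuted match: not direct.
        // Dedup shape used in alreadyMarked: a note whose token indexes
        // cover (or are covered by) the candidate slice is already marked.
        assert(Seq(1, 2, 3, 4).containsSlice(Seq(2, 3)))
    }
}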