This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new ddfef96 WIP.
ddfef96 is described below
commit ddfef9682bdf846364c27c5ff79200ce364f071b
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 25 21:33:30 2021 +0300
WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeIdlToken.scala      |  3 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 44 +++++++++++-----------
2 files changed, 23 insertions(+), 24 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
index 37fea25..5da9808 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
@@ -52,8 +52,7 @@ case class NCProbeIdlToken(token: NCToken, word: NCNlpSentenceToken) {
 
     override def toString: String = {
         val idxs = wordIndexes.mkString(",")
 
-        if (isToken && token.getId != "nlpcraft:nlp") s"'$origText' (${token.getId}) [$idxs]]"
-        else s"'$origText' [$idxs]"
+        if (isToken && token.getId != "nlpcraft:nlp") s"'$origText' (${token.getId}) [$idxs]]" else s"'$origText' [$idxs]"
     }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 69c85d3..0a7f900 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -42,13 +42,13 @@ import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, MapHasAsSca
 object NCModelEnricher extends NCProbeEnricher {
     type TokType = (NCToken, NCSynonymChunkKind)
 
-    object ComplexSeq {
-        def apply(all: Seq[IdlToken]): ComplexSeq = ComplexSeq(all.filter(_.isToken), all.flatMap(_.wordIndexes).toSet)
+    object IdlTokensSeq {
+        def apply(all: Seq[IdlToken]): IdlTokensSeq = IdlTokensSeq(all.filter(_.isToken), all.flatMap(_.wordIndexes).toSet)
     }
 
-    case class ComplexSeq(tokensComplexes: Seq[IdlToken], wordsIndexes: Set[Int]) {
+    case class IdlTokensSeq(tokens: Seq[IdlToken], wordsIndexes: Set[Int]) {
         private val (idxsSet: Set[Int], minIndex: Int, maxIndex: Int) = {
-            val seq = tokensComplexes.flatMap(_.wordIndexes).distinct.sorted
+            val seq = tokens.flatMap(_.wordIndexes).distinct.sorted
 
             (seq.toSet, seq.head, seq.last)
         }
@@ -59,10 +59,10 @@ object NCModelEnricher extends NCProbeEnricher {
             else
                 this.idxsSet.exists(idxsSet.contains)
 
-        override def toString: String = tokensComplexes.mkString(" | ")
+        override def toString: String = tokens.mkString(" | ")
     }
 
-    case class ComplexHolder(complexesWords: Seq[IdlToken], complexes: Seq[ComplexSeq])
+    case class IdlTokensHolder(tokens: Seq[IdlToken], seqs: Seq[IdlTokensSeq])
 
     /**
      *
@@ -288,8 +288,8 @@ object NCModelEnricher extends NCProbeEnricher {
      */
     private def toParts(mdl: NCProbeModel, stvReqId: String, seq: Seq[IdlToken], s: Synonym): Seq[TokType] =
         seq.zip(s.map(_.kind)).flatMap {
-            case (complex, kind) =>
-                val t = if (complex.isToken) complex.token else mkNlpToken(mdl, stvReqId, complex.word)
+            case (idlTok, kind) =>
+                val t = if (idlTok.isToken) idlTok.token else mkNlpToken(mdl, stvReqId, idlTok.word)
 
                 Some(t -> kind)
         }
@@ -329,10 +329,10 @@ object NCModelEnricher extends NCProbeEnricher {
      * @param mdl
      * @param ns
      */
-    private def mkComplexes(mdl: NCProbeModel, ns: Sentence): ComplexHolder = {
-        val complexesWords = ns.map(IdlToken(_))
+    private def mkHolder(mdl: NCProbeModel, ns: Sentence): IdlTokensHolder = {
+        val toks = ns.map(IdlToken(_))
 
-        val complexes =
+        val seqs =
             NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).
                 map(_.asScala).
                 par.
@@ -353,13 +353,13 @@ object NCModelEnricher extends NCProbeEnricher {
                                 if (t.wordIndexes.length == 1 || senPartComb.contains(t))
                                     Seq(IdlToken(t))
                                 else
-                                    t.wordIndexes.map(complexesWords)
+                                    t.wordIndexes.map(toks)
                             )
                         // Drops without tokens (IDL part works with tokens).
-                    }).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
+                    }).filter(_.exists(_.isToken)).map(IdlTokensSeq(_)).distinct
            ).seq
 
-        ComplexHolder(complexesWords, complexes)
+        IdlTokensHolder(toks, seqs)
     }
 
     /**
@@ -381,17 +381,17 @@ object NCModelEnricher extends NCProbeEnricher {
      * @param h
      * @param toks
      */
-    private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken]): Seq[Seq[IdlToken]] = {
+    private def mkCombinations(h: IdlTokensHolder, toks: Seq[NlpToken]): Seq[Seq[IdlToken]] = {
         val idxs = toks.flatMap(_.wordIndexes).toSet
 
-        h.complexes.par.
-            flatMap(complexSeq => {
-                val rec = complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxs.contains))
+        h.seqs.par.
+            flatMap(seq => {
+                val rec = seq.tokens.filter(_.wordIndexes.exists(idxs.contains))
 
                 // Drops without tokens (IDL part works with tokens).
                 if (rec.nonEmpty)
                     Some(rec ++
-                        (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.complexesWords)
+                        (seq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.tokens)
                     )
                 else
                     None
@@ -423,10 +423,10 @@ object NCModelEnricher extends NCProbeEnricher {
         ) { span =>
             val req = NCRequestImpl(senMeta, ns.srvReqId)
 
-            lazy val ch = mkComplexes(mdl, ns)
+            lazy val ch = mkHolder(mdl, ns)
             lazy val variantsToks =
-                ch.complexes.map(
-                    p => p.tokensComplexes.map(p => if (p.isToken) p.token else mkNlpToken(mdl, ns.srvReqId, p.word))
+                ch.seqs.map(
+                    p => p.tokens.map(p => if (p.isToken) p.token else mkNlpToken(mdl, ns.srvReqId, p.word))
                 )
 
             def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
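
For quick reference while reviewing the rename (ComplexSeq -> IdlTokensSeq, ComplexHolder -> IdlTokensHolder, mkComplexes -> mkHolder), here is a minimal, self-contained Scala sketch of the renamed shapes as they read after this commit. The IdlToken used below is a simplified, hypothetical stand-in (a token flag plus word indexes), not the actual probe type; only the structure mirrors this diff.

// Minimal sketch of the structures renamed in this commit.
// Assumption: IdlToken here is a simplified stand-in, not the real probe API.
object IdlTokensSketch {
    // Hypothetical stand-in: a flag plus the word indexes a token covers.
    final case class IdlToken(isToken: Boolean, wordIndexes: Seq[Int], text: String) {
        override def toString: String = s"'$text' [${wordIndexes.mkString(",")}]"
    }

    object IdlTokensSeq {
        // Mirrors the companion apply in the diff: keep only real tokens,
        // but remember the word indexes of the whole input sequence.
        def apply(all: Seq[IdlToken]): IdlTokensSeq =
            IdlTokensSeq(all.filter(_.isToken), all.flatMap(_.wordIndexes).toSet)
    }

    final case class IdlTokensSeq(tokens: Seq[IdlToken], wordsIndexes: Set[Int]) {
        private val idxsSet: Set[Int] = tokens.flatMap(_.wordIndexes).toSet

        // Word-index overlap check in the spirit of the diffed class.
        def intersects(other: Set[Int]): Boolean = idxsSet.exists(other.contains)

        override def toString: String = tokens.mkString(" | ")
    }

    final case class IdlTokensHolder(tokens: Seq[IdlToken], seqs: Seq[IdlTokensSeq])

    def main(args: Array[String]): Unit = {
        val ny = IdlToken(isToken = true, Seq(0, 1), "new york")
        val weather = IdlToken(isToken = false, Seq(2), "weather")

        val seq = IdlTokensSeq(Seq(ny, weather))      // companion apply: filters tokens, collects indexes
        val holder = IdlTokensHolder(Seq(ny, weather), Seq(seq))

        println(seq)                                  // 'new york' [0,1]
        println(seq.wordsIndexes)                     // Set(0, 1, 2)
        println(seq.intersects(Set(1)))               // true
        println(holder.seqs.size)                     // 1
    }
}

The companion apply keeps only real tokens but records the word indexes of the whole input sequence, which is what the word-index intersection in mkCombinations relies on.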