This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new e22f9fa WIP.
e22f9fa is described below
commit e22f9fa7c14c0a8be0e7d1ce28608a8e16741e1c
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Jun 30 18:37:45 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 10 +--
.../nlpcraft/model/NCContextWordModelConfig.java | 2 +-
.../apache/nlpcraft/model/NCModelFileAdapter.java | 2 +-
.../probe/mgrs/conn/NCConnectionManager.scala | 10 +--
.../probe/mgrs/deploy/NCDeployManager.scala | 4 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 6 +-
.../nlpcraft/server/mdo/NCProbeModelMdo.scala | 6 +-
.../nlp/enrichers/NCServerEnrichmentManager.scala | 6 +-
.../enrichers/ctxword/NCContextWordEnricher.scala | 96 +++++++++++-----------
.../nlpcraft/server/probe/NCProbeManager.scala | 12 +--
.../nlpcraft/server/query/NCQueryManager.scala | 2 +-
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 2 +-
12 files changed, 80 insertions(+), 78 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 613a7ce..6b93614 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.common.nlp
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
+import org.apache.nlpcraft.server.mdo.NCCtxWordConfigMdo
import java.io.{Serializable => JSerializable}
import java.util.{Collections, List => JList}
@@ -41,7 +41,7 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
* @param srvReqId Server request ID.
* @param text Normalized text.
* @param enabledBuiltInToks Enabled built-in tokens.
- * @param mlConfig Machine learning configuration. Optional.
+ * @param ctxWordConfig Context word configuration. Optional.
* @param tokens Initial buffer.
* @param firstProbePhase Processing phase flag.
* @param deletedNotes Deleted overridden notes with their tokens.
@@ -52,8 +52,8 @@ class NCNlpSentence(
val srvReqId: String,
val text: String,
val enabledBuiltInToks: Set[String],
- val mlConfig: Option[NCModelMLConfigMdo] = None,
- var mlData: Map[Int, Map[String, java.util.List[Double]]] = Map.empty,
+ val ctxWordConfig: Option[NCCtxWordConfigMdo] = None,
+ var ctxWordData: Map[Int, Map[String, java.util.List[Double]]] = Map.empty,
override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new
mutable.ArrayBuffer[NCNlpSentenceToken](32),
var firstProbePhase: Boolean = true,
private val deletedNotes: mutable.HashMap[NCNlpSentenceNote,
Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
@@ -71,7 +71,7 @@ class NCNlpSentence(
srvReqId = srvReqId,
text = text,
enabledBuiltInToks = enabledBuiltInToks,
- mlConfig = mlConfig,
+ ctxWordConfig = ctxWordConfig,
tokens = tokens.map(_.clone()),
deletedNotes = deletedNotes.map(p => p._1.clone() ->
p._2.map(_.clone())),
initNlpNotes = initNlpNotes,
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
index fc5ac8d..7f856c4 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
@@ -24,7 +24,7 @@ import java.util.Map;
// TODO:
public interface NCContextWordModelConfig extends Serializable {
- default List<String> getSamples() {
+ default List<String> getCorpus() {
return Collections.emptyList();
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index b25fc6f..fdf8b48 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -306,7 +306,7 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
return js != null?
new NCContextWordModelConfig() {
@Override
- public List<String> getSamples() {
+ public List<String> getCorpus() {
return js.getSamples() != null ?
Arrays.asList(js.getSamples()) : null;
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index 19a0af3..9d731db 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -217,7 +217,7 @@ object NCConnectionManager extends NCService {
val (
values,
- samples,
+ corpus,
policies,
scores
): (
@@ -231,10 +231,10 @@ object NCConnectionManager extends NCService {
else {
val cfg =
mdl.getContextWordModelConfig.get()
- var samples = if (cfg.getSamples == null)
Seq.empty else cfg.getSamples.asScala
+ var corpus = if (cfg.getCorpus == null)
Seq.empty else cfg.getCorpus.asScala
if (cfg.useIntentsSamples)
- samples = samples ++
wrapper.samples.flatMap(_._2.flatMap(p => p))
+ corpus = corpus ++
wrapper.samples.flatMap(_._2.flatMap(p => p))
val values =
mdl.getElements.
@@ -256,7 +256,7 @@ object NCConnectionManager extends NCService {
(
values.asJava,
- samples.toSet.asJava,
+ corpus.toSet.asJava,
getData(_.getPolicy.toString),
getData(_.getScore)
)
@@ -272,7 +272,7 @@ object NCConnectionManager extends NCService {
mdl.getVersion,
new
util.HashSet[String](mdl.getEnabledBuiltInTokens),
values,
- samples,
+ corpus,
policies,
scores
)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index e56f883..39ea521 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -563,13 +563,13 @@ object NCDeployManager extends NCService {
if (ctxCfgOpt.isPresent) {
if (samples.isEmpty) {
- if (ctxCfgOpt.get.getSamples.isEmpty)
+ if (ctxCfgOpt.get.getCorpus.isEmpty)
// TODO:
throw new NCE("Model should contains samples for intents
or in context word config.")
}
else {
- val cnt = samples.size + ctxCfgOpt.get.getSamples.size()
+ val cnt = samples.size + ctxCfgOpt.get.getCorpus.size()
if (cnt > MAX_CTXWORD_SAMPLES_CNT)
// TODO: do we need print recommended value.?
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 042e86e..9e1ffdb 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -448,10 +448,12 @@ object NCModelEnricher extends NCProbeEnricher {
"enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" ->
mdl.model.getId, "txt" -> ns.text
) { span =>
if (ns.firstProbePhase)
- for ((tokIdx, map) <- ns.mlData; (elemId, score) <- map)
+ for ((tokIdx, map) <- ns.ctxWordData; (elemId, score) <- map)
mark(
ns = ns,
- elem = mdl.elements.find(_._1 ==
elemId).getOrElse(throw new NCE(s"Element not found: $elemId"))._2,
+ elem =
+ mdl.elements.find(_._1 == elemId).
+ getOrElse(throw new NCE(s"Element not found:
$elemId"))._2,
toks = Seq(ns.tokens(tokIdx)),
direct = true,
metaOpt = Some(Map("scores" -> score))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index 6abce64..5ed0ae4 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -22,11 +22,11 @@ import org.apache.nlpcraft.server.mdo.impl._
@NCMdoEntity(sql = false)
-case class NCModelMLConfigMdo(
+case class NCCtxWordConfigMdo(
@NCMdoField probeId: String,
@NCMdoField modelId: String,
@NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String,
/*Synonym*/Set[String]]],
- @NCMdoField samples: Set[String],
+ @NCMdoField corpus: Set[String],
@NCMdoField elements: Map[String /*Element ID*/,
NCContextWordElementConfig]
)
/**
@@ -38,7 +38,7 @@ case class NCProbeModelMdo(
@NCMdoField name: String,
@NCMdoField version: String,
@NCMdoField enabledBuiltInTokens: Set[String],
- @NCMdoField mlConfig: Option[NCModelMLConfigMdo]
+ @NCMdoField ctxWordConfig: Option[NCCtxWordConfigMdo]
) extends NCAnnotatedMdo[NCProbeModelMdo] {
override def hashCode(): Int = s"$id$name".hashCode()
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 097a3ca..022fad9 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -26,7 +26,7 @@ import org.apache.nlpcraft.common.pool.NCThreadPoolManager
import org.apache.nlpcraft.common.{NCService, _}
import org.apache.nlpcraft.server.ignite.NCIgniteHelpers._
import org.apache.nlpcraft.server.ignite.NCIgniteInstance
-import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
+import org.apache.nlpcraft.server.mdo.NCCtxWordConfigMdo
import org.apache.nlpcraft.server.nlp.core.{NCNlpNerEnricher,
NCNlpServerManager}
import org.apache.nlpcraft.server.nlp.enrichers.basenlp.NCBaseNlpEnricher
import
org.apache.nlpcraft.server.nlp.enrichers.coordinate.NCCoordinatesEnricher
@@ -100,7 +100,7 @@ object NCServerEnrichmentManager extends NCService with
NCIgniteInstance {
srvReqId: String,
normTxt: String,
enabledBuiltInToks: Set[String],
- mlConf: Option[NCModelMLConfigMdo],
+ mlConf: Option[NCCtxWordConfigMdo],
parent: Span = null
): NCNlpSentence =
startScopedSpan("process", parent, "srvReqId" -> srvReqId, "txt" ->
normTxt) { span =>
@@ -149,7 +149,7 @@ object NCServerEnrichmentManager extends NCService with
NCIgniteInstance {
srvReqId: String,
txt: String,
enabledBuiltInToks: Set[String],
- mlConf: Option[NCModelMLConfigMdo],
+ mlConf: Option[NCCtxWordConfigMdo],
parent: Span = null
): NCNlpSentence = {
startScopedSpan("enrichPipeline", parent, "srvReqId" -> srvReqId,
"txt" -> txt) { span =>
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
index bb8b997..4804902 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
@@ -23,7 +23,7 @@ import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank._
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
import org.apache.nlpcraft.common.{NCE, NCService}
import org.apache.nlpcraft.model.NCContextWordElementConfig
-import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
+import org.apache.nlpcraft.server.mdo.NCCtxWordConfigMdo
import org.apache.nlpcraft.server.nlp.core.{NCNlpParser, NCNlpServerManager,
NCNlpWord}
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager,
NCSuggestionRequest, NCWordSuggestion}
@@ -66,7 +66,7 @@ object NCContextWordEnricher extends NCServerEnricher {
}
@volatile private var valuesStems: mutable.HashMap[ModelProbeKey,
ValuesHolder] = _
- @volatile private var samples: mutable.HashMap[ModelProbeKey, Map[/**
Element ID */String, ScoreHolder]] = _
+ @volatile private var corpuses: mutable.HashMap[ModelProbeKey, Map[/**
Element ID */String, ScoreHolder]] = _
@volatile private var parser: NCNlpParser = _
@@ -74,7 +74,7 @@ object NCContextWordEnricher extends NCServerEnricher {
ackStarting()
valuesStems = mutable.HashMap.empty
- samples = mutable.HashMap.empty
+ corpuses = mutable.HashMap.empty
parser = NCNlpServerManager.getParser
ackStarted()
@@ -86,7 +86,7 @@ object NCContextWordEnricher extends NCServerEnricher {
// TODO: clear model cache
parser = null
- samples = null
+ corpuses = null
valuesStems = null
ackStopped()
@@ -103,46 +103,46 @@ object NCContextWordEnricher extends NCServerEnricher {
/**
*
* @param nlpWords
- * @param sampleWords
- * @param sampleWordsStems
- * @param sampleWordsNorm
+ * @param corpusWords
+ * @param corpusWordsStems
+ * @param corpusWordsNorm
* @param elemValuesSyns
* @param elemValuesSynsStems
* @param elemValuesSynsNorm
* @return
*/
- private def parseSample(
+ private def parseCorpus(
nlpWords: Seq[Seq[NCNlpWord]],
- sampleWords: Seq[Seq[String]],
- sampleWordsStems: Seq[Seq[String]],
- sampleWordsNorm: Seq[Seq[String]],
+ corpusWords: Seq[Seq[String]],
+ corpusWordsStems: Seq[Seq[String]],
+ corpusWordsNorm: Seq[Seq[String]],
elemValuesSyns: Set[String],
elemValuesSynsStems: Set[String],
elemValuesSynsNorm: Set[String]
): Iterable[NCSuggestionRequest] = {
- require(nlpWords.size == sampleWords.size)
- require(sampleWords.size == sampleWordsStems.size)
- require(sampleWords.size == sampleWordsNorm.size)
+ require(nlpWords.size == corpusWords.size)
+ require(corpusWords.size == corpusWordsStems.size)
+ require(corpusWords.size == corpusWordsNorm.size)
require(elemValuesSyns.size == elemValuesSynsStems.size)
require(elemValuesSyns.size == elemValuesSynsNorm.size)
- sampleWordsStems.
- zip(sampleWords).
- zip(sampleWordsNorm).
+ corpusWordsStems.
+ zip(corpusWords).
+ zip(corpusWordsNorm).
zip(nlpWords).
flatMap {
- case (((sampleWordsStem, sampleWords), sampleWordsNorm),
nlpWords) =>
- def getIndexes(elemValuesData: Set[String], sampleData:
Seq[String]): Set[Int] =
+ case (((corpusWordsStem, corpusWords), corpusWordsNorm),
nlpWords) =>
+ def getIndexes(elemValuesData: Set[String], corpusData:
Seq[String]): Set[Int] =
elemValuesData.flatMap(vd => {
- val i = sampleData.indexOf(vd)
+ val i = corpusData.indexOf(vd)
if (i >= 0) Some(i) else None
})
- val idxs = getIndexes(elemValuesSynsStems, sampleWordsStem) ++
getIndexes(elemValuesSynsNorm, sampleWordsNorm)
+ val idxs = getIndexes(elemValuesSynsStems, corpusWordsStem) ++
getIndexes(elemValuesSynsNorm, corpusWordsNorm)
def mkRequest(idx: Int, syn: String): NCSuggestionRequest = {
- var newSen = substitute(sampleWords, syn, idx)
+ var newSen = substitute(corpusWords, syn, idx)
val nlpWordsNew = parser.parse(newSen.mkString(" "))
@@ -154,14 +154,14 @@ object NCContextWordEnricher extends NCServerEnricher {
if (NOUNS_POS_SINGULAR.contains(pos) &&
NOUNS_POS_PLURALS.contains(posNew)) {
println(s"newSen1=$newSen")
- newSen = substitute(sampleWords,
CONVERTER.depluralize(syn), idx)
+ newSen = substitute(corpusWords,
CONVERTER.depluralize(syn), idx)
println(s"newSen2=$newSen")
}
else if (NOUNS_POS_PLURALS.contains(pos) &&
NOUNS_POS_SINGULAR.contains(posNew)) {
println(s"newSen1=$newSen")
- newSen = substitute(sampleWords,
CONVERTER.pluralize(syn), idx)
+ newSen = substitute(corpusWords,
CONVERTER.pluralize(syn), idx)
println(s"newSen3=$newSen")
}
@@ -188,13 +188,13 @@ object NCContextWordEnricher extends NCServerEnricher {
* @param key
* @return
*/
- private def getSamplesData(cfg: NCModelMLConfigMdo, key: ModelProbeKey):
Map[/** Element ID */String, ScoreHolder] =
- samples.synchronized { samples.get(key) } match {
+ private def getSamplesData(cfg: NCCtxWordConfigMdo, key: ModelProbeKey):
Map[/** Element ID */String, ScoreHolder] =
+ corpuses.synchronized { corpuses.get(key) } match {
case Some(cache) => cache
case None =>
val res = askSamples(cfg)
- samples.synchronized { samples += key -> res }
+ corpuses.synchronized { corpuses += key -> res }
res
}
@@ -205,7 +205,7 @@ object NCContextWordEnricher extends NCServerEnricher {
* @param key
* @return
*/
- private def getValuesData(cfg: NCModelMLConfigMdo, key: ModelProbeKey):
ValuesHolder =
+ private def getValuesData(cfg: NCCtxWordConfigMdo, key: ModelProbeKey):
ValuesHolder =
valuesStems.synchronized { valuesStems.get(key) } match {
case Some(cache) => cache
case None =>
@@ -251,13 +251,13 @@ object NCContextWordEnricher extends NCServerEnricher {
* @return
*/
@throws[NCE]
- private def askSamples(cfg: NCModelMLConfigMdo): Map[/** Element ID
*/String, ScoreHolder] = {
- val samplesSeq = cfg.samples.toSeq
- val sampleWords = samplesSeq.map(parser.parse(_).map(_.word))
- val nlpWords = samplesSeq.map(s => parser.parse(s))
+ private def askSamples(cfg: NCCtxWordConfigMdo): Map[/** Element ID
*/String, ScoreHolder] = {
+ val corpusSeq = cfg.corpus.toSeq
+ val corpusWords = corpusSeq.map(parser.parse(_).map(_.word))
+ val nlpWords = corpusSeq.map(s => parser.parse(s))
- val sampleWordsStems = sampleWords.map(_.map(stem))
- val sampleWordsNorm = sampleWords.map(_.map(_.toLowerCase))
+ val corpusWordsStems = corpusWords.map(_.map(stem))
+ val corpusWordsNorm = corpusWords.map(_.map(_.toLowerCase))
val recs: Map[/** Element ID */String, Seq[NCSuggestionRequest]] =
(
@@ -265,11 +265,11 @@ object NCContextWordEnricher extends NCServerEnricher {
(elemId, elemValues) <- cfg.values.toSeq;
// Uses single words synonyms only.
elemValuesSyns =
elemValues.flatMap(_._2).toSet.filter(!_.contains(' '));
- suggReq <- parseSample(
+ suggReq <- parseCorpus(
nlpWords = nlpWords,
- sampleWords = sampleWords,
- sampleWordsStems = sampleWordsStems,
- sampleWordsNorm = sampleWordsNorm,
+ corpusWords = corpusWords,
+ corpusWordsStems = corpusWordsStems,
+ corpusWordsNorm = corpusWordsNorm,
elemValuesSyns = elemValuesSyns,
elemValuesSynsStems = elemValuesSyns.map(stem),
elemValuesSynsNorm = elemValuesSyns.map(_.toLowerCase)
@@ -346,7 +346,7 @@ object NCContextWordEnricher extends NCServerEnricher {
}
override def enrich(ns: NCNlpSentence, parent: Span): Unit =
- ns.mlConfig match {
+ ns.ctxWordConfig match {
case Some(cfg) =>
val detected = mutable.HashMap.empty[NCNlpSentenceToken,
mutable.HashSet[ElementScore]]
@@ -425,20 +425,20 @@ object NCContextWordEnricher extends NCServerEnricher {
// separated by space, and Suggestion Manager uses
space tokenizer.
(sugg, req) <- resps;
senScore = normalizeScore(sugg.score);
- (elemId, mdlSamplesSuggs) <- mdlSamples;
+ (elemId, mdlCorpusSuggs) <- mdlSamples;
elemScore = cfg.elements(elemId);
- sampleScore =
+ corpusScore =
Seq(
-
mdlSamplesSuggs.stems.getOrElse(stem(sugg.word), EXCL_MIN_SCORE),
-
mdlSamplesSuggs.normal.getOrElse(sugg.word.toLowerCase, EXCL_MIN_SCORE),
-
mdlSamplesSuggs.lemma.getOrElse(getSuggestionLemma(req, sugg), EXCL_MIN_SCORE)
+
mdlCorpusSuggs.stems.getOrElse(stem(sugg.word), EXCL_MIN_SCORE),
+
mdlCorpusSuggs.normal.getOrElse(sugg.word.toLowerCase, EXCL_MIN_SCORE),
+
mdlCorpusSuggs.lemma.getOrElse(getSuggestionLemma(req, sugg), EXCL_MIN_SCORE)
).max
- if isMatched(elemScore, sampleScore, senScore)
+ if isMatched(elemScore, corpusScore, senScore)
)
- add(ns.tokens(req.index), elemId, senScore,
sampleScore)
+ add(ns.tokens(req.index), elemId, senScore,
corpusScore)
}
- ns.mlData = detected.map {
+ ns.ctxWordData = detected.map {
case (tok, scores) => tok.index -> scores.map(p =>
p.elementId -> p.scores.asJava).toMap
}.toMap
@@ -448,6 +448,6 @@ object NCContextWordEnricher extends NCServerEnricher {
def onDisconnectProbe(probeId: String): Unit = {
valuesStems.synchronized { valuesStems --=
valuesStems.keySet.filter(_.probeId == probeId) }
- samples.synchronized { samples --= samples.keySet.filter(_.probeId ==
probeId) }
+ corpuses.synchronized { corpuses --= corpuses.keySet.filter(_.probeId
== probeId) }
}
}
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 6488bb1..7551f14 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -632,7 +632,7 @@ object NCProbeManager extends NCService {
mdlVer,
enabledBuiltInToks,
values,
- samples,
+ corpus,
policies,
scores
) =>
@@ -641,8 +641,8 @@ object NCProbeManager extends NCService {
require(mdlVer != null)
require(enabledBuiltInToks != null)
require(
- values.isEmpty && samples.isEmpty &&
policies.isEmpty ||
- !values.isEmpty && !samples.isEmpty &&
!policies.isEmpty
+ values.isEmpty && corpus.isEmpty &&
policies.isEmpty ||
+ !values.isEmpty && !corpus.isEmpty &&
!policies.isEmpty
)
require(policies.size() == scores.size())
@@ -651,12 +651,12 @@ object NCProbeManager extends NCService {
name = mdlName,
version = mdlVer,
enabledBuiltInTokens =
enabledBuiltInToks.asScala.toSet,
- mlConfig =
+ ctxWordConfig =
if (!values.isEmpty) {
val scoresMap = scores.asScala
Some(
- NCModelMLConfigMdo(
+ NCCtxWordConfigMdo(
probeId = probeId,
modelId = mdlId,
values =
values.asScala.map {
@@ -666,7 +666,7 @@ object NCProbeManager extends NCService {
case
(value, syns) => value -> syns.asScala.toSet
}.toMap
}.toMap,
- samples =
samples.asScala.toSet,
+ corpus =
corpus.asScala.toSet,
policies.asScala.map {
case (elemId, policy) =>
elemId -> new
NCContextWordElementConfig() {
override def
getPolicy: NCContextWordElementPolicy =
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
index 32492b9..e9d6b43 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
@@ -288,7 +288,7 @@ object NCQueryManager extends NCService with
NCIgniteInstance with NCOpenCensusS
company,
mdlId,
txt0,
- NCServerEnrichmentManager.enrichPipeline(srvReqId, txt0,
mdl.enabledBuiltInTokens, mdl.mlConfig),
+ NCServerEnrichmentManager.enrichPipeline(srvReqId, txt0,
mdl.enabledBuiltInTokens, mdl.ctxWordConfig),
usrAgent,
rmtAddr,
data,
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index ee50a2a..886e726 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -66,7 +66,7 @@ class NCContextWordSpecModel extends NCModel {
override def useIntentsSamples(): Boolean = false
- override def getSamples: util.List[String] =
+ override def getCorpus: util.List[String] =
Seq(
"I like drive my new BMW",
"BMW has the best engine",