This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new b0f663a WIP.
b0f663a is described below
commit b0f663aebabd689ab02577dd086e1a69bc5ab350
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Jun 25 19:08:37 2021 +0300
WIP.
---
.../scala/org/apache/nlpcraft/model/NCElement.java | 7 +-
.../apache/nlpcraft/model/NCModelFileAdapter.java | 10 +-
.../org/apache/nlpcraft/model/NCModelView.java | 6 +
.../nlpcraft/model/impl/json/NCElementJson.java | 10 +-
.../nlpcraft/model/impl/json/NCModelJson.java | 8 +
.../probe/mgrs/conn/NCConnectionManager.scala | 19 +-
.../probe/mgrs/deploy/NCDeployManager.scala | 43 +++-
.../nlpcraft/server/mdo/NCProbeModelMdo.scala | 3 +-
.../enrichers/ctxword/NCContextWordEnricher.scala | 217 ++++++++++++---------
.../nlpcraft/server/probe/NCProbeManager.scala | 22 ++-
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 28 ++-
11 files changed, 243 insertions(+), 130 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index d2e3514..774484f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -383,8 +383,9 @@ public interface NCElement extends NCMetadata, Serializable {
return Optional.empty();
}
- // TODO:
- default boolean isContextWordSupport() {
- return false;
+ // TODO: valid range is 0..1.
+ // Empty means disabled.
+ default Optional<Double> getContextWordStrictLevel() {
+ return Optional.empty();
}
}
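For context: this change replaces the boolean isContextWordSupport() flag with an optional strict level in the 0..1 range. A minimal sketch of how an element could opt in under the new API (the element id and the 0.7 level are illustrative only, not part of this commit):

    import java.util.Optional
    import org.apache.nlpcraft.model.NCElement

    // Hypothetical element enabling context-word detection with a 0.7 strict level.
    // Keeping the default Optional.empty() leaves the feature disabled for the element.
    class AnimalElem extends NCElement {
        override def getId: String = "class:animal" // Illustrative id.
        override def getContextWordStrictLevel: Optional[java.lang.Double] = Optional.of(0.7)
    }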
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index af075b6..3310252 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -358,17 +358,15 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
}
@Override
- public boolean isContextWordSupport() {
- return nvlMandatory(js.isContextWordSupport(), NCElement.super.isContextWordSupport());
+ public Optional<Double> getContextWordStrictLevel() {
+ return js.getContextWordStrictLevel() != null ?
+ Optional.of(js.getContextWordStrictLevel()) :
+ Optional.ofNullable(proxy.getContextWordStrictLevel());
}
private<T> Optional<T> nvlOpt(T t, T dflt) {
return Optional.of(t != null ? t : dflt);
}
-
- private<T> T nvlMandatory(T t, T dflt) {
- return t != null ? t : dflt;
- }
};
}).collect(Collectors.toSet());
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index c82ddd2..f4ba31f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -1218,4 +1218,10 @@ public interface NCModelView extends NCMetadata {
default Map<String, Set<String>> getRestrictedCombinations() {
return Collections.emptyMap();
}
+
+ // TODO: valid range is 0..1.
+ // Empty means disabled (the default). Can be overridden by each element.
+ default Optional<Double> getContextWordStrictLevel() {
+ return Optional.empty();
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index 2dab47e..992b1c1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -37,7 +37,7 @@ public class NCElementJson {
// Can be null.
private Boolean isSparse;
// Can be null.
- private Boolean contextWordSupport;
+ private Double contextWordStrictLevel;
public String getParentId() {
return parentId;
@@ -99,10 +99,10 @@ public class NCElementJson {
public void setSparse(Boolean sparse) {
isSparse = sparse;
}
- public Boolean isContextWordSupport() {
- return contextWordSupport;
+ public Double getContextWordStrictLevel() {
+ return contextWordStrictLevel;
}
- public void setContextWordSupport(Boolean contextWordSupport) {
- this.contextWordSupport = contextWordSupport;
+ public void setContextWordStrictLevel(Double contextWordStrictLevel) {
+ this.contextWordStrictLevel = contextWordStrictLevel;
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index d2459d3..599c6c4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -40,6 +40,8 @@ public class NCModelJson {
private String[] intents;
private String[] parsers;
private Map<String, String[]> restrictedCombinations;
+ // Can be null.
+ private Double contextWordStrictLevel;
private int maxUnknownWords = DFLT_MAX_UNKNOWN_WORDS;
private int maxFreeWords = DFLT_MAX_FREE_WORDS;
@@ -271,4 +273,10 @@ public class NCModelJson {
return restrictedCombinations;
}
public void setRestrictedCombinations(Map<String, String[]> restrictedCombinations) { this.restrictedCombinations = restrictedCombinations;}
+ public Double getContextWordStrictLevel() {
+ return contextWordStrictLevel;
+ }
+ public void setContextWordStrictLevel(Double contextWordStrictLevel) {
+ this.contextWordStrictLevel = contextWordStrictLevel;
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index c712ed7..df9378c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -30,7 +30,7 @@ import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
import java.io.{EOFException, IOException, InterruptedIOException}
import java.net.{InetAddress, NetworkInterface}
-import java.util
+import java.{lang, util}
import java.util.concurrent.CountDownLatch
import java.util.{Collections, Properties, TimeZone}
import scala.collection.mutable
@@ -214,18 +214,19 @@ object NCConnectionManager extends NCService {
NCModelManager.getAllModels().map(wrapper => {
val mdl = wrapper.model
- val ctxWordElems = mdl.getElements.asScala.filter(_.isContextWordSupport)
+ val ctxWordElems = mdl.getElements.asScala.filter(_.getContextWordStrictLevel.isPresent)
- // TODO: validate: too many values, examples. missed them.
val (
values,
- samples
+ samples,
+ levels
): (
java.util.Map[String, java.util.Map[String, java.util.Set[String]]],
- java.util.Set[String]
+ java.util.Set[String],
+ java.util.Map[String, lang.Double]
) =
if (ctxWordElems.isEmpty)
- (Collections.emptyMap(), Collections.emptySet())
+ (Collections.emptyMap(), Collections.emptySet(), Collections.emptyMap())
else {
(
ctxWordElems.map(e =>
@@ -236,7 +237,8 @@ object NCConnectionManager extends NCService {
set
}).toMap.asJava
).toMap.asJava,
- wrapper.samples.flatMap(_._2.flatMap(p => p)).asJava
+ wrapper.samples.flatMap(_._2.flatMap(p => p)).asJava,
+ ctxWordElems.map(e => e.getId -> e.getContextWordStrictLevel.get()).toMap.asJava
)
}
@@ -250,7 +252,8 @@ object NCConnectionManager extends NCService {
mdl.getVersion,
new util.HashSet[String](mdl.getEnabledBuiltInTokens),
values,
- samples
+ samples,
+ levels
)
})
), cryptoKey)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 0dd5ffe..6cd7cdb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -83,6 +83,8 @@ object NCDeployManager extends NCService {
private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
+ private final val MAX_CTXWORD_VALS_CNT = 1000
+
@volatile private var data: mutable.ArrayBuffer[NCProbeModel] = _
@volatile private var mdlFactory: NCModelFactory = _
@@ -417,6 +419,23 @@ object NCDeployManager extends NCService {
s"max=$maxCnt" +
s"]")
+ // Validate context word parameters.
+ val ctxWordElems = mdl.getElements.asScala.filter(_.getContextWordStrictLevel.isPresent)
+
+ if (ctxWordElems.nonEmpty) {
+ val valsSynsCnt = ctxWordElems.toSeq.map(_.getValues.asScala.map(_.getSynonyms.size()).sum).sum
+
+ if (valsSynsCnt > MAX_CTXWORD_VALS_CNT) {
+ // TODO: should we print the recommended value?
+ logger.warn(
+ s"Too many value synonyms detected for context word elements [" +
+ s"mdlId=$mdlId, " +
+ s"cnt=$valsSynsCnt," +
+ s"recommended=$MAX_CTXWORD_VALS_CNT" +
+ s"]")
+ }
+ }
+
// Discard value loaders.
for (elm <- mdl.getElements.asScala)
elm.getValueLoader.ifPresent(_.onDiscard())
@@ -519,7 +538,7 @@ object NCDeployManager extends NCService {
exclStopWordsStems = exclStopWords,
suspWordsStems = suspWords,
elements = mdl.getElements.asScala.map(elm => (elm.getId, elm)).toMap,
- samples = scanSamples(mdl)
+ samples = scanSamples(mdl, hasCtxWordElems = ctxWordElems.nonEmpty)
)
}
@@ -848,6 +867,19 @@ object NCDeployManager extends NCService {
s"mdlId=${mdl.getId}, " +
s"elmId=$elmId" +
s"]")
+
+ if (elm.getContextWordStrictLevel.isPresent) {
+ val level = elm.getContextWordStrictLevel.get()
+
+ if (level < 0 || level > 1) {
+ // TODO:
+ throw new NCE(s"Model element context word strict level is
out of range [" +
+ s"mdlId=${mdl.getId}, " +
+ s"elmId=$elmId, " +
+ s"level=$level" +
+ s"]")
+ }
+ }
}
/**
@@ -1600,9 +1632,10 @@ object NCDeployManager extends NCService {
* Scans given model for intent samples.
*
* @param mdl Model to scan.
+ * @param hasCtxWordElems Whether the model contains context word elements.
*/
@throws[NCE]
- private def scanSamples(mdl: NCModel): Set[Sample] = {
+ private def scanSamples(mdl: NCModel, hasCtxWordElems: Boolean):
Set[Sample] = {
val mdlId = mdl.getId
val samples = mutable.Buffer.empty[Sample]
@@ -1674,6 +1707,12 @@ object NCDeployManager extends NCService {
}
}
+ if (hasCtxWordElems && samples.isEmpty) {
+ // TODO:
+ // TODO: we don't check the samples count or their validity (provided samples can be for other elements).
+ throw new NCE(s"Model with context word elements must contain samples [id=${mdl.getId}]")
+ }
+
samples.toSet
}
}
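The deploy manager now enforces the new setting: it warns when context word elements carry more than MAX_CTXWORD_VALS_CNT value synonyms, rejects strict levels outside 0..1, and requires models with context word elements to provide intent samples. A standalone sketch of the range check under those assumptions (IllegalArgumentException stands in for the internal NCE type; the helper name is illustrative):

    // Mirrors the new deploy-time rule: the strict level must stay within [0, 1].
    def validateStrictLevel(mdlId: String, elmId: String, level: Double): Unit =
        if (level < 0 || level > 1)
            throw new IllegalArgumentException(
                s"Context word strict level is out of range [mdlId=$mdlId, elmId=$elmId, level=$level]"
            )

    // E.g. validateStrictLevel("my.model", "class:animal", 1.2) fails, while 0.4 passes.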
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index 2d0bf58..adecd40 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -25,7 +25,8 @@ case class NCModelMLConfigMdo(
@NCMdoField probeId: String,
@NCMdoField modelId: String,
@NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String, /*Synonym*/Set[String]]],
- @NCMdoField samples: Set[String]
+ @NCMdoField samples: Set[String],
+ @NCMdoField levels: Map[String /*Element ID*/, Double]
)
/**
* Probe model MDO.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
index 55fae72..0845723 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
@@ -18,9 +18,9 @@
package org.apache.nlpcraft.server.nlp.enrichers.ctxword
import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.nlp.NCNlpSentence
import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer.stem
-import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.{NCE, NCService, U}
import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager, NCSuggestionRequest, NCWordSuggestion}
@@ -33,12 +33,14 @@ import scala.concurrent.duration.Duration
* ContextWord enricher.
*/
object NCContextWordEnricher extends NCServerEnricher {
+ private final val MAX_CTXWORD_SCORE = 2
+ private final val UNEXISTS_LOW_SCORE = -1.0
+
private case class ModelProbeKey(probeId: String, modelId: String)
private case class WordIndex(word: String, index: Int)
private case class ElementValue(elementId: String, value: String)
private case class ElementScore(elementId: String, score: Double)
-
- private type ElementStemScore = Map[/** Element ID */ String, Map[/** Stem */ String, /** Score */ Double]]
+ private type ElementStemScore = Map[/** Element ID */String, Map[/** Stem */String,/** Score */Double]]
@volatile private var samples: mutable.HashMap[ModelProbeKey, ElementStemScore] = _
@@ -58,8 +60,29 @@ object NCContextWordEnricher extends NCServerEnricher {
ackStopped()
}
- private def spaceTokenize(s: String): Seq[String] = s.split(" ").map(_.strip()).filter(_.nonEmpty)
-
+ /**
+ *
+ * @param s
+ * @return
+ */
+ private def spaceTokenize(s: String): Seq[String] = U.splitTrimFilter(s, " ")
+
+ /**
+ *
+ * @param reqs
+ * @return
+ */
+ private def getSentenceData(reqs: Seq[NCSuggestionRequest]): Map[NCWordSuggestion, Int] =
+ Await.result(NCSuggestSynonymManager.suggestWords(reqs), Duration.Inf).
+ flatMap { case (req, suggs) => suggs.map(_ -> req.index) }
+
+ /**
+ *
+ * @param sampleWords
+ * @param sampleMap
+ * @param synsStem
+ * @return
+ */
private def parseSample(
sampleWords: Seq[String],
sampleMap: Map[String, WordIndex],
@@ -82,17 +105,37 @@ object NCContextWordEnricher extends NCServerEnricher {
)
}
- private def getSamples(cfg: NCModelMLConfigMdo, key: ModelProbeKey): ElementStemScore =
+ /**
+ *
+ * @param d
+ * @return
+ */
+ private def normalizeScore(d: Double): Double = d / MAX_CTXWORD_SCORE
+
+ /**
+ *
+ * @param cfg
+ * @param key
+ * @return
+ */
+ private def getSamplesData(cfg: NCModelMLConfigMdo, key: ModelProbeKey): ElementStemScore =
samples.synchronized { samples.get(key) } match {
case Some(cache) => cache
case None =>
val res = askSamples(cfg)
- samples.synchronized { samples += key -> res }
+ samples.synchronized {
+ samples += key -> res
+ }
res
}
+ /**
+ *
+ * @param cfg
+ * @return
+ */
@throws[NCE]
private def askSamples(cfg: NCModelMLConfigMdo): ElementStemScore = {
case class Record(request: NCSuggestionRequest, value: String)
@@ -104,110 +147,100 @@ object NCContextWordEnricher extends NCServerEnricher {
synsStem = syns.map(stem);
sample <- cfg.samples;
sampleWords = spaceTokenize(sample);
- samplesMap = sampleWords.zipWithIndex.map { case (w, idx) => stem(w) -> WordIndex(w, idx)}.toMap;
+ samplesMap = sampleWords.zipWithIndex.map { case (w, idx) => stem(w) -> WordIndex(w, idx) }.toMap;
sugg <- parseSample(sampleWords, samplesMap, synsStem)
)
- yield (elemId, Record(sugg, value))).groupBy { case (elemId, _) => elemId }.
+ yield (elemId, Record(sugg, value))).groupBy { case (elemId, _) => elemId }.
map { case (elemId, map) => elemId -> map.values.toSeq }
- val map: Map[NCSuggestionRequest, ElementValue] =
- recs.flatMap { case (elemId, recs) => recs.map(p => p.request -> ElementValue(elemId, p.value)) }
+ val map = recs.flatMap { case (elemId, recs) => recs.map(p => p.request -> ElementValue(elemId, p.value)) }
- // TODO: sync
val res =
- (
- if (recs.nonEmpty)
- Await.result(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2.map(_.request)).toSeq), Duration.Inf)
- else
- Map.empty
- ).map {
- case (req, suggs) =>
+ if (recs.nonEmpty)
+ Await.result(
+ NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2.map(_.request)).toSeq), Duration.Inf
+ ).
+ map { case (req, suggs) =>
val d = map(req)
- d.elementId -> suggs.groupBy(p =>stem(p.word)).map { case (stem, map) => stem -> map.map(_.score).max }
- }
-
- // TODO:
- println("!!!samples")
- res.foreach(s => {
- println(s"elemID=${s._1}")
-
- println(s._2.mkString("\n") + "\n")
-
- })
+ d.elementId -> suggs.groupBy(p => stem(p.word)).map { case (stem, suggs) =>
+ stem -> normalizeScore(suggs.map(_.score).max)
+ }
+ }
+ else
+ Map.empty[String, Map[String, Double]]
+
+ // // TODO:
+// println("!!!samples")
+// res.foreach(s => {
+// println(s"elemID=${s._1}")
+//
+// println(s._2.toSeq.sortBy(-_._2).mkString("\n") + "\n")
+//
+// })
res
}
- @throws[NCE]
- private def askSentence(ns: NCNlpSentence, samples: ElementStemScore): Map[Int, Set[ElementScore]] = {
- val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N")) Some(p.index) else None).toSeq
- val reqs = idxs.map(idx => NCSuggestionRequest(ns.text, idx))
-
- //
-
- // TODO: tokenization.
- // TODO: sync.
- val suggs: Map[NCWordSuggestion, NCSuggestionRequest] =
- Await.
- result(NCSuggestSynonymManager.suggestWords(reqs), Duration.Inf).
- flatMap { case (req, suggs) => suggs.map(_ -> req) }
-
- // TODO:
- println("suggs="+suggs.mkString("\n"))
-
-
- suggs.map { case(sugg, req) => (stem(sugg.word), sugg.score, req) }.
- flatMap { case (stem, suggScore, req) =>
- samples.map { case (elemId, map) =>
- // TODO: contains ? check key (and use score)
-
- if (map.contains(stem)) {
- // TODO:
- println(s"!!!FOUND BY suggStem=$stem,
index=${req.index}, suggScore=${suggScore}, elem=$elemId, map=$map")
-
- map.map { case (_, score) => (ElementScore(elemId, score), req.index) }
+ override def enrich(ns: NCNlpSentence, parent: Span): Unit =
+ ns.mlConfig match {
+ case Some(cfg) =>
+ val detected = mutable.HashMap.empty[NCNlpSentenceToken, mutable.HashSet[ElementScore]]
+
+ def add(nounTok: NCNlpSentenceToken, elemId: String, score: Double): Unit = {
+ val tokElems = detected.getOrElseUpdate(nounTok, mutable.HashSet.empty[ElementScore])
+
+ tokElems.find(_.elementId == elemId) match {
+ case Some(saved) =>
+ if (score > saved.score) {
+ tokElems -= saved
+ tokElems += ElementScore(elemId, score)
+ }
+ case None => tokElems += ElementScore(elemId, score)
}
- else
- Seq.empty
}
- }.
- flatten.
- groupBy { case (_, idx) => idx }.
- map { case (idx, map) =>
- idx -> map.
- map { case (score, _) => score }.
- groupBy(_.elementId).
- map { case (_, scores) => scores.toSeq.minBy(-_.score) }.toSet
- }
- }
- override def enrich(ns: NCNlpSentence, parent: Span): Unit = {
- ns.mlConfig match {
- case Some(cfg) =>
- val nouns = ns.tokens.filter(_.pos.startsWith("N"))
+ val nounToks = ns.tokens.filter(_.pos.startsWith("N"))
- if (nouns.nonEmpty) {
+ if (nounToks.nonEmpty) {
val key = ModelProbeKey(cfg.probeId, cfg.modelId)
- val samples = getSamples(cfg, key)
-
-
- for (n <- nouns; (elemId, stems) <- getSamples(cfg, key) if stems.contains(n.stem))
- println("EX FOUND elemId=" + elemId + ", n=" + n.stem + ", stem=" + stems.toSeq.sortBy(-_._2))
-
- val sens = askSentence(ns, samples)
-
- println("!!!sens")
- sens.foreach(s => {
- println(s"INDEX=${s._1}")
-
- println(s._2.mkString("\n") + "\n")
-
- })
+ val mdlSamples = getSamplesData(cfg, key)
+ for (
+ nounTok <- nounToks;
+ (elemId, suggs) <- mdlSamples;
+ score = suggs.getOrElse(nounTok.stem, UNEXISTS_LOW_SCORE)
+ if score >= cfg.levels(elemId)
+ )
+ add(nounTok, elemId, score)
+
+ println("detected1="+detected.map(p => p._1.origText ->
p._2))
+
+ val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N")) Some(p.index) else None).toSeq
+
+ val reqs = idxs.map(idx => NCSuggestionRequest(ns.tokens.map(_.origText).mkString(" "), idx))
+
+ for (
+ // The token index (tokIdx) should be correct because the request is created from the original words,
+ // separated by spaces, and the Suggestion Manager uses a space tokenizer.
+ (sugg, tokIdx) <- getSentenceData(reqs);
+ suggStem = stem(sugg.word);
+ suggScore = normalizeScore(sugg.score);
+ (elemId, mdlSamplesSuggs) <- mdlSamples
+ if mdlSamplesSuggs.contains(suggStem);
+ elemScore = cfg.levels(elemId);
+ sampleScore = mdlSamplesSuggs(suggStem);
+ avg = (sampleScore + suggScore) / 2
+ if avg >= elemScore
+ ) {
+ println(s"elemId=$elemId,
word=${ns.tokens(tokIdx).origText}, sampleScore=$sampleScore,
suggScore=$suggScore, avg=$avg, suggStem=$suggStem, ")
+
+ add(ns.tokens(tokIdx), elemId, avg)
+ }
}
+ println("detected2="+detected.map(p => p._1.origText -> p._2))
+
case None => // No-op.
}
- }
}
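In the reworked enricher, suggestion scores are normalized by MAX_CTXWORD_SCORE (2), and a noun token is tagged with an element when either its sample-based score alone, or the average of the sample score and the sentence-level suggestion score, reaches the element's strict level. A minimal standalone sketch of that scoring rule (helper names are illustrative; the constant is copied from the diff):

    // Scores from the suggestion service are normalized to roughly [0, 1].
    val MAX_CTXWORD_SCORE = 2

    def normalize(score: Double): Double = score / MAX_CTXWORD_SCORE

    // Sentence-level check: sampleScore is already normalized, suggScore is raw.
    def passes(sampleScore: Double, suggScore: Double, strictLevel: Double): Boolean =
        (sampleScore + normalize(suggScore)) / 2 >= strictLevel

    // Example: passes(0.9, 1.2, 0.7) == true, since (0.9 + 0.6) / 2 = 0.75 >= 0.7.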
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 5e11883..8870ffe 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -615,7 +615,8 @@ object NCProbeManager extends NCService {
String,
java.util.Set[String],
java.util.Map[String, java.util.Map[String, java.util.Set[String]]],
- java.util.Set[String]
+ java.util.Set[String],
+ java.util.Map[String, Double]
)]]("PROBE_MODELS").
map {
case (
@@ -624,13 +625,17 @@ object NCProbeManager extends NCService {
mdlVer,
enabledBuiltInToks,
values,
- samples
+ samples,
+ levels
) =>
require(mdlId != null)
require(mdlName != null)
require(mdlVer != null)
require(enabledBuiltInToks != null)
- require(values.isEmpty && samples.isEmpty || !values.isEmpty && !samples.isEmpty)
+ require(
+ values.isEmpty && samples.isEmpty && levels.isEmpty ||
+ !values.isEmpty && !samples.isEmpty && !levels.isEmpty
+ )
NCProbeModelMdo(
id = mdlId,
@@ -643,8 +648,15 @@ object NCProbeManager extends NCService {
NCModelMLConfigMdo(
probeId = probeId,
modelId = mdlId,
- values = values.asScala.map(p => p._1 -> p._2.asScala.map(p => p._1 -> p._2.asScala.toSet).toMap).toMap,
- samples = samples.asScala.toSet
+ values = values.asScala.map {
+ case (elemId, map) =>
+ elemId ->
+ map.asScala.map {
+ case (value, syns) => value -> syns.asScala.toSet
+ }.toMap
+ }.toMap,
+ samples = samples.asScala.toSet,
+ levels.asScala.toMap
)
)
else
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index 6df491d..a368a35 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -21,7 +21,8 @@ import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, NCIntentT
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Test
-import java.util
+import java.{lang, util}
+import java.util.Optional
import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
/**
@@ -33,10 +34,14 @@ class NCContextWordSpecModel extends NCModel {
override def getSynonyms: util.List[String] = (Seq(name) ++ syns).asJava
}
- case class Elem(id: String, values: NCValue*) extends NCElement {
+ case class Elem(id: String, level: Double, values: NCValue*) extends NCElement {
override def getId: String = id
override def getValues: util.List[NCValue] = values.asJava
- override def isContextWordSupport: Boolean = true
+ override def getContextWordStrictLevel: Optional[lang.Double] = Optional.of(level)
+ }
+
+ object Elem {
+ def apply(id: String, values: NCValue*): Elem = new Elem(id, 0.4, values: _*)
}
override def getId: String = this.getClass.getSimpleName
@@ -60,9 +65,11 @@ class NCContextWordSpecModel extends NCModel {
"BMW has the best engine",
"Luxury cars like Mercedes and BMW are prime targets",
"BMW will install side air bags up front",
+
"A wild cat is very dangerous",
"A fox eats hens",
"The fox was already in your chicken house",
+
"What is the local temperature ?",
"This is the first day of heavy rain",
"It is the beautiful day, the sun is shining ",
@@ -70,9 +77,9 @@ class NCContextWordSpecModel extends NCModel {
)
@NCIntent(
"intent=classification " +
- "term(carBrands)~{tok_id() == 'class:carBrand'}* " +
- "term(animals)~{tok_id() == 'class:animal'}* " +
- "term(weathers)~{tok_id() == 'class:weather'}* "
+ "term(carBrands)~{tok_id() == 'class:carBrand'}* " +
+ "term(animals)~{tok_id() == 'class:animal'}* " +
+ "term(weathers)~{tok_id() == 'class:weather'}* "
)
def onMatch(
@NCIntentTerm("carBrands") carBrands: List[NCToken],
@@ -97,8 +104,13 @@ class NCContextWordSpec extends NCTestContext {
private[ctxword] def test(): Unit = {
val cli = getClient
-// cli.ask("I want to have a dog and fox")
-// cli.ask("I like to drive my Porsche and Volkswagen")
+ cli.ask("I want to have a dog and fox")
+ cli.ask("I fed your fish")
+
+ cli.ask("I like to drive my Porsche and Volkswagen")
+ cli.ask("Peugeot added motorcycles to its range in 1901")
+
cli.ask("The frost is possible today")
+ cli.ask("There's a very strong wind from the east now")
}
}