This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new 74dd7d5 WIP.
74dd7d5 is described below
commit 74dd7d53b11ee28311168c8e698214a10dad2a2a
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Jun 18 17:48:24 2021 +0300
WIP.
---
.../enrichers/ctxword/NCContextWordEnricher.scala | 126 ++++++++++-----------
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 9 +-
2 files changed, 66 insertions(+), 69 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
index daf990b..79cd9a2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
@@ -33,12 +33,12 @@ import scala.concurrent.duration.Duration
* ContextWord enricher.
*/
object NCContextWordEnricher extends NCServerEnricher {
- case class ModelProbeKey(probeId: String, modelId: String)
- case class WordIndex(word: String, index: Int)
- case class ElementValue(elementId: String, value: String)
- case class ElementScore(elementId: String, score: Double)
+ private case class ModelProbeKey(probeId: String, modelId: String)
+ private case class WordIndex(word: String, index: Int)
+ private case class ElementValue(elementId: String, value: String)
+ private case class ElementScore(elementId: String, score: Double)
-    type ElementStemScore = Map[/** Element ID */ String, Map[/** Stem */ String, /** Score */ Double]]
+    private type ElementStemScore = Map[/** Element ID */ String, Map[/** Stem */ String, /** Score */ Double]]
@volatile private var samples: mutable.HashMap[ModelProbeKey,
ElementStemScore] = _
@@ -58,7 +58,7 @@ object NCContextWordEnricher extends NCServerEnricher {
ackStopped()
}
-    private def toWords(s: String): Seq[String] = s.split(" ").map(_.strip()).filter(_.nonEmpty)
+    private def spaceTokenize(s: String): Seq[String] = s.split(" ").map(_.strip()).filter(_.nonEmpty)
private def parseSample(
sampleWords: Seq[String],
@@ -103,79 +103,84 @@ object NCContextWordEnricher extends NCServerEnricher {
(value, syns) <- values;
synsStem = syns.map(stem);
sample <- cfg.samples;
- sampleWords = toWords(sample);
+ sampleWords = spaceTokenize(sample);
samplesMap = sampleWords.zipWithIndex.map { case (w, idx) =>
stem(w) -> WordIndex(w, idx)}.toMap;
sugg <- parseSample(sampleWords, samplesMap, synsStem)
)
yield (elemId, Record(sugg, value))).groupBy { case (elemId,
_) => elemId }.
map { case (elemId, map) => elemId -> map.values.toSeq }
- val data = recs.flatMap { case (elemId, recs) => recs.map(p =>
p.request -> ElementValue(elemId, p.value)) }
+ val map: Map[NCSuggestionRequest, ElementValue] =
+ recs.flatMap { case (elemId, recs) => recs.map(p => p.request ->
ElementValue(elemId, p.value)) }
+
+ // TODO: sync
+ val res =
+ (
+ if (recs.nonEmpty)
+
Await.result(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2.map(_.request)).toSeq),
Duration.Inf)
+ else
+ Map.empty
+ ).map {
+ case (req, suggs) =>
+ val d = map(req)
+
+ d.elementId -> suggs.groupBy(p =>stem(p.word)).map { case
(stem, map) => stem -> map.map(_.score).max }
+ }
// TODO:
- val res: Map[NCSuggestionRequest, Seq[NCWordSuggestion]] =
- if (recs.nonEmpty)
-
Await.result(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2.map(_.request)).toSeq),
Duration.Inf)
- else
- Map.empty
+ println("!!!samples")
+ res.foreach(s => {
+ println(s"elemID=${s._1}")
- res.map {
- case (req, suggs) =>
- val d = data(req)
+ println(s._2.mkString("\n") + "\n")
- d.elementId -> suggs.groupBy(p =>stem(p.word)).map { case
(stem, map) => stem -> map.map(_.score).max }
- }
+ })
+
+ res
}
@throws[NCE]
- private def askSentence(
- ns: NCNlpSentence,
- samples: ElementStemScore
- ): Map[Int, Set[ElementScore]] = {
+    private def askSentence(ns: NCNlpSentence, samples: ElementStemScore): Map[Int, Set[ElementScore]] = {
val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N"))
Some(p.index) else None).toSeq
+ val reqs = idxs.map(idx => NCSuggestionRequest(ns.text, idx))
- if (idxs.nonEmpty) {
- val reqs = idxs.map(idx => NCSuggestionRequest(ns.text, idx))
-
- //println("suggsReq=" + reqs.mkString("|"))
+ //
- // TODO: tokenization.
- // TODO: sync.
- val suggs: Map[NCWordSuggestion, NCSuggestionRequest] =
- Await.result(
- NCSuggestSynonymManager.suggestWords(reqs),
- Duration.Inf
- ).flatMap { case (req, suggs) => suggs.map(_ -> req) }
+ // TODO: tokenization.
+ // TODO: sync.
+ val suggs: Map[NCWordSuggestion, NCSuggestionRequest] =
+ Await.
+ result(NCSuggestSynonymManager.suggestWords(reqs),
Duration.Inf).
+ flatMap { case (req, suggs) => suggs.map(_ -> req) }
+ // TODO:
+ println("suggsReq=" + reqs.mkString("|"))
+ println("suggs="+suggs.keys.mkString("\n"))
-// println("suggs="+suggs.keys.mkString("\n"))
-// println("suggs")
- suggs.map { case(sugg, req) => (stem(sugg.word), sugg.score, req)
}.
- flatMap { case (stem, suggScore, req) =>
- samples.map { case (elemId, map) =>
- // TODO: contains ? check key (and use score)
+ suggs.map { case(sugg, req) => (stem(sugg.word), sugg.score, req) }.
+ flatMap { case (stem, suggScore, req) =>
+ samples.map { case (elemId, map) =>
+ // TODO: contains ? check key (and use score)
- if (map.contains(stem)) {
- //println(s"!!!FOUND BY stem=$stem, elem=$elemId,
map=$map")
+ if (map.contains(stem)) {
+ // TODO:
+ println(s"!!!FOUND BY stem=$stem, elem=$elemId,
map=$map")
- map.map { case (_, score) => (ElementScore(elemId,
score), req.index) }
- }
- else
- Seq.empty
+ map.map { case (_, score) => (ElementScore(elemId,
score), req.index) }
}
- }.
- flatten.
- groupBy { case (_, idx) => idx }.
- map { case (idx, map) =>
- idx -> map.
- map { case (score, _) => score }.
- groupBy(_.elementId).
- map { case (_, scores) => scores.toSeq.minBy(-_.score)
}.toSet
+ else
+ Seq.empty
}
- }
- else
- Map.empty
+ }.
+ flatten.
+ groupBy { case (_, idx) => idx }.
+ map { case (idx, map) =>
+ idx -> map.
+ map { case (score, _) => score }.
+ groupBy(_.elementId).
+ map { case (_, scores) => scores.toSeq.minBy(-_.score)
}.toSet
+ }
}
override def enrich(ns: NCNlpSentence, parent: Span): Unit = {
@@ -184,18 +189,9 @@ object NCContextWordEnricher extends NCServerEnricher {
val nouns = ns.tokens.filter(_.pos.startsWith("N"))
if (nouns.nonEmpty) {
- //println("nouns=" + nouns.map(_.stem).mkString("|"))
-
val key = ModelProbeKey(cfg.probeId, cfg.modelId)
val samples = getSamples(cfg, key)
-// println("!!!samples")
-// samples.foreach(s => {
-// println(s"elemID=${s._1}")
-//
-// println(s._2.mkString("\n") + "\n")
-//
-// })
for (n <- nouns; (elemId, stems) <- getSamples(cfg, key)
if stems.contains(n.stem))
println("EX FOUND elemId=" + elemId + ", n=" + n.stem
+ ", stem=" + stems.toSeq.sortBy(-_._2))
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index f452f0a..8193464 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -63,8 +63,9 @@ class NCContextWordSpecModel extends NCModel {
"A wild cat is very dangerous",
"A fox eats hens",
"The fox was already in your chicken house",
- "What is the local temperature",
- "This is the first day of heavy rain"
+ "What is the local temperature ?",
+ "This is the first day of heavy rain",
+ "The beautiful day, the sun is shining ",
)
)
@NCIntent(
@@ -96,8 +97,8 @@ class NCContextWordSpec extends NCTestContext {
private[ctxword] def test(): Unit = {
val cli = getClient
- cli.ask("I want to have a dog and fox")
- cli.ask("I like to drive my Porsche and Volkswagen")
+// cli.ask("I want to have a dog and fox")
+// cli.ask("I like to drive my Porsche and Volkswagen")
cli.ask("The frost is possible today")
}
}