This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new 57a6a7f WIP.
57a6a7f is described below
commit 57a6a7fa97f5e2ac13cf4054ca36f56293759827
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Jun 29 20:41:15 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 2 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 3 +-
.../enrichers/ctxword/NCContextWordEnricher.scala | 67 ++++++++++-----
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 97 +++++++++++++---------
.../model/ctxword/NCContextWordSpec2.scala | 72 ++++++++++++++++
5 files changed, 180 insertions(+), 61 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index ed22935..613a7ce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -53,7 +53,7 @@ class NCNlpSentence(
val text: String,
val enabledBuiltInToks: Set[String],
val mlConfig: Option[NCModelMLConfigMdo] = None,
- var mlData: Map[Int, Map[String, Double]] = Map.empty,
+ var mlData: Map[Int, Map[String, java.util.List[Double]]] = Map.empty,
override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new
mutable.ArrayBuffer[NCNlpSentenceToken](32),
var firstProbePhase: Boolean = true,
private val deletedNotes: mutable.HashMap[NCNlpSentenceNote,
Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index c3f5e3d..042e86e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -29,7 +29,6 @@ import
org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants,
NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
-import java.lang
import java.util.{List => JList}
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
@@ -455,7 +454,7 @@ object NCModelEnricher extends NCProbeEnricher {
elem = mdl.elements.find(_._1 ==
elemId).getOrElse(throw new NCE(s"Element not found: $elemId"))._2,
toks = Seq(ns.tokens(tokIdx)),
direct = true,
- metaOpt = Some(Map("score" ->
score.asInstanceOf[AnyRef]))
+ metaOpt = Some(Map("scores" -> score))
)
val req = NCRequestImpl(senMeta, ns.srvReqId)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
index 16609b1..5b1b1c7 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
@@ -31,6 +31,7 @@ import org.jibx.schema.codegen.extend.DefaultNameConverter
import scala.collection.mutable
import scala.concurrent.Await
import scala.concurrent.duration.Duration
+import scala.jdk.CollectionConverters.SeqHasAsJava
/**
* ContextWord enricher.
@@ -38,23 +39,30 @@ import scala.concurrent.duration.Duration
object NCContextWordEnricher extends NCServerEnricher {
private final val MAX_CTXWORD_SCORE = 2
private final val EXCL_MIN_SCORE = -1.0
- private final val INCL_MIN_SCORE = 0.0
private final val INCL_MAX_SCORE = 1.0
private final val CONVERTER = new DefaultNameConverter
private case class ModelProbeKey(probeId: String, modelId: String)
- private case class ElementScore(elementId: String, averageScore: Double,
senScore: Double, sampleScore: Double)
+ private case class ElementScore(elementId: String, scores: Double*) {
lazy val maxScore: Double = scores.max // TODO: logic
+ override def toString: String = s"Element [id=$elementId,
scores=${scores.sortBy(p => -p).mkString("[", ",", "]")}]"
+ }
private case class ValuesHolder(
values: Map[/** Value as is */ String, /** Element ID */ Set[String]],
valuesStems: Map[/** Value's stem */ String, /** Element ID */
Set[String]]
- )
+ ) {
+ override def toString: String = s"Values [values=$values,
stems=$valuesStems]"
+ }
case class ScoreHolder(
normal: Map[/** Normal value */ String, /** Score */ Double],
stems: Map[/** Stem */ String, /** Score */ Double],
- lemma: Map[/** Lemma */ String, /** Score */ Double],
- )
+ lemma: Map[/** Lemma */ String, /** Score */ Double]
+ ) {
+ private def sort(m: Map[String, Double]): String =
m.toSeq.sortBy(-_._2).map({ case (k, v) => s"$k=$v" }).mkString(",")
+ override def toString: String = s"Score [normal=${sort(normal)},
stems=${sort(stems)}, lemma=${sort(lemma)}]"
+ }
@volatile private var valuesStems: mutable.HashMap[ModelProbeKey,
ValuesHolder] = _
@volatile private var samples: mutable.HashMap[ModelProbeKey, Map[/**
Element ID */String, ScoreHolder]] = _
@@ -75,6 +83,7 @@ object NCContextWordEnricher extends NCServerEnricher {
startScopedSpan("stop", parent) { _ =>
ackStopping()
+ // TODO: clear model cache
parser = null
samples = null
valuesStems = null
@@ -252,7 +261,7 @@ object NCContextWordEnricher extends NCServerEnricher {
val recs: Map[/** Element ID */String, Seq[NCSuggestionRequest]] =
(
for (
- (elemId, elemValues) <- cfg.values;
+ (elemId, elemValues) <- cfg.values.toSeq;
// Uses single words synonyms only.
elemValuesSyns =
elemValues.flatMap(_._2).toSet.filter(!_.contains(' '));
suggReq <- parseSample(
@@ -268,7 +277,7 @@ object NCContextWordEnricher extends NCServerEnricher {
yield (elemId, suggReq)
).
groupBy { case (elemId, _) => elemId }.
- map { case (elemId, m) => elemId -> m.toSeq.map(_._2) }
+ map { case (elemId, m) => elemId -> m.map(_._2) }
if (recs.nonEmpty) {
val resps =
syncExec(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2).toSeq))
@@ -312,15 +321,16 @@ object NCContextWordEnricher extends NCServerEnricher {
val detected = mutable.HashMap.empty[NCNlpSentenceToken,
mutable.HashSet[ElementScore]]
def add(
- nounTok: NCNlpSentenceToken, elemId: String, averageScore:
Double, senScore: Double, sampleScore: Double
+ nounTok: NCNlpSentenceToken, elemId: String, senScore:
Double, sampleScore: Double
): Unit = {
+ val maxScore = Math.max(senScore, sampleScore)
val tokElems = detected.getOrElseUpdate(nounTok,
mutable.HashSet.empty[ElementScore])
- def mkNew(): ElementScore = ElementScore(elemId,
averageScore, senScore, sampleScore)
+ def mkNew(): ElementScore = ElementScore(elemId, senScore,
sampleScore)
tokElems.find(_.elementId == elemId) match {
case Some(saved) =>
- if (averageScore > saved.averageScore) {
+ if (maxScore > saved.maxScore) {
tokElems -= saved
tokElems += mkNew()
}
@@ -336,6 +346,8 @@ object NCContextWordEnricher extends NCServerEnricher {
// 1. Values. Direct.
val valuesData = getValuesData(cfg, key)
+ //println("valuesData="+valuesData)
+
for (
nounTok <- nounToks;
elemId <-
@@ -343,11 +355,13 @@ object NCContextWordEnricher extends NCServerEnricher {
valuesData.values.getOrElse(nounTok.normText,
Set.empty) ++
valuesData.valuesStems.getOrElse(nounTok.stem,
Set.empty)
)
- add(nounTok, elemId, INCL_MAX_SCORE, INCL_MAX_SCORE,
INCL_MAX_SCORE)
+ add(nounTok, elemId, INCL_MAX_SCORE, INCL_MAX_SCORE)
// 2. Via examples.
val mdlSamples = getSamplesData(cfg, key)
+ //println("mdlSamples="+mdlSamples.mkString("\n"))
+
for (
nounTok <- nounToks;
(elemId, suggs) <- mdlSamples;
@@ -358,19 +372,31 @@ object NCContextWordEnricher extends NCServerEnricher {
).max
if score >= cfg.levels(elemId)
)
- add(nounTok, elemId, score, score, score)
+ add(nounTok, elemId, score, score)
// 3. Ask for sentence.
val idxs = ns.tokens.flatMap(p => if
(p.pos.startsWith("N")) Some(p.index) else None).toSeq
val reqs = idxs.map(idx =>
NCSuggestionRequest(ns.tokens.map(_.origText).toSeq, idx))
+ val resps =
+ syncExec(
+
NCSuggestSynonymManager.suggestWords(reqs)).flatMap { case (req, suggs) =>
suggs.map(_ -> req)
+ }
+
+// resps.toSeq.groupBy(_._2.index).foreach { case (_, seq)
=>
+// val sorted = seq.sortBy(-_._1.score)
+//
+// println("REQ=" + sorted.head._2)
+// println("Resps=" + sorted.map(_._1))
+// println()
+// }
+
+
+
for (
// Token index (tokIdx) should be correct because
request created from original words,
// separated by space, and Suggestion Manager uses
space tokenizer.
- (sugg, req) <-
- syncExec(
-
NCSuggestSynonymManager.suggestWords(reqs)).flatMap { case (req, suggs) =>
suggs.map(_ -> req)
- };
+ (sugg, req) <- resps;
senScore = normalizeScore(sugg.score);
(elemId, mdlSamplesSuggs) <- mdlSamples;
elemScore = cfg.levels(elemId);
@@ -379,15 +405,14 @@ object NCContextWordEnricher extends NCServerEnricher {
mdlSamplesSuggs.stems.getOrElse(stem(sugg.word), EXCL_MIN_SCORE),
mdlSamplesSuggs.normal.getOrElse(sugg.word.toLowerCase, EXCL_MIN_SCORE),
mdlSamplesSuggs.lemma.getOrElse(getSuggestionLemma(req, sugg), EXCL_MIN_SCORE)
- ).max;
- averageScore = (sampleScore + senScore) / 2
- if sampleScore >= INCL_MIN_SCORE && averageScore >=
elemScore
+ ).max
+ if sampleScore >= elemScore && senScore >= elemScore
// TODO: logic
)
- add(ns.tokens(req.index), elemId, averageScore,
senScore, sampleScore)
+ add(ns.tokens(req.index), elemId, senScore,
sampleScore)
}
ns.mlData = detected.map {
- case (tok, scores) => tok.index -> scores.map(p =>
p.elementId -> p.averageScore).toMap
+ case (tok, scores) => tok.index -> scores.map(p =>
p.elementId -> p.scores.asJava).toMap
}.toMap
println("detected="+detected.map(p => p._1.lemma -> p._2))
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index 9afcf41..736f5be 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -17,17 +17,34 @@
package org.apache.nlpcraft.model.ctxword
+import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch,
NCIntentSample, NCIntentTerm, NCModel, NCResult, NCToken, NCValue}
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
+import java.text.DecimalFormat
import java.util.{Collections, Optional}
import java.{lang, util}
import scala.collection.mutable.ArrayBuffer
-import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
+import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava,
SetHasAsJava}
object NCContextWordSpecModel {
- private final val LEVEL = 0.4
+ val tables: ArrayBuffer[(String, NCAsciiTable)] =
ArrayBuffer.empty[(String, NCAsciiTable)]
+}
+
+import org.apache.nlpcraft.model.ctxword.NCContextWordSpecModel._
+
+/**
+ * Test model.
+ */
+class NCContextWordSpecModel extends NCModel {
+ override def getId: String = this.getClass.getSimpleName
+ override def getName: String = this.getClass.getSimpleName
+ override def getVersion: String = "1.0.0"
+
+ private final val FMT = new DecimalFormat("#0.00000")
+
+ val level = 0.4
case class Value(name: String, syns: String*) extends NCValue {
override def getName: String = name
@@ -42,22 +59,9 @@ object NCContextWordSpecModel {
}
object Element {
- def apply(id: String, values: NCValue*): Element = new Element(id,
LEVEL, values: _*)
+ def apply(id: String, values: NCValue*): Element = new Element(id,
level, values: _*)
}
- var testsData: ArrayBuffer[String] = ArrayBuffer.empty[String]
-}
-
-import NCContextWordSpecModel._
-
-/**
- * Test model.
- */
-class NCContextWordSpecModel extends NCModel {
- override def getId: String = this.getClass.getSimpleName
- override def getName: String = this.getClass.getSimpleName
- override def getVersion: String = "1.0.0"
-
override def getElements: util.Set[NCElement] =
Set(
Element("class:cars", Value("BMW")),
@@ -87,12 +91,24 @@ class NCContextWordSpecModel extends NCModel {
)
@NCIntent("intent=classification term(toks)~{has(tok_groups(),
'testGroup')}*")
def onMatch(ctx: NCIntentMatch, @NCIntentTerm("toks") toks:
List[NCToken]): NCResult = {
- val txt = ctx.getContext.getRequest.getNormalizedText
- val toksStr = toks.map(t =>
- s"[text=${t.getOriginalText}, elementId=${t.getId},
score=${t.getMetadata.get(s"${t.getId}:score")}]"
- ).mkString(", ")
+ val table = NCAsciiTable()
+
+ table #= ("Token text", "Element ID", "Scores")
- testsData += s"Matched [text=$txt, tokens=$toksStr"
+ for (t <- toks)
+ table += (
+ t.getOriginalText,
+ t.getId,
+ t.getMetadata.
+ get(s"${t.getId}:scores").
+ asInstanceOf[java.util.List[Double]].
+ asScala.
+ sortBy(-_).
+ map(FMT.format).
+ mkString(", ")
+ )
+
+ tables += ctx.getContext.getRequest.getNormalizedText -> table
val elemIds = toks.map(_.getId).distinct.mkString(" ")
val words = toks.map(_.getOriginalText).mkString(" ")
@@ -114,26 +130,33 @@ class NCContextWordSpec extends NCTestContext {
}
@BeforeEach
- private[ctxword] def before(): Unit = testsData.clear()
+ private[ctxword] def before(): Unit = tables.clear()
@AfterEach
- private[ctxword] def after(): Unit = testsData.foreach(println)
+ private[ctxword] def after(): Unit = {
+ println("MATCHED:")
+
+ for ((txt, table) <- tables) {
+ println(s"Text: $txt")
+ table.render()
+ }
+
+ tables.clear()
+ }
@Test
private[ctxword] def test(): Unit = {
- //check("I want to have a dogs and foxes", "class:animal", "dogs",
"foxes")
- //check("I bought dog's meat", "class:animal", "dog meat")
- check("I bought the meat", "class:animal", "dog meat")
-
-
- //check("I bought xxx dog's", "class:animal", "dog")
-// check("I want to have a dog and fox", "class:animal", "dog", "fox")
-// check("I fed your fish", "class:animal", "fish")
-//
-// check("I like to drive my Porsche and Volkswagen", "class:cars",
"Porsche", "Volkswagen")
-// check("Peugeot added motorcycles to its range in 1901",
"class:cars", "Peugeot", "motorcycles")
-//
-// check("The frost is possible today", "class:weather", "frost")
-// check("There's a very strong wind from the east now",
"class:weather", "wind")
+ check("I want to have dogs and foxes", "class:animal", "dogs", "foxes")
+ check("I bought dog's meat", "class:animal", "dog")
+ check("I bought meat dog's", "class:animal", "dog")
+
+ check("I want to have a dog and fox", "class:animal", "dog", "fox")
+ check("I fed your fish", "class:animal", "fish")
+
+ check("I like to drive my Porsche and Volkswagen", "class:cars",
"Porsche", "Volkswagen")
+ check("Peugeot added motorcycles to its range in 1901", "class:cars",
"Peugeot", "motorcycles")
+
+ check("The frost is possible today", "class:weather", "frost")
+ check("There's a very strong wind from the east now", "class:weather",
"wind")
}
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
new file mode 100644
index 0000000..2a0f407
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.ctxword
+
+import org.apache.nlpcraft.model.ctxword.NCContextWordSpecModel._
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
+
+/**
+ * Test model.
+ */
+class NCContextWordSpecModel2 extends NCContextWordSpecModel {
+ override val level: Double = 0
+}
+
+/**
+ * @see NCContextWordSpecModel
+ */
+@NCTestEnvironment(model = classOf[NCContextWordSpecModel2], startClient =
true)
+class NCContextWordSpec2 extends NCTestContext {
+ private def check(txts: String*): Unit =
+ for (txt <- txts)
+ checkIntent(txt, "classification")
+
+ @BeforeEach
+ private[ctxword] def before(): Unit = tables.clear()
+
+ @AfterEach
+ private[ctxword] def after(): Unit = {
+ println("MATCHED:")
+
+ for ((txt, table) <- tables) {
+ println(s"Text: $txt")
+ table.render()
+ }
+
+ tables.clear()
+ }
+
+ @Test
+ private[ctxword] def test(): Unit = {
+ check(
+ "I want to have dogs and foxes",
+// "I bought dog's meat",
+// "I bought meat dog's",
+//
+// "I want to have a dog and fox",
+// "I fed your fish",
+//
+// "I like to drive my Porsche and Volkswagen",
+// "Peugeot added motorcycles to its range in 1901",
+//
+// "The frost is possible today",
+// "There's a very strong wind from the east now"
+ )
+ }
+}