This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-460
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 7e2854be3fde3b24c81945a7dc751d8b7b02f098
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Sep 29 14:51:50 2021 +0300

    WIP.
---
 .../server/sugsyn/NCSuggestSynonymManager.scala    | 92 ++++++++++++----------
 .../nlpcraft/server/rest/NCRestModelSpec.scala     | 31 +++++++-
 .../nlpcraft/server/rest/RestTestModel.scala       |  6 +-
 3 files changed, 81 insertions(+), 48 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index d89ba98..91b195d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -42,8 +42,8 @@ import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Success}
 
 /**
- * Synonym suggestion manager.
- */
+  * Synonym suggestion manager.
+  */
 object NCSuggestSynonymManager extends NCService {
     // For context word server requests.
     private final val MAX_LIMIT: Int = 10000
@@ -82,40 +82,47 @@ object NCSuggestSynonymManager extends NCService {
 
                 case _ =>
                     throw new NCE(
-                    s"Unexpected HTTP response from `ctxword` server [" +
-                    s"code=$code, " +
-                    s"response=$js" +
-                    s"]"
-                )
+                        s"Unexpected HTTP response from `ctxword` server [" +
+                            s"code=$code, " +
+                            s"response=$js" +
+                            s"]"
+                    )
             }
         }
 
     case class Suggestion(word: String, score: Double)
     case class RequestData(sentence: String, ex: String, elmId: String, index: Int)
-    case class RestRequestSentence(text: String, indexes: util.List[Int])
+    case class RestRequestSentence(text: String, indexes: util.List[Int]) {
+        validate(text, indexes.asScala)
+
+        private def validate(text: String, indexes: Seq[Int]): Unit = {
+            val arr = splitAndNormalize(text)
+
+            require(
+                indexes.forall(i => i >= 0 && i < arr.length),
+                s"Invalid request [text=$text, indexes=${indexes.mkString(",")}"
+            )
+        }
+    }
     case class RestRequest(sentences: util.List[RestRequestSentence], limit: Int, minScore: Double)
     case class Word(word: String, stem: String) {
         require(!word.contains(" "), s"Word cannot contains spaces: $word")
-        require(
-            word.forall(ch =>
-                ch.isLetterOrDigit ||
-                ch == '\'' ||
-                SEPARATORS.contains(ch)
-            ),
-            s"Unsupported symbols: $word"
-        )
+        require(isSuitable4Suggestion(word), s"Unsupported symbols: $word")
     }
     case class SuggestionResult(synonym: String, score: Double)
 
     private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ")
     private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
     private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
+    private def splitAndNormalize(s: String) = s.split(" ").map(_.strip).filter(_.nonEmpty)
+    private def isSuitable4Suggestion(word: String): Boolean =
+        word.forall(ch => ch.isLetterOrDigit || ch == '\'' || SEPARATORS.contains(ch))
 
     /**
-     *
-     * @param seq1
-     * @param seq2
-     */
+      *
+      * @param seq1
+      * @param seq2
+      */
     private def getAllSlices(seq1: Seq[String], seq2: Seq[String]): Seq[Int] = {
         val seq = mutable.Buffer.empty[Int]
 
@@ -131,12 +138,12 @@ object NCSuggestSynonymManager extends NCService {
     }
 
     /**
-     *
-     * @param mdlId
-     * @param minScoreOpt
-     * @param parent
-     * @return
-     */
+      *
+      * @param mdlId
+      * @param minScoreOpt
+      * @param parent
+      * @return
+      */
     def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] =
         startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ =>
             val now = U.now()
@@ -148,8 +155,8 @@ object NCSuggestSynonymManager extends NCService {
                     try {
                         require(
                             m.containsKey("macros") &&
-                            m.containsKey("synonyms") &&
-                            m.containsKey("samples")
+                                m.containsKey("synonyms") &&
+                                m.containsKey("samples")
                         )
 
                         val mdlMacros = m.get("macros").
@@ -187,7 +194,7 @@ object NCSuggestSynonymManager extends NCService {
                             if (allSamplesCnt < MIN_CNT_MODEL)
                                 warns +=
                                     s"Model has too few ($allSamplesCnt) intents samples. " +
-                                    s"Try to increase overall sample count to at least $MIN_CNT_MODEL."
+                                        s"Try to increase overall sample count to at least $MIN_CNT_MODEL."
 
                             else {
                                 val ids =
@@ -198,7 +205,7 @@ object NCSuggestSynonymManager extends NCService {
                                 if (ids.nonEmpty)
                                     warns +=
                                         s"Following model intent have too few samples (${ids.mkString(", ")}). " +
-                                        s"Try to increase overall sample count to at least $MIN_CNT_INTENT."
+                                            s"Try to increase overall sample count to at least $MIN_CNT_INTENT."
                             }
 
                             val parser = new NCMacroParser()
@@ -212,15 +219,18 @@ object NCSuggestSynonymManager extends NCService {
                                 flatMap { case (_, samples) => samples }.
                                 map(ex => SEPARATORS.foldLeft(ex)((s, ch) => s.replaceAll(s"\\$ch", s" $ch "))).
                                 map(ex => {
-                                    val seq = ex.split(" ")
+                                    val seq = splitAndNormalize(ex)
 
                                     seq -> seq.map(toStemWord)
                                 }).
                                 toMap
 
                             val elmSyns =
-                                mdlSyns.map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }.
-                                    map { case (id, seq) => id -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) }
+                                mdlSyns.
+                                    map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }.
+                                    map { case (elmId, syns) => elmId -> syns.filter(isSuitable4Suggestion) }.
+                                    filter { case (_, syns) => syns.nonEmpty }.
+                                    map { case (elmId, seq) => elmId -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) }
 
                             val allReqs =
                                 elmSyns.map {
@@ -276,7 +286,7 @@ object NCSuggestSynonymManager extends NCService {
                                 s"exs=${exs.size}, " +
                                 s"syns=$allSynsCnt, " +
                                 s"reqs=$allReqsCnt" +
-                            s"]")
+                                s"]")
 
                             if (allReqsCnt == 0)
                                 onError(s"Suggestions cannot be generated for model: '$mdlId'")
@@ -441,19 +451,19 @@ object NCSuggestSynonymManager extends NCService {
         }
 
     /**
-     *
-     * @param parent Optional parent span.
-     * @return
-     */
+      *
+      * @param parent Optional parent span.
+      * @return
+      */
     override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ =>
         ackStarting()
         ackStarted()
     }
 
     /**
-     *
-     * @param parent Optional parent span.
-     */
+      *
+      * @param parent Optional parent span.
+      */
     override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ =>
         ackStopping()
         ackStopped()
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
index 2bbc9cb..387e8b0 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
@@ -17,7 +17,7 @@
 
 package org.apache.nlpcraft.server.rest
 
-import org.apache.nlpcraft.model.NCElement
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, NCResult}
 import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
@@ -25,10 +25,22 @@ import org.junit.jupiter.api.Test
 import java.util
 import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SetHasAsJava, SetHasAsScala}
 
+class RestTestModelExt1 extends RestTestModel {
+    @NCIntent("intent=onX term(t)={# == 'a'}")
+    @NCIntentSample(Array(
+        "oh, cat will feel happy",
+        "oh , cat will feel happy",
+        "oh  cat will feel happy"
+    ))
+    private def x(): NCResult = NCResult.text("OK")
+
+    override def getElements: util.Set[NCElement] =
+        (super.getElements.asScala ++ Set(NCTestElement("cat", "cat", "{^^{is_alphanum(tok_txt)}^^}[1,3]"))).asJava
+}
 /**
   * Note that context word server should be started.
   */
-@NCTestEnvironment(model = classOf[RestTestModel], startClient = false)
+@NCTestEnvironment(model = classOf[RestTestModelExt1], startClient = false)
 class NCRestModelSpec1 extends NCRestSpec {
     @Test
     def testSugsyn(): Unit = {
@@ -57,13 +69,24 @@ class NCRestModelSpec1 extends NCRestSpec {
             })
         )
 
+        post("model/sugsyn", "mdlId" -> "rest.test.model", "minScore" -> 0.5)(
+            ("$.status", (status: String) => assertEquals("API_OK", status)),
+            ("$.result.suggestions[:1].cat.*", (data: JList[java.util.Map[String, Object]]) => {
+                val scores = extract(data)
+
+                assertTrue(scores.nonEmpty)
+                assertTrue(scores.forall(s => s >= 0.5 && s <= 1))
+            })
+        )
+
+
         postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "UNKNOWN")
         postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "rest.test.model", "minScore" -> 2)
         postError("model/sugsyn", 400, "NC_ERROR")
     }
 }
 
-class RestTestModelExt extends RestTestModel {
+class RestTestModelExt2 extends RestTestModel {
     override def getMacros: util.Map[String, String] = {
         Map(
             "<M1>" -> "mtest1 {x|_}",
@@ -90,7 +113,7 @@ class RestTestModelExt extends RestTestModel {
 /**
   *
   */
-@NCTestEnvironment(model = classOf[RestTestModelExt], startClient = false)
+@NCTestEnvironment(model = classOf[RestTestModelExt2], startClient = false)
 class NCRestModelSpec2 extends NCRestSpec {
     @Test
     def testSyns(): Unit = {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
index 0cb519e..8fa5b15 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
@@ -55,15 +55,15 @@ class RestTestModel extends NCModelAdapter("rest.test.model", "REST test model",
 
     @NCIntent("intent=onA term(t)={# == 'a'}")
     @NCIntentSample(Array("My A"))
-    private def a(): NCResult = NCResult.text("OK")
+    def a(): NCResult = NCResult.text("OK")
 
     @NCIntent("intent=onB term(t)={# == 'b'}")
     @NCIntentSample(Array("My B"))
-    private def b(): NCResult = NCResult.text("OK")
+    def b(): NCResult = NCResult.text("OK")
 
     @NCIntent("intent=onMeta term(t)={# == 'meta'}")
     @NCIntentSample(Array("meta"))
-    private def meta(): NCResult = {
+    def meta(): NCResult = {
         val res = NCResult.text("OK")
 
         res.getMetadata.put(K1, V1)

Reply via email to