This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-460 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 7e2854be3fde3b24c81945a7dc751d8b7b02f098 Author: Sergey Kamov <[email protected]> AuthorDate: Wed Sep 29 14:51:50 2021 +0300 WIP. --- .../server/sugsyn/NCSuggestSynonymManager.scala | 92 ++++++++++++---------- .../nlpcraft/server/rest/NCRestModelSpec.scala | 31 +++++++- .../nlpcraft/server/rest/RestTestModel.scala | 6 +- 3 files changed, 81 insertions(+), 48 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala index d89ba98..91b195d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala @@ -42,8 +42,8 @@ import scala.jdk.CollectionConverters._ import scala.util.{Failure, Success} /** - * Synonym suggestion manager. - */ + * Synonym suggestion manager. + */ object NCSuggestSynonymManager extends NCService { // For context word server requests. private final val MAX_LIMIT: Int = 10000 @@ -82,40 +82,47 @@ object NCSuggestSynonymManager extends NCService { case _ => throw new NCE( - s"Unexpected HTTP response from `ctxword` server [" + - s"code=$code, " + - s"response=$js" + - s"]" - ) + s"Unexpected HTTP response from `ctxword` server [" + + s"code=$code, " + + s"response=$js" + + s"]" + ) } } case class Suggestion(word: String, score: Double) case class RequestData(sentence: String, ex: String, elmId: String, index: Int) - case class RestRequestSentence(text: String, indexes: util.List[Int]) + case class RestRequestSentence(text: String, indexes: util.List[Int]) { + validate(text, indexes.asScala) + + private def validate(text: String, indexes: Seq[Int]): Unit = { + val arr = splitAndNormalize(text) + + require( + indexes.forall(i => i >= 0 && i < arr.length), + s"Invalid request [text=$text, indexes=${indexes.mkString(",")}" + ) + } + } case class RestRequest(sentences: util.List[RestRequestSentence], limit: Int, minScore: Double) case class Word(word: String, stem: String) { require(!word.contains(" "), s"Word cannot contains spaces: $word") - require( - word.forall(ch => - ch.isLetterOrDigit || - ch == '\'' || - SEPARATORS.contains(ch) - ), - s"Unsupported symbols: $word" - ) + require(isSuitable4Suggestion(word), s"Unsupported symbols: $word") } case class SuggestionResult(synonym: String, score: Double) private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ") private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ") private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s) + private def splitAndNormalize(s: String) = s.split(" ").map(_.strip).filter(_.nonEmpty) + private def isSuitable4Suggestion(word: String): Boolean = + word.forall(ch => ch.isLetterOrDigit || ch == '\'' || SEPARATORS.contains(ch)) /** - * - * @param seq1 - * @param seq2 - */ + * + * @param seq1 + * @param seq2 + */ private def getAllSlices(seq1: Seq[String], seq2: Seq[String]): Seq[Int] = { val seq = mutable.Buffer.empty[Int] @@ -131,12 +138,12 @@ object NCSuggestSynonymManager extends NCService { } /** - * - * @param mdlId - * @param minScoreOpt - * @param parent - * @return - */ + * + * @param mdlId + * @param minScoreOpt + * @param parent + * @return + */ def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] = startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ => val now = U.now() @@ -148,8 +155,8 @@ object NCSuggestSynonymManager extends NCService { try { require( m.containsKey("macros") && - m.containsKey("synonyms") && - m.containsKey("samples") + m.containsKey("synonyms") && + m.containsKey("samples") ) val mdlMacros = m.get("macros"). @@ -187,7 +194,7 @@ object NCSuggestSynonymManager extends NCService { if (allSamplesCnt < MIN_CNT_MODEL) warns += s"Model has too few ($allSamplesCnt) intents samples. " + - s"Try to increase overall sample count to at least $MIN_CNT_MODEL." + s"Try to increase overall sample count to at least $MIN_CNT_MODEL." else { val ids = @@ -198,7 +205,7 @@ object NCSuggestSynonymManager extends NCService { if (ids.nonEmpty) warns += s"Following model intent have too few samples (${ids.mkString(", ")}). " + - s"Try to increase overall sample count to at least $MIN_CNT_INTENT." + s"Try to increase overall sample count to at least $MIN_CNT_INTENT." } val parser = new NCMacroParser() @@ -212,15 +219,18 @@ object NCSuggestSynonymManager extends NCService { flatMap { case (_, samples) => samples }. map(ex => SEPARATORS.foldLeft(ex)((s, ch) => s.replaceAll(s"\\$ch", s" $ch "))). map(ex => { - val seq = ex.split(" ") + val seq = splitAndNormalize(ex) seq -> seq.map(toStemWord) }). toMap val elmSyns = - mdlSyns.map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }. - map { case (id, seq) => id -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) } + mdlSyns. + map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }. + map { case (elmId, syns) => elmId -> syns.filter(isSuitable4Suggestion) }. + filter { case (_, syns) => syns.nonEmpty }. + map { case (elmId, seq) => elmId -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) } val allReqs = elmSyns.map { @@ -276,7 +286,7 @@ object NCSuggestSynonymManager extends NCService { s"exs=${exs.size}, " + s"syns=$allSynsCnt, " + s"reqs=$allReqsCnt" + - s"]") + s"]") if (allReqsCnt == 0) onError(s"Suggestions cannot be generated for model: '$mdlId'") @@ -441,19 +451,19 @@ object NCSuggestSynonymManager extends NCService { } /** - * - * @param parent Optional parent span. - * @return - */ + * + * @param parent Optional parent span. + * @return + */ override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ => ackStarting() ackStarted() } /** - * - * @param parent Optional parent span. - */ + * + * @param parent Optional parent span. + */ override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ => ackStopping() ackStopped() diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala index 2bbc9cb..387e8b0 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala @@ -17,7 +17,7 @@ package org.apache.nlpcraft.server.rest -import org.apache.nlpcraft.model.NCElement +import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, NCResult} import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment} import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test @@ -25,10 +25,22 @@ import org.junit.jupiter.api.Test import java.util import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SetHasAsJava, SetHasAsScala} +class RestTestModelExt1 extends RestTestModel { + @NCIntent("intent=onX term(t)={# == 'a'}") + @NCIntentSample(Array( + "oh, cat will feel happy", + "oh , cat will feel happy", + "oh cat will feel happy" + )) + private def x(): NCResult = NCResult.text("OK") + + override def getElements: util.Set[NCElement] = + (super.getElements.asScala ++ Set(NCTestElement("cat", "cat", "{^^{is_alphanum(tok_txt)}^^}[1,3]"))).asJava +} /** * Note that context word server should be started. */ -@NCTestEnvironment(model = classOf[RestTestModel], startClient = false) +@NCTestEnvironment(model = classOf[RestTestModelExt1], startClient = false) class NCRestModelSpec1 extends NCRestSpec { @Test def testSugsyn(): Unit = { @@ -57,13 +69,24 @@ class NCRestModelSpec1 extends NCRestSpec { }) ) + post("model/sugsyn", "mdlId" -> "rest.test.model", "minScore" -> 0.5)( + ("$.status", (status: String) => assertEquals("API_OK", status)), + ("$.result.suggestions[:1].cat.*", (data: JList[java.util.Map[String, Object]]) => { + val scores = extract(data) + + assertTrue(scores.nonEmpty) + assertTrue(scores.forall(s => s >= 0.5 && s <= 1)) + }) + ) + + postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "UNKNOWN") postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "rest.test.model", "minScore" -> 2) postError("model/sugsyn", 400, "NC_ERROR") } } -class RestTestModelExt extends RestTestModel { +class RestTestModelExt2 extends RestTestModel { override def getMacros: util.Map[String, String] = { Map( "<M1>" -> "mtest1 {x|_}", @@ -90,7 +113,7 @@ class RestTestModelExt extends RestTestModel { /** * */ -@NCTestEnvironment(model = classOf[RestTestModelExt], startClient = false) +@NCTestEnvironment(model = classOf[RestTestModelExt2], startClient = false) class NCRestModelSpec2 extends NCRestSpec { @Test def testSyns(): Unit = { diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala index 0cb519e..8fa5b15 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala @@ -55,15 +55,15 @@ class RestTestModel extends NCModelAdapter("rest.test.model", "REST test model", @NCIntent("intent=onA term(t)={# == 'a'}") @NCIntentSample(Array("My A")) - private def a(): NCResult = NCResult.text("OK") + def a(): NCResult = NCResult.text("OK") @NCIntent("intent=onB term(t)={# == 'b'}") @NCIntentSample(Array("My B")) - private def b(): NCResult = NCResult.text("OK") + def b(): NCResult = NCResult.text("OK") @NCIntent("intent=onMeta term(t)={# == 'meta'}") @NCIntentSample(Array("meta")) - private def meta(): NCResult = { + def meta(): NCResult = { val res = NCResult.text("OK") res.getMetadata.put(K1, V1)
