This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new debebc3 Tests extended.
debebc3 is described below
commit debebc30cc221e7e53d7b52546d0bd3a53e0509f
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Mar 14 16:31:18 2022 +0300
Tests extended.
---
.../internal/impl/NCModelPipelineManager.scala | 3 +-
.../intent/matcher/NCIntentSolverManager.scala | 3 +
.../parser/impl/NCSemanticEntityParserImpl.scala | 20 +++-
.../nlpcraft/nlp/NCENDefaultPipelineSpec.scala | 60 ----------
.../semantic/NCSemanticEntityParserLemmaSpec.scala | 124 +++++++++++++++++++++
.../semantic/NCSemanticEntityParserSpec.scala | 45 ++------
.../parser/semantic/NCSemanticTestElement.scala | 51 +++++++++
.../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 22 ++++
8 files changed, 229 insertions(+), 99 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
index 9652d2c..907bc7e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
@@ -184,7 +184,8 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline:
NCModelPipeline) exte
check()
variants = varFilterOpt.get.filter(req, cfg, variants)
- val vrnts = variants.asScala.toSeq
+ // Skips empty variants.
+ val vrnts = variants.asScala.toSeq.filter(!_.getEntities.isEmpty)
for ((v, i) <- vrnts.zipWithIndex)
val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position",
"Properties")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
index afd1bb1..74cacda 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
@@ -691,6 +691,9 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager,
intents: Map[NCIDLInten
var res: NCResult = mdl.onContext(ctx)
if res != null then
+ // TODO: text.
+ if intents.nonEmpty then logger.warn("`onContext` method overrides
existing intents. They are ignored.")
+
res
else
if intents.isEmpty then
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
index 2b4e35b..5a28675 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
@@ -230,10 +230,12 @@ class NCSemanticEntityParserImpl(
val cache = mutable.HashSet.empty[Seq[Int]] // Variants (tokens
without stopwords) can be repeated.
case class Holder(elemId: String, tokens: Seq[NCToken], value:
Option[String]):
- private val idxs = tokens.map(_.getIndex).toSet
+ val tokensSet = tokens.toSet
+ val idxs = tokensSet.map(_.getIndex)
+
def isSuperSet(toks: Seq[NCToken]): Boolean = idxs.size >
toks.size && toks.map(_.getIndex).toSet.subsetOf(idxs)
- val hs = mutable.ArrayBuffer.empty[Holder]
+ var hs = mutable.ArrayBuffer.empty[Holder]
for (piece <- getPieces(toks) if
!hs.exists(_.isSuperSet(piece.baseTokens));
variant <- Seq(piece.baseTokens) ++ piece.variants)
@@ -271,6 +273,20 @@ class NCSemanticEntityParserImpl(
if found then add(elemId, Option.when(s.value
!= null)(s.value))
+ // Deletes redundant holders.
+ hs = hs.distinct
+
+ val del = mutable.ArrayBuffer.empty[Holder]
+ // 1. Look at each element with its value.
+ for (((_, _), seq) <- hs.groupBy(h => (h.elemId, h.value)) if seq.size
> 1)
+ // 2. If some variants are duplicated - keep only one, with the most
tokens.
+ val seqIdxs = seq.zipWithIndex
+
+ for ((h, idx) <- seqIdxs if !del.contains(h))
+ del ++= seqIdxs.filter { (_, oIdx) => oIdx != idx }.map { (h,
_) => h }.filter(_.tokensSet.subsetOf(h.tokensSet))
+
+ hs --= del
+
hs.toSeq.map(h => {
val e = elemsMap(h.elemId)
new NCPropertyMapAdapter with NCEntity:
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
deleted file mode 100644
index e8b7931..0000000
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.nlp
-
-import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser,
NCSemanticEntityParser}
-import org.apache.nlpcraft.nlp.util.NCTestModelAdapter
-import org.junit.jupiter.api.Test
-
-import scala.util.Using
-
-class NCENDefaultPipelineSpec:
- /**
- *
- * @param cfg
- * @param pipeline
- * @return
- */
- private def mkModel(cfg: NCModelConfig, pipeline: NCModelPipeline):
NCModel =
- new NCModelAdapter(cfg, pipeline):
- @NCIntent("intent=ls term(act)={has(ent_groups, 'act')}
term(loc)={# == 'ls:loc'}*")
- @NCIntentSample(Array(
- "Please, put the light out in the upstairs bedroom.",
- ))
- def onMatch(
- @NCIntentTerm("act") actEnt: NCEntity,
- @NCIntentTerm("loc") locEnts: List[NCEntity]
- ): NCResult =
- val status = if actEnt.getId == "ls:on" then "on" else "off"
- val locations = if locEnts.isEmpty then "entire house" else
locEnts.map(_.mkText()).mkString(", ")
- new NCResult(
- s"Lights are [$status] in [${locations.toLowerCase}].",
- NCResultType.ASK_RESULT
- )
-
- @Test
- def test(): Unit =
- val cfg = new NCModelConfig("test.id", "Test model", "1.0")
- // Default EN pipeline with default EN semantic parser.
-
- val pipeline = new
NCModelPipelineBuilder().withLanguage("EN").withEntityParser(new
NCEnSemanticEntityParser("models/lightswitch_model.yaml")).build()
-
- Using.resource(new NCModelClient(mkModel(cfg, pipeline))) { client =>
- println(client.ask("Please, put the light out in the upstairs
bedroom.", null, "userId").getBody)
- }
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
new file mode 100644
index 0000000..8ca532e
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.semantic
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.impl.*
+import org.apache.nlpcraft.internal.util.*
+import org.apache.nlpcraft.nlp.entity.parser.*
+import
org.apache.nlpcraft.nlp.token.enricher.{NCENOpenNlpLemmaPosTokenEnricher,
NCENStopWordsTokenEnricher}
+import org.apache.nlpcraft.nlp.token.enricher.en.*
+import org.apache.nlpcraft.nlp.token.parser.NCENOpenNLPTokenParser
+import org.apache.nlpcraft.nlp.util.NCTestUtils
+import org.apache.nlpcraft.nlp.util.opennlp.*
+import org.junit.jupiter.api.*
+
+import java.util
+import java.util.{UUID, List as JList, Map as JMap, Set as JSet}
+import scala.collection.mutable
+import scala.jdk.CollectionConverters.*
+
+/**
+ *
+ */
+class NCSemanticEntityParserLemmaSpec:
+ private val lemmaTokEnricher = new NCENOpenNlpLemmaPosTokenEnricher
+ private val swTokEnricher = new NCENStopWordsTokenEnricher
+ private val tokParser = new NCENOpenNLPTokenParser
+ private val lemmaStemmer =
+ new NCSemanticStemmer():
+ override def stem(txt: String): String = if wrapped(txt) then
unwrap(txt) else UUID.randomUUID().toString
+
+ case class Data(text: String, elemId: String)
+
+ private def wrap(s: String): String =
+ require(s != null)
+ s"_$s"
+
+ private def deepWrap(s: String): String =
+ require(s != null)
+ s.split(" ").map(wrap).mkString(" ")
+
+ private def wrapped(s: String): Boolean = s.length > 1 && s.head == '_'
+ private def unwrap(s: String): String =
+ require(wrapped(s))
+ s.drop(1)
+
+ private def ask(txt: String, elems: Seq[NCSemanticTestElement], expVrnts:
Seq[Seq[Data]]): Unit =
+ val mgr = new NCModelPipelineManager(
+ CFG,
+ new NCModelPipelineBuilder().
+ withTokenParser(tokParser).
+ withTokenEnricher(lemmaTokEnricher).
+ withTokenEnricher(swTokEnricher).
+ // 1. Wraps lemmas.
+ withTokenEnricher((req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]) =>
+ toks.forEach(t => t.put("lemma",
wrap(t.get[String]("lemma"))))
+ ).
+ // 2. Semantic parser with fixed stemmer which stems only
lemmas.
+ withEntityParser(new NCSemanticEntityParser(lemmaStemmer,
tokParser, elems.asJava)).
+ build()
+ )
+
+ mgr.start()
+
+ try
+ val data = mgr.prepare(txt, null, "userId")
+
+ NCTestUtils.printVariants(txt, data.variants)
+
+ require(expVrnts.size == data.variants.size, s"Variant count:
${data.variants.size}, expected: ${expVrnts.size}")
+
+ val vrnts = mutable.ArrayBuffer.empty[NCVariant] ++ data.variants
+
+ for (expData <- expVrnts)
+ val idx = vrnts.zipWithIndex.
+ find { case (v, idx) => expData ==
v.getEntities.asScala.map(e => Data(e.mkText(), e.getId)) }.
+ getOrElse(throw new AssertionError(s"Cannot find variant:
$expData"))._2
+ vrnts.remove(idx)
+
+ require(vrnts.isEmpty)
+ finally
+ mgr.close()
+ /**
+ *
+ */
+ @Test
+ def test(): Unit =
+ import NCSemanticTestElement as E
+
+ // Lemma.
+ ask(
+ "my test",
+ Seq(E("X", synonyms = Set(deepWrap("my test")))),
+ Seq(Seq(Data("my test", "X")))
+ )
+
+ // Regex.
+ ask(
+ "my test",
+ Seq(E("X", synonyms = Set(wrap("my //[a-z]+//")))),
+ Seq(Seq(Data("my test", "X")))
+ )
+
+ // Both.
+ ask(
+ "my test",
+ Seq(E("X", synonyms = Set(deepWrap("my test"), wrap("my
//[a-z]+//")))),
+ Seq(Seq(Data("my test", "X")))
+ )
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index 76f84f6..2275cab 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -32,60 +32,33 @@ import scala.collection.mutable
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
import scala.jdk.OptionConverters.RichOptional
-
-/**
- *
- * @param id
- * @param synonyms
- * @param values
- * @param groups
- */
-case class NCSemanticTestElement(
- id: String,
- synonyms: Set[String] = Set.empty,
- values: Map[String, Set[String]] = Map.empty,
- groups: Seq[String] = Seq.empty,
- props: Map[String, AnyRef] = Map.empty
-) extends NCSemanticElement:
- override def getId: String = id
- override def getGroups: JSet[String] = groups.toSet.asJava
- override def getValues: JMap[String, JSet[String]] = values.map { (k, v)
=> k -> v.asJava}.asJava
- override def getSynonyms: JSet[String] = synonyms.asJava
- override def getProperties: JMap[String, Object] = props.asJava
-
-/**
- *
- */
-object NCSemanticTestElement:
- def apply(id: String, synonyms: String*) = new NCSemanticTestElement(id,
synonyms = synonyms.toSet)
-
/**
*
*/
class NCSemanticEntityParserSpec:
+ import NCSemanticTestElement as E
+
private val parser =
new NCEnSemanticEntityParser(
Seq(
// Standard.
- NCSemanticTestElement("t1", synonyms = Set("t1")),
+ E("t1", synonyms = Set("t1")),
// No extra synonyms.
- NCSemanticTestElement("t2"),
+ E("t2"),
// Multiple words.
- NCSemanticTestElement("t3", synonyms = Set("t3 t3")),
+ E("t3", synonyms = Set("t3 t3")),
// Value. No extra synonyms.
- NCSemanticTestElement("t4", values = Map("value4" ->
Set.empty)),
+ E("t4", values = Map("value4" -> Set.empty)),
// Value. Multiple words.
- NCSemanticTestElement("t5", values = Map("value5" ->
Set("value 5"))),
+ E("t5", values = Map("value5" -> Set("value 5"))),
// Elements data.
- NCSemanticTestElement("t6", props = Map("testKey" ->
"testValue")),
+ E("t6", props = Map("testKey" -> "testValue")),
// Regex.
- NCSemanticTestElement("t7", synonyms = Set("x //[a-d]+//"))
-
+ E("t7", synonyms = Set("x //[a-d]+//"))
).asJava
)
private val stopWordsEnricher = new NCENStopWordsTokenEnricher()
-
private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher()
/**
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTestElement.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTestElement.scala
new file mode 100644
index 0000000..76528e3
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTestElement.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.semantic
+
+import org.apache.nlpcraft.nlp.entity.parser.NCSemanticElement
+import org.apache.nlpcraft.*
+
+import java.util
+import java.util.{List as JList, Map as JMap, Set as JSet}
+import scala.jdk.CollectionConverters.*
+
+/**
+ *
+ * @param id
+ * @param synonyms
+ * @param values
+ * @param groups
+ */
+case class NCSemanticTestElement(
+ id: String,
+ synonyms: Set[String] = Set.empty,
+ values: Map[String, Set[String]] = Map.empty,
+ groups: Seq[String] = Seq.empty,
+ props: Map[String, AnyRef] = Map.empty
+) extends NCSemanticElement:
+ override def getId: String = id
+ override def getGroups: JSet[String] = groups.toSet.asJava
+ override def getValues: JMap[String, JSet[String]] = values.map { (k, v)
=> k -> v.asJava }.asJava
+ override def getSynonyms: JSet[String] = synonyms.asJava
+ override def getProperties: JMap[String, Object] = props.asJava
+
+/**
+ *
+ */
+object NCSemanticTestElement:
+ def apply(id: String, synonyms: String*) = new NCSemanticTestElement(id,
synonyms = synonyms.toSet)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index 342dca6..971ad4f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -80,6 +80,28 @@ object NCTestUtils:
/**
*
+ * @param req
+ * @param vs
+ */
+ def printVariants(req: String, vs: Seq[NCVariant]): Unit =
+ println(s"Request $req variants:")
+
+ for ((v, idx) <- vs.zipWithIndex)
+ val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position",
"Properties")
+
+ for (e <- v.getEntities.asScala)
+ val toks = e.getTokens.asScala
+ tbl += (
+ e.getId,
+ toks.map(_.getText).mkString("|"),
+ toks.map(p =>
s"${p.getStartCharIndex}-${p.getEndCharIndex}").mkString("|"),
+ mkProps(e)
+ )
+
+ tbl.print(s"Variant: ${idx + 1}")
+
+ /**
+ *
* @param mdl
* @param expectedOk
*/