This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new debebc3 Tests extended.
debebc3 is described below
commit debebc30cc221e7e53d7b52546d0bd3a53e0509f
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Mar 14 16:31:18 2022 +0300
Tests extended.
---
.../internal/impl/NCModelPipelineManager.scala | 3 +-
.../intent/matcher/NCIntentSolverManager.scala | 3 +
.../parser/impl/NCSemanticEntityParserImpl.scala | 20 +++-
.../nlpcraft/nlp/NCENDefaultPipelineSpec.scala | 60 ----------
.../semantic/NCSemanticEntityParserLemmaSpec.scala | 124 +++++++++++++++++++++
.../semantic/NCSemanticEntityParserSpec.scala | 45 ++------
.../parser/semantic/NCSemanticTestElement.scala | 51 +++++++++
.../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 22 ++++
8 files changed, 229 insertions(+), 99 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
index 9652d2c..907bc7e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
@@ -184,7 +184,8 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline:
NCModelPipeline) exte
check()
variants = varFilterOpt.get.filter(req, cfg, variants)
- val vrnts = variants.asScala.toSeq
+ // Skips empty variants.
+ val vrnts = variants.asScala.toSeq.filter(!_.getEntities.isEmpty)
for ((v, i) <- vrnts.zipWithIndex)
val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position",
"Properties")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
index afd1bb1..74cacda 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
@@ -691,6 +691,9 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager,
intents: Map[NCIDLInten
var res: NCResult = mdl.onContext(ctx)
if res != null then
+ // TODO: text.
+ if intents.nonEmpty then logger.warn("`onContext` method overrides
existing intents. They are ignored.")
+
res
else
if intents.isEmpty then
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
index 2b4e35b..5a28675 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala
@@ -230,10 +230,12 @@ class NCSemanticEntityParserImpl(
val cache = mutable.HashSet.empty[Seq[Int]] // Variants (tokens
without stopwords) can be repeated.
case class Holder(elemId: String, tokens: Seq[NCToken], value:
Option[String]):
- private val idxs = tokens.map(_.getIndex).toSet
+ val tokensSet = tokens.toSet
+ val idxs = tokensSet.map(_.getIndex)
+
def isSuperSet(toks: Seq[NCToken]): Boolean = idxs.size >
toks.size && toks.map(_.getIndex).toSet.subsetOf(idxs)
- val hs = mutable.ArrayBuffer.empty[Holder]
+ var hs = mutable.ArrayBuffer.empty[Holder]
for (piece <- getPieces(toks) if
!hs.exists(_.isSuperSet(piece.baseTokens));
variant <- Seq(piece.baseTokens) ++ piece.variants)
@@ -271,6 +273,20 @@ class NCSemanticEntityParserImpl(
if found then add(elemId, Option.when(s.value
!= null)(s.value))
+ // Deletes redundant holders.
+ hs = hs.distinct
+
+ val del = mutable.ArrayBuffer.empty[Holder]
+ // 1. Look at each element with its value.
+ for (((_, _), seq) <- hs.groupBy(h => (h.elemId, h.value)) if seq.size
> 1)
+ // 2. If some variants are duplicated - keep only one, with the most
tokens.
+ val seqIdxs = seq.zipWithIndex
+
+ for ((h, idx) <- seqIdxs if !del.contains(h))
+ del ++= seqIdxs.filter { (_, oIdx) => oIdx != idx }.map { (h,
_) => h }.filter(_.tokensSet.subsetOf(h.tokensSet))
+
+ hs --= del
+
hs.toSeq.map(h => {
val e = elemsMap(h.elemId)
new NCPropertyMapAdapter with NCEntity:
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
deleted file mode 100644
index e8b7931..0000000
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.nlp
-
-import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser,
NCSemanticEntityParser}
-import org.apache.nlpcraft.nlp.util.NCTestModelAdapter
-import org.junit.jupiter.api.Test
-
-import scala.util.Using
-
-class NCENDefaultPipelineSpec:
- /**
- *
- * @param cfg
- * @param pipeline
- * @return
- */
- private def mkModel(cfg: NCModelConfig, pipeline: NCModelPipeline):
NCModel =
- new NCModelAdapter(cfg, pipeline):
- @NCIntent("intent=ls term(act)={has(ent_groups, 'act')}
term(loc)={# == 'ls:loc'}*")
- @NCIntentSample(Array(
- "Please, put the light out in the upstairs bedroom.",
- ))
- def onMatch(
- @NCIntentTerm("act") actEnt: NCEntity,
- @NCIntentTerm("loc") locEnts: List[NCEntity]
- ): NCResult =
- val status = if actEnt.getId == "ls:on" then "on" else "off"
- val locations = if locEnts.isEmpty then "entire house" else
locEnts.map(_.mkText()).mkString(", ")
- new NCResult(
- s"Lights are [$status] in [${locations.toLowerCase}].",
- NCResultType.ASK_RESULT
- )
-
- @Test
- def test(): Unit =
- val cfg = new NCModelConfig("test.id", "Test model", "1.0")
- // Default EN pipeline with default EN semantic parser.
-
- val pipeline = new
NCModelPipelineBuilder().withLanguage("EN").withEntityParser(new
NCEnSemanticEntityParser("models/lightswitch_model.yaml")).build()
-
- Using.resource(new NCModelClient(mkModel(cfg, pipeline))) { client =>
- println(client.ask("Please, put the light out in the upstairs
bedroom.", null, "userId").getBody)
- }
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
new file mode 100644
index 0000000..8ca532e
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.semantic
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.impl.*
+import org.apache.nlpcraft.internal.util.*
+import org.apache.nlpcraft.nlp.entity.parser.*
+import
org.apache.nlpcraft.nlp.token.enricher.{NCENOpenNlpLemmaPosTokenEnricher,
NCENStopWordsTokenEnricher}
+import org.apache.nlpcraft.nlp.token.enricher.en.*
+import org.apache.nlpcraft.nlp.token.parser.NCENOpenNLPTokenParser
+import org.apache.nlpcraft.nlp.util.NCTestUtils
+import org.apache.nlpcraft.nlp.util.opennlp.*
+import org.junit.jupiter.api.*
+
+import java.util
+import java.util.{UUID, List as JList, Map as JMap, Set as JSet}
+import scala.collection.mutable
+import scala.jdk.CollectionConverters.*
+
+/**
+ *
+ */
+class NCSemanticEntityParserLemmaSpec:
+ private val lemmaTokEnricher = new NCENOpenNlpLemmaPosTokenEnricher
+ private val swTokEnricher = new NCENStopWordsTokenEnricher
+ private val tokParser = new NCENOpenNLPTokenParser
+ private val lemmaStemmer =
+ new NCSemanticStemmer():
+ override def stem(txt: String): String = if wrapped(txt) then
unwrap(txt) else UUID.randomUUID().toString
+
+ case class Data(text: String, elemId: String)
+
+ private def wrap(s: String): String =
+ require(s != null)
+ s"_$s"
+
+ private def deepWrap(s: String): String =
+ require(s != null)
+ s.split(" ").map(wrap).mkString(" ")
+
+ private def wrapped(s: String): Boolean = s.length > 1 && s.head == '_'
+ private def unwrap(s: String): String =
+ require(wrapped(s))
+ s.drop(1)
+
+ private def ask(txt: String, elems: Seq[NCSemanticTestElement], expVrnts:
Seq[Seq[Data]]): Unit =
+ val mgr = new NCModelPipelineManager(
+ CFG,
+ new NCModelPipelineBuilder().
+ withTokenParser(tokParser).
+ withTokenEnricher(lemmaTokEnricher).
+ withTokenEnricher(swTokEnricher).
+ // 1. Wraps lemmas.
+ withTokenEnricher((req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]) =>
+ toks.forEach(t => t.put("lemma",
wrap(t.get[String]("lemma"))))
+ ).
+ // 2. Semantic parser with fixed stemmer which stems only
lemmas.
+ withEntityParser(new NCSemanticEntityParser(lemmaStemmer,
tokParser, elems.asJava)).
+ build()
+ )
+
+ mgr.start()
+
+ try
+ val data = mgr.prepare(txt, null, "userId")
+
+ NCTestUtils.printVariants(txt, data.variants)
+
+ require(expVrnts.size == data.variants.size, s"Variant count:
${data.variants.size}, expected: ${expVrnts.size}")
+
+ val vrnts = mutable.ArrayBuffer.empty[NCVariant] ++ data.variants
+
+ for (expData <- expVrnts)
+ val idx = vrnts.zipWithIndex.
+ find { case (v, idx) => expData ==
v.getEntities.asScala.map(e => Data(e.mkText(), e.getId)) }.
+ getOrElse(throw new AssertionError(s"Cannot find variant:
$expData"))._2
+ vrnts.remove(idx)
+
+ require(vrnts.isEmpty)
+ finally
+ mgr.close()
+ /**
+ *
+ */
+ @Test
+ def test(): Unit =
+ import NCSemanticTestElement as E
+
+ // Lemma.
+ ask(
+ "my test",
+ Seq(E("X", synonyms = Set(deepWrap("my test")))),
+ Seq(Seq(Data("my test", "X")))
+ )
+
+ // Regex.
+ ask(
+ "my test",
+ Seq(E("X", synonyms = Set(wrap("my //[a-z]+//")))),
+ Seq(Seq(Data("my test", "X")))
+ )
+
+ // Both.
+ ask(
+ "my test",
+ Seq(E("X", synonyms = Set(deepWrap("my test"), wrap("my
//[a-z]+//")))),
+ Seq(Seq(Data("my test", "X")))
+ )
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index 76f84f6..2275cab 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -32,60 +32,33 @@ import scala.collection.mutable
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
import scala.jdk.OptionConverters.RichOptional
-
-/**
- *
- * @param id
- * @param synonyms
- * @param values
- * @param groups
- */
-case class NCSemanticTestElement(
- id: String,
- synonyms: Set[String] = Set.empty,
- values: Map[String, Set[String]] = Map.empty,
- groups: Seq[String] = Seq.empty,
- props: Map[String, AnyRef] = Map.empty
-) extends NCSemanticElement:
- override def getId: String = id
- override def getGroups: JSet[String] = groups.toSet.asJava
- override def getValues: JMap[String, JSet[String]] = values.map { (k, v)
=> k -> v.asJava}.asJava
- override def getSynonyms: JSet[String] = synonyms.asJava
- override def getProperties: JMap[String, Object] = props.asJava
-
-/**
- *
- */
-object NCSemanticTestElement:
- def apply(id: String, synonyms: String*) = new NCSemanticTestElement(id,
synonyms = synonyms.toSet)
-
/**
*
*/
class NCSemanticEntityParserSpec:
+ import NCSemanticTestElement as E
+
private val parser =
new NCEnSemanticEntityParser(
Seq(
// Standard.
- NCSemanticTestElement("t1", synonyms = Set("t1")),
+ E("t1", synonyms = Set("t1")),
// No extra synonyms.
- NCSemanticTestElement("t2"),
+ E("t2"),
// Multiple words.
- NCSemanticTestElement("t3", synonyms = Set("t3 t3")),
+ E("t3", synonyms = Set("t3 t3")),
// Value. No extra synonyms.
- NCSemanticTestElement("t4", values = Map("value4" ->
Set.empty)),
+ E("t4", values = Map("value4" -> Set.empty)),
// Value. Multiple words.
- NCSemanticTestElement("t5", values = Map("value5" ->
Set("value 5"))),
+ E("t5", values = Map("value5" -> Set("value 5"))),
// Elements data.
- NCSemanticTestElement("t6", props = Map("testKey" ->
"testValue")),
+ E("t6", props = Map("testKey" -> "testValue")),
// Regex.
- NCSemanticTestElement("t7", synonyms = Set("x //[a-d]+//"))
-
+ E("t7", synonyms = Set("x //[a-d]+//"))
).asJava
)
private val stopWordsEnricher = new NCENStopWordsTokenEnricher()
-
private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher()
/**
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTestElement.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTestElement.scala
new file mode 100644
index 0000000..76528e3
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTestElement.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.semantic
+
+import org.apache.nlpcraft.nlp.entity.parser.NCSemanticElement
+import org.apache.nlpcraft.*
+
+import java.util
+import java.util.{List as JList, Map as JMap, Set as JSet}
+import scala.jdk.CollectionConverters.*
+
+/**
+ *
+ * @param id
+ * @param synonyms
+ * @param values
+ * @param groups
+ */
+case class NCSemanticTestElement(
+ id: String,
+ synonyms: Set[String] = Set.empty,
+ values: Map[String, Set[String]] = Map.empty,
+ groups: Seq[String] = Seq.empty,
+ props: Map[String, AnyRef] = Map.empty
+) extends NCSemanticElement:
+ override def getId: String = id
+ override def getGroups: JSet[String] = groups.toSet.asJava
+ override def getValues: JMap[String, JSet[String]] = values.map { (k, v)
=> k -> v.asJava }.asJava
+ override def getSynonyms: JSet[String] = synonyms.asJava
+ override def getProperties: JMap[String, Object] = props.asJava
+
+/**
+ *
+ */
+object NCSemanticTestElement:
+ def apply(id: String, synonyms: String*) = new NCSemanticTestElement(id,
synonyms = synonyms.toSet)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index 342dca6..971ad4f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -80,6 +80,28 @@ object NCTestUtils:
/**
*
+ * @param req
+ * @param vs
+ */
+ def printVariants(req: String, vs: Seq[NCVariant]): Unit =
+ println(s"Request $req variants:")
+
+ for ((v, idx) <- vs.zipWithIndex)
+ val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position",
"Properties")
+
+ for (e <- v.getEntities.asScala)
+ val toks = e.getTokens.asScala
+ tbl += (
+ e.getId,
+ toks.map(_.getText).mkString("|"),
+ toks.map(p =>
s"${p.getStartCharIndex}-${p.getEndCharIndex}").mkString("|"),
+ mkProps(e)
+ )
+
+ tbl.print(s"Variant: ${idx + 1}")
+
+ /**
+ *
* @param mdl
* @param expectedOk
*/