This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 72c6624 CR WIP
72c6624 is described below
commit 72c6624567fe32b5642442ed5d8f2c02591080fe
Author: Aaron Radzinski <[email protected]>
AuthorDate: Thu Jan 13 12:35:45 2022 -0800
CR WIP
---
.../org/apache/nlpcraft/NCVariantValidator.java | 4 +-
.../nlpcraft/internal/impl/NCModelClientImpl.scala | 12 +-
...cessor.scala => NCModelPipelineProcessor.scala} | 104 +++++++++--------
...ec.scala => NCModelPipelineProcessorSpec.scala} | 18 +--
.../internal/NCPipelineValidatorsSpec.scala | 126 ---------------------
5 files changed, 72 insertions(+), 192 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
index 98aeea5..0252faf 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
@@ -27,7 +27,7 @@ public interface NCVariantValidator extends NCLifecycle {
*
* @param req
* @param cfg
- * @param toks
+ * @param vars
*/
- List<NCVariant> filter(NCRequest req, NCModelConfig cfg, List<NCVariant> variants);
+ List<NCVariant> validateAndFilter(NCRequest req, NCModelConfig cfg, List<NCVariant> vars);
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala
index f49917f..5fc70bb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala
@@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.*
* @param mdl
*/
class NCModelClientImpl(mdl: NCModel) extends LazyLogging:
- private val plProc = NCPipelineProcessor(mdl)
+ private val plProc = NCModelPipelineProcessor(mdl)
private var plSrvs: Seq[NCLifecycle] = _
init(mdl.getConfig, mdl.getPipeline)
@@ -68,14 +68,14 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging:
/**
*
- * @param action
- * @param actionVerb
+ * @param act
+ * @param actVerb
*/
- private def processServices(action: NCLifecycle => Unit, actionVerb: String): Unit =
+ private def processServices(act: NCLifecycle => Unit, actVerb: String): Unit =
NCUtils.execPar(plSrvs.map(p =>
() => {
- action(p)
- logger.info(s"Service $actionVerb: '${p.getClass.getName}'")
+ act(p)
+ logger.info(s"Service $actVerb: '${p.getClass.getName}'")
}
)*)(ExecutionContext.Implicits.global)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCPipelineProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineProcessor.scala
similarity index 66%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCPipelineProcessor.scala
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineProcessor.scala
index ff78b7b..b3b730b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCPipelineProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineProcessor.scala
@@ -19,34 +19,34 @@ package org.apache.nlpcraft.internal.impl
import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.internal.impl.NCSentenceHelper
-import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.internal.impl.*
+import org.apache.nlpcraft.internal.util.*
import java.util
import java.util.concurrent.*
-import java.util.concurrent.atomic.AtomicReference
+import java.util.concurrent.atomic.*
import java.util.{ArrayList, UUID, List as JList, Map as JMap}
import scala.collection.immutable
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
-// TODO: move it to right package.
-
/**
*
+ * @param mdl
*/
-object NCPipelineProcessor {
- case class VariantsHolder(request: NCRequest, variants: Seq[NCVariant], checkCancel: Option[() => Unit])
-}
-
-import org.apache.nlpcraft.internal.impl.NCPipelineProcessor.*
+class NCModelPipelineProcessor(mdl: NCModel) extends LazyLogging:
+ /**
+ *
+ * @param req
+ * @param vars
+ * @param checkCancel
+ */
+ case class VariantsHolder(req: NCRequest, vars: Seq[NCVariant], checkCancel: Option[() => Unit])
-/**
- *
- * @param mdl */
-class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
require(mdl != null)
require(mdl.getPipeline.getTokenParser != null)
+ require(mdl.getPipeline.getEntityParsers != null)
+ require(mdl.getPipeline.getEntityParsers.size() > 0)
private val pipeline = mdl.getPipeline
private val pool = new java.util.concurrent.ForkJoinPool()
@@ -72,7 +72,7 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
* @param h
* @return
*/
- private def matchAndExecute(h: VariantsHolder): NCResult = ??? // TODO: implement.
+ private def matchIntent(h: VariantsHolder): NCResult = ???
/**
*
@@ -82,17 +82,24 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
* @param checkCancel
* @return
*/
- // It returns intermediate variants holder just for test reasons.
- private[internal] def prepare(
- txt: String, data: JMap[String, AnyRef], usrId: String, checkCancel: Option[() => Unit] = None
+ private[internal] def prepVariants(
+ txt: String,
+ data: JMap[String, AnyRef],
+ usrId: String,
+ checkCancel: Option[() => Unit] = None
): VariantsHolder =
require(txt != null && usrId != null)
- val check = checkCancel.getOrElse(() => ())
-
- val toks = tokParser.tokenize(txt)
- if toks.isEmpty then throw new NCException(s"Unsupported empty request: $txt") // TODO: error text
+ /**
+ *
+ * @param ents
+ * @return
+ */
+ def variant(ents: Seq[NCEntity]): NCVariant =
+ new NCVariant:
+ override val getEntities: JList[NCEntity] = ents.asJava
+ val check = checkCancel.getOrElse(() => ())
val req: NCRequest = new NCRequest:
override val getUserId: String = usrId
override val getRequestId: String = UUID.randomUUID().toString
@@ -100,9 +107,12 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
override val getReceiveTimestamp: Long = System.currentTimeMillis()
override val getRequestData: JMap[String, AnyRef] = data
- for (e <- tokEnrichers)
- check()
- e.enrich(req, cfg, toks)
+ val toks = tokParser.tokenize(txt)
+
+ if toks.size() > 0 then
+ for (e <- tokEnrichers)
+ check()
+ e.enrich(req, cfg, toks)
for (v <- tokVals)
check()
@@ -112,49 +122,39 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
for (p <- entParsers)
check()
- val ents = p.parse(req, cfg, toks)
- if ents == null then
- // TODO: error text.
- throw new NCException(s"Invalid entities parser null result [text=$txt, parser=${p.getClass.getName}]")
- entsList.addAll(ents)
+ entsList.addAll(p.parse(req, cfg, toks))
- // TODO: error text.
- if entsList.isEmpty then throw new NCException(s"No entities found for text: $txt")
+ if entsList.size() > 0 then
+ for (e <- entEnrichers)
+ check()
+ e.enrich(req, cfg, entsList)
- for (e <- entEnrichers)
- check()
- e.enrich(req, cfg, entsList)
for (v <- entVals)
check()
v.validate(req, cfg, entsList)
val entities = entsList.asScala.toSeq
- val overEntities: Seq[Set[NCEntity]] =
+ val overlapEnts: Seq[Set[NCEntity]] =
toks.asScala.
// Looks at each token.
map(t => t.getIndex -> entities.filter(_.getTokens.contains(t))).
// Collects all overlapped entities.
map { case (_, ents) => if (ents.sizeIs > 1) ents.toSet else Set.empty }.filter(_.nonEmpty).toSeq
- def mkVariant(entities: Seq[NCEntity]): NCVariant =
- new NCVariant:
- override def getEntities: JList[NCEntity] = entities.asJava
-
var variants: JList[NCVariant] =
- if overEntities.nonEmpty then
- val dels = NCSentenceHelper.findCombinations(overEntities.map(_.asJava).asJava, pool).asScala.map(_.asScala)
-
- dels.map(delComb =>
+ if overlapEnts.nonEmpty then
+ NCSentenceHelper.findCombinations(overlapEnts.map(_.asJava).asJava, pool)
+ .asScala.map(_.asScala).map(delComb =>
val delSet = delComb.toSet
- mkVariant(entities.filter(e => !delSet.contains(e)))
+ variant(entities.filter(!delSet.contains(_)))
).asJava
else
- Seq(mkVariant(entities)).asJava
+ Seq(variant(entities)).asJava
for (v <- varVals)
check()
- variants = v.filter(req, cfg, variants)
+ variants = v.validateAndFilter(req, cfg, variants)
VariantsHolder(req, variants.asScala.toSeq, checkCancel)
@@ -164,9 +164,12 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
* @param data
* @param usrId
* @return
+ * @throws NCRejection
+ * @throws NCCuration
+ * @throws NCException
*/
def askSync(txt: String, data: JMap[String, AnyRef], usrId: String): NCResult =
- matchAndExecute(prepare(txt, data, usrId))
+ matchIntent(prepVariants(txt, data, usrId))
/**
*
@@ -174,6 +177,9 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
* @param data
* @param usrId
* @return
+ * @throws NCRejection
+ * @throws NCCuration
+ * @throws NCException
*/
def ask(txt: String, data: JMap[String, AnyRef], usrId: String): CompletableFuture[NCResult] =
val fut = new CompletableFuture[NCResult]
@@ -185,7 +191,7 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging :
logger.warn(txt)
throw new NCException(txt)
- fut.completeAsync(() => matchAndExecute(prepare(txt, data, usrId, Option(check))))
+ fut.completeAsync(() => matchIntent(prepVariants(txt, data, usrId, Option(check))))
/**
*
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCModelPipelineProcessorSpec.scala
similarity index 87%
rename from nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala
rename to nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCModelPipelineProcessorSpec.scala
index 736da78..6eec102 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCModelPipelineProcessorSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.internal
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.internal.impl.NCPipelineProcessor
+import org.apache.nlpcraft.internal.impl.NCModelPipelineProcessor
import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNlpEntityParserImpl
import org.apache.nlpcraft.nlp.entity.parser.semantic.*
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnPorterStemmer
@@ -36,7 +36,7 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCPipelineProcessorSpec:
+class NCModelPipelineProcessorSpec:
@Test
def test(): Unit =
def test(txt: String, variantCnt: Int, elements: NCSemanticElement*): Unit =
@@ -47,15 +47,15 @@ class NCPipelineProcessorSpec:
pipeline.getEntityParsers.add(parser)
val res =
- new NCPipelineProcessor(new NCModelAdapter(CFG, pipeline)).
- prepare(txt, null, "userId")
+ new NCModelPipelineProcessor(new NCModelAdapter(CFG, pipeline)).
+ prepVariants(txt, null, "userId")
- println(s"Variants count: ${res.variants.size}")
- for ((v, idx) <- res.variants.zipWithIndex)
+ println(s"Variants count: ${res.vars.size}")
+ for ((v, idx) <- res.vars.zipWithIndex)
println(s"Variant: $idx")
NCTestUtils.printEntities(txt, v.getEntities.asScala.toSeq)
- require(res.variants.sizeIs == variantCnt)
+ require(res.vars.sizeIs == variantCnt)
test("t1 t2", 4, NCSemanticTestElement("t1", "t2"), NCSemanticTestElement("t2", "t1"))
test("t1 t2", 2, NCSemanticTestElement("t1", "t2"), NCSemanticTestElement("t2"))
@@ -66,7 +66,7 @@ class NCPipelineProcessorSpec:
* @param itersCnt
* @return
*/
- private def mkSlowPipelineProcessor(delayMs: Long, itersCnt: Int): NCPipelineProcessor =
+ private def mkSlowPipelineProcessor(delayMs: Long, itersCnt: Int): NCModelPipelineProcessor =
val pipeline = EN_PIPELINE.clone()
pipeline.getEntityParsers.clear()
@@ -80,7 +80,7 @@ class NCPipelineProcessorSpec:
(0 until itersCnt).foreach(i => pipeline.getEntityParsers.add(mkSlowParser(i)))
- NCPipelineProcessor(new NCModelAdapter(CFG, pipeline))
+ NCModelPipelineProcessor(new NCModelAdapter(CFG, pipeline))
@Test
def testCancel(): Unit =
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineValidatorsSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineValidatorsSpec.scala
deleted file mode 100644
index 2790368..0000000
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineValidatorsSpec.scala
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.internal
-
-import org.apache.nlpcraft.*
-import org.apache.nlpcraft.internal.impl.NCPipelineProcessor
-import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNlpEntityParser
-import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.opennlp.*
-import org.junit.jupiter.api.*
-
-import java.util
-import java.util.List as JList
-import scala.concurrent.CancellationException
-import scala.jdk.CollectionConverters.*
-
-/**
- *
- */
-class NCPipelineValidatorsSpec:
- class TestException extends Exception
-
- /**
- *
- * @param addComponent
- * @return
- */
- private def prepare(addComponent: NCTestPipeline => Unit): NCPipelineProcessor.VariantsHolder =
- val pipeline = EN_PIPELINE.clone()
-
- pipeline.getEntityParsers.clear()
- pipeline.getEntityParsers.add(new NCNlpEntityParser)
- addComponent(pipeline)
-
- NCPipelineProcessor(new NCModelAdapter(CFG, pipeline)).
- prepare("test1 test2", null, "testId")
-
- /**
- *
- * @param addComponent
- * @param isError
- * @param expVariantCnt
- */
- private def testOk(addComponent: NCTestPipeline => Unit, expVariantCnt: Int): Unit =
- val h = prepare(addComponent)
-
- println(s"Variants count: ${h.variants.size}")
-
- for ((v, idx) <- h.variants.zipWithIndex)
- println(s"Variant: $idx")
- NCTestUtils.printEntities(h.request.getText, v.getEntities.asScala.toSeq)
-
- require(h.variants.sizeIs == expVariantCnt)
-
- /**
- *
- * @param addComponent
- * @param isError
- * @param expVariantCnt
- */
- private def testError(addComponent: NCTestPipeline => Unit): Unit =
- Assertions.assertThrows(classOf[TestException], () => prepare(addComponent))
-
- @Test
- def testNoValidator(): Unit = testOk(_ => (), expVariantCnt = 1)
-
- @Test
- def testVariantValidator1(): Unit =
- val v = new NCVariantValidator:
- override def filter(req: NCRequest, cfg: NCModelConfig, variants: JList[NCVariant]): JList[NCVariant] =
- java.util.Collections.emptyList()
-
- testOk(_.getVariantValidators.add(v), expVariantCnt = 0)
-
- @Test
- def testVariantValidator2(): Unit =
- val v = new NCVariantValidator:
- override def filter(req: NCRequest, cfg: NCModelConfig, variants: JList[NCVariant]): JList[NCVariant] =
- variants
-
- testOk(_.getVariantValidators.add(v), expVariantCnt = 1)
-
- @Test
- def testTokenValidator1(): Unit =
- val v = new NCTokenValidator:
- override def validate(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): Unit =
- throw new TestException()
-
- testError(_.getTokenValidators.add(v))
-
- @Test
- def testTokenValidator2(): Unit =
- val v = new NCTokenValidator:
- override def validate(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): Unit = ()
-
- testOk(_.getTokenValidators.add(v), 1)
-
- @Test
- def testEntityValidator1(): Unit =
- val v = new NCEntityValidator:
- override def validate(req: NCRequest, cfg: NCModelConfig, ents: JList[NCEntity]): Unit =
- throw new TestException()
-
- testError(_.getEntityValidators.add(v))
-
- @Test
- def testEntityValidator2(): Unit =
- val v = new NCEntityValidator:
- override def validate(req: NCRequest, cfg: NCModelConfig, ents: JList[NCEntity]): Unit = ()
-
- testOk(_.getEntityValidators.add(v), 1)
\ No newline at end of file