This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-41-1 by this push:
new 4ed0e3f WIP.
4ed0e3f is described below
commit 4ed0e3f5a8af6ce2f8560ed3fa594c54d3446839
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Sep 10 12:51:57 2020 +0300
WIP.
---
.../apache/nlpcraft/model/impl/NCTokenImpl.scala | 8 +-
.../test/impl/NCTestAutoModelValidatorImpl.scala | 11 +-
.../nlpcraft/probe/mgrs/cmd/NCCommandManager.scala | 8 +-
.../probe/mgrs/conn/NCConnectionManager.scala | 4 +-
.../probe/mgrs/deploy/NCDeployManager.scala | 521 ++++++++++++++++++++-
.../probe/mgrs/deploy/NCIntentScanner.scala | 518 --------------------
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 35 +-
.../NCModelWrapper.scala => nlp/NCModelData.scala} | 8 +-
.../nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala | 5 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 42 +-
.../dictionary/NCDictionaryEnricher.scala | 7 +-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 7 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 35 +-
.../enrichers/relation/NCRelationEnricher.scala | 7 +-
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 7 +-
.../enrichers/stopword/NCStopWordEnricher.scala | 20 +-
.../suspicious/NCSuspiciousNounsEnricher.scala | 9 +-
.../mgrs/nlp/validate/NCValidateManager.scala | 14 +-
18 files changed, 608 insertions(+), 658 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 0c5dd48..39b4914 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -23,7 +23,7 @@ import java.util.Collections
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
+import org.apache.nlpcraft.probe.mgrs.nlp.NCModelData
import scala.collection.JavaConverters._
import scala.collection.{Seq, mutable}
@@ -99,7 +99,7 @@ private[nlpcraft] class NCTokenImpl(
}
private[nlpcraft] object NCTokenImpl {
- def apply(mdl: NCModelWrapper, srvReqId: String, tok: NCNlpSentenceToken):
NCTokenImpl = {
+ def apply(mdl: NCModelData, srvReqId: String, tok: NCNlpSentenceToken):
NCTokenImpl = {
// nlpcraft:nlp and some optional (after collapsing).
require(tok.size <= 2, s"Unexpected token [size=${tok.size},
token=$tok]")
@@ -142,7 +142,7 @@ private[nlpcraft] object NCTokenImpl {
elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k,
v.asInstanceOf[java.io.Serializable]) }
new NCTokenImpl(
- mdl.proxy,
+ mdl.model,
srvReqId = srvReqId,
id = elm.getId,
grps = elm.getGroups.asScala,
@@ -165,7 +165,7 @@ private[nlpcraft] object NCTokenImpl {
md.put("nlpcraft:nlp:freeword", !isStop && note.isNlp)
new NCTokenImpl(
- mdl.proxy,
+ mdl.model,
srvReqId = srvReqId,
id = note.noteType, // Use NLP note type as synthetic
element ID.
grps = Seq(note.noteType), // Use NLP note type as
synthetic element group.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
index 9cc7a80..ae87657 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
@@ -22,8 +22,8 @@ import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.common.util.NCUtils
import org.apache.nlpcraft.model.tools.embedded.NCEmbeddedProbe
import org.apache.nlpcraft.model.tools.test.NCTestClientBuilder
-import org.apache.nlpcraft.probe.mgrs.deploy.NCIntentScanner
import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
/**
* Implementation for `NCTestAutoModelValidator` class.
@@ -58,17 +58,10 @@ private [test] object NCTestAutoModelValidatorImpl extends
LazyLogging {
@throws[Exception]
private def isValid(classes: Seq[Class[_ <: NCModel]]) = {
- val samples =
- classes.
- map(_.getDeclaredConstructor().newInstance()).
- map(mdl ⇒ mdl.getId → NCIntentScanner.scanSamples(mdl).toMap).
- toMap.
- filter(_._2.nonEmpty)
-
NCEmbeddedProbe.start(classes: _*)
try
- process(samples)
+ process(NCModelManager.getAllModelsData().map(p ⇒ p.model.getId →
p.samples.toMap).toMap.filter(_._2.nonEmpty))
finally
NCEmbeddedProbe.stop()
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
index 5442d71..f53aa6e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
@@ -108,12 +108,12 @@ object NCCommandManager extends NCService {
case "S2P_MODEL_INFO" ⇒
val mdlId = msg.data[String]("mdlId")
- val w =
NCModelManager.getModelWrapper(mdlId).getOrElse(throw new NCE(s"Model not
found: '$mdlId'"))
+ val mdlData = NCModelManager.getModelData(mdlId)
- val macros =
w.proxy.getMacros.asInstanceOf[Serializable]
- val syns = w.proxy.getElements.asScala.
+ val macros =
mdlData.model.getMacros.asInstanceOf[Serializable]
+ val syns = mdlData.model.getElements.asScala.
map(p ⇒ p.getId →
p.getSynonyms).toMap.asJava.asInstanceOf[Serializable]
- val samples = w.samples.map(p ⇒ p._1 → p._2.asJava).
+ val samples = mdlData.samples.map(p ⇒ p._1 →
p._2.asJava).
asJava.asInstanceOf[Serializable]
NCConnectionManager.send(
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index 5a8c290..bacfe76 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -227,8 +227,8 @@ object NCConnectionManager extends NCService {
"PROBE_HOST_ADDR" → localHost.getHostAddress,
"PROBE_HW_ADDR" → hwAddrs,
"PROBE_MODELS" →
- NCModelManager.getAllModelWrappers().map(wrapper ⇒ {
- val mdl = wrapper.proxy
+ NCModelManager.getAllModelsData().map(wrapper ⇒ {
+ val mdl = wrapper.model
// Model already validated.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index f4faa8a..7671661 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -18,6 +18,9 @@
package org.apache.nlpcraft.probe.mgrs.deploy
import java.io._
+import java.lang.reflect.{InvocationTargetException, Method,
ParameterizedType, Type}
+import java.util
+import java.util.function.Function
import java.util.jar.{JarInputStream => JIS}
import java.util.regex.{Pattern, PatternSyntaxException}
@@ -25,19 +28,21 @@ import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.config.NCConfigurable
import org.apache.nlpcraft.common.makro.NCMacroParser
-import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
+import org.apache.nlpcraft.common.nlp.core.{NCNlpCoreManager,
NCNlpPorterStemmer}
import org.apache.nlpcraft.common.util.NCUtils.{DSL_FIX, REGEX_FIX}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.factories.basic.NCBasicModelFactory
-import org.apache.nlpcraft.model.intent.impl.NCIntentSolver
+import org.apache.nlpcraft.model.intent.impl.{NCIntentDslCompiler,
NCIntentSolver}
+import org.apache.nlpcraft.model.intent.utils.NCDslIntent
import org.apache.nlpcraft.probe.mgrs.NCSynonymChunkKind.{DSL, REGEX, TEXT}
import org.apache.nlpcraft.probe.mgrs.{NCSynonym, NCSynonymChunk, deploy}
import org.apache.nlpcraft.probe.mgrs.model.NCModelSynonymDslCompiler
+import org.apache.nlpcraft.probe.mgrs.nlp.NCModelData
import resource.managed
import scala.collection.JavaConverters._
import scala.collection.convert.DecorateAsScala
-import scala.collection.{Seq, mutable}
+import scala.collection.{Map, Seq, Set, mutable}
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
import scala.util.control.Exception._
@@ -48,7 +53,36 @@ object NCDeployManager extends NCService with
DecorateAsScala {
private final val TOKENS_PROVIDERS_PREFIXES = Set("nlpcraft:", "google:",
"stanford:", "opennlp:", "spacy:")
private final val ID_REGEX = "^[_a-zA-Z]+[a-zA-Z0-9:-_]*$"
- @volatile private var wrappers: ArrayBuffer[NCModelWrapper] = _
+ type Callback = Function[NCIntentMatch, NCResult]
+
+ private final val CLS_INTENT = classOf[NCIntent]
+ private final val CLS_INTENT_REF = classOf[NCIntentRef]
+ private final val CLS_TERM = classOf[NCIntentTerm]
+ private final val CLS_QRY_RES = classOf[NCResult]
+ private final val CLS_SLV_CTX = classOf[NCIntentMatch]
+ private final val CLS_SAMPLE = classOf[NCIntentSample]
+
+ // Java and scala lists.
+ private final val CLS_SCALA_SEQ = classOf[Seq[_]]
+ private final val CLS_SCALA_LST = classOf[List[_]]
+ private final val CLS_SCALA_OPT = classOf[Option[_]]
+ private final val CLS_JAVA_LST = classOf[util.List[_]]
+ private final val CLS_JAVA_OPT = classOf[util.Optional[_]]
+
+ private final val CLS_TOKEN = classOf[NCToken]
+
+ private final val COMP_CLS: Set[Class[_]] = Set(
+ CLS_SCALA_SEQ,
+ CLS_SCALA_LST,
+ CLS_SCALA_OPT,
+ CLS_JAVA_LST,
+ CLS_JAVA_OPT
+ )
+
+ private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
+
+
+ @volatile private var wrappers: ArrayBuffer[NCModelData] = _
@volatile private var modelFactory: NCModelFactory = _
object Config extends NCConfigurable {
@@ -94,7 +128,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
* @return
*/
@throws[NCE]
- private def wrap(mdl: NCModel): NCModelWrapper = {
+ private def wrap(mdl: NCModel): NCModelData = {
val mdlId = mdl.getId
@throws[NCE]
@@ -119,7 +153,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
)
// Scan for intent annotations in the model class.
- val intents = NCIntentScanner.scan(mdl)
+ val intents = scanIntents(mdl)
val parser = new NCMacroParser
@@ -391,16 +425,16 @@ object NCDeployManager extends NCService with
DecorateAsScala {
mdl.getEnabledBuiltInTokens.asScala
)
- deploy.NCModelWrapper(
- proxy = mdl,
+ NCModelData(
+ model = mdl,
solver = solver,
synonyms = mkFastAccessMap(filter(syns, dsl = false)),
synonymsDsl = mkFastAccessMap(filter(syns, dsl = true)),
- addStopWordsStems = addStopWords,
- exclStopWordsStems = exclStopWords,
- suspWordsStems = suspWords,
+ addStopWordsStems = addStopWords.toSet,
+ exclStopWordsStems = exclStopWords.toSet,
+ suspWordsStems = suspWords.toSet,
elements = mdl.getElements.asScala.map(elm ⇒ (elm.getId,
elm)).toMap,
- samples = NCIntentScanner.scanSamples(mdl)
+ samples = scanSamples(mdl)
)
}
@@ -425,7 +459,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
* @param clsName Model class name.
*/
@throws[NCE]
- private def makeModelWrapper(clsName: String): NCModelWrapper =
+ private def makeModelWrapper(clsName: String): NCModelData =
try
wrap(
makeModelFromSource(
@@ -481,7 +515,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
* @param jarFile JAR file to extract from.
*/
@throws[NCE]
- private def extractModels(jarFile: File): Seq[NCModelWrapper] = {
+ private def extractModels(jarFile: File): Seq[NCModelData] = {
val clsLdr = Thread.currentThread().getContextClassLoader
val classes = mutable.ArrayBuffer.empty[Class[_ <: NCModel]]
@@ -521,7 +555,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
@throws[NCE]
override def start(parent: Span = null): NCService =
startScopedSpan("start", parent) { _ ⇒
modelFactory = new NCBasicModelFactory
- wrappers = ArrayBuffer.empty[NCModelWrapper]
+ wrappers = ArrayBuffer.empty[NCModelData]
// Initialize model factory (if configured).
Config.modelFactoryType match {
@@ -555,7 +589,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
// Verify models' identities.
wrappers.foreach(w ⇒ {
- val mdl = w.proxy
+ val mdl = w.model
val mdlName = mdl.getName
val mdlId = mdl.getId
val mdlVer = mdl.getVersion
@@ -584,7 +618,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
throw new NCE(s"Model element ID '${elm.getId}' does not
match '$ID_REGEX' regex in: $mdlId")
})
- if (U.containsDups(wrappers.map(_.proxy.getId).toList))
+ if (U.containsDups(wrappers.map(_.model.getId).toList))
throw new NCE("Duplicate model IDs detected.")
super.start()
@@ -605,7 +639,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
*
* @return
*/
- def getModels: Seq[NCModelWrapper] = wrappers
+ def getModels: Seq[NCModelData] = wrappers
/**
* Permutes and drops duplicated.
@@ -912,4 +946,455 @@ object NCDeployManager extends NCService with
DecorateAsScala {
throw new NCE(s"Element has duplicate synonyms with elements
'$cross' [modelId=$mdlId, elementId=$elemId]")
}
}
+
+
+ /**
+ * Renders a class reference for use in error messages.
+ *
+ * @param cls Class to render; may be `null`.
+ * @return The text `null` when `cls` is `null`, otherwise the simple class name in single quotes.
+ */
+ private def class2Str(cls: Class[_]): String = if (cls == null) "null"
else s"'${cls.getSimpleName}'"
+
+ /**
+ * Builds a short description of a method for use in error messages.
+ * Simple (unqualified) names are used for the declaring class and for the parameter types.
+ *
+ * @param m Method to describe.
+ * @return Text in the form `method 'Class#name(Type1, Type2)'`.
+ */
+ private def method2Str(m: Method): String = {
+ val cls = m.getDeclaringClass.getSimpleName
+ val name = m.getName
+ val args = m.getParameters.map(_.getType.getSimpleName).mkString(", ")
+
+ s"method '$cls#$name($args)'"
+ }
+
+ /**
+ * Builds a description of a method argument for use in error messages.
+ *
+ * @param m Method that owns the argument.
+ * @param argIdx Argument index; it is printed increased by one when `cxtFirstParam` is `true`.
+ * @param cxtFirstParam Whether the printed index has to be shifted by one. The callers visible in
+ * this object pass `true` when the first method parameter is the `NCIntentMatch` context.
+ * @return Text in the form `argument #N of method '...'`.
+ */
+ private def arg2Str(m: Method, argIdx: Int, cxtFirstParam: Boolean):
String =
+ s"argument #${argIdx + (if (cxtFirstParam) 1 else 0)} of
${method2Str(m)}"
+
+ /**
+ * Validates an intent callback method and wraps it into a `Callback`.
+ *
+ * Checks performed before the callback is created:
+ * - the method return type is `NCResult`;
+ * - every parameter, except an optional leading `NCIntentMatch` one, carries exactly one
+ *   `NCIntentTerm` annotation;
+ * - every annotated term ID is one of the non-null term IDs of `intent`;
+ * - parameter types and term quantifiers are compatible (see `checkTypes` and `checkMinMax`).
+ *
+ * The returned function invokes `m` on `obj`, passing the `NCIntentMatch` first when the method
+ * declares it, followed by the tokens of each annotated term converted by `prepareParams`.
+ *
+ * @param m Callback method.
+ * @param obj Object the method is invoked on.
+ * @param intent Compiled intent whose terms the method parameters refer to.
+ * @return Function that runs the method for a given intent match.
+ * @throws NCE If any of the checks above fails.
+ */
+ @throws[NCE]
+ private def prepareCallback(m: Method, obj: Any, intent: NCDslIntent):
Callback = {
+ // Checks method result type.
+ if (m.getReturnType != CLS_QRY_RES)
+ throw new NCE(s"@NCIntent error - unexpected result type
${class2Str(m.getReturnType)} for ${method2Str(m)}")
+
+ val allParamTypes = m.getParameterTypes.toSeq
+
+ val ctxFirstParam = allParamTypes.nonEmpty && allParamTypes.head ==
CLS_SLV_CTX
+
+ def getTokensSeq[T](data: Seq[T]): Seq[T] =
+ if (data == null)
+ Seq.empty
+ else if (ctxFirstParam)
+ data.drop(1)
+ else
+ data
+
+ val allAnns = m.getParameterAnnotations
+ val tokParamAnns = getTokensSeq(allAnns).filter(_ != null)
+ val tokParamTypes = getTokensSeq(allParamTypes)
+
+ // Checks tokens parameters annotations count.
+ if (tokParamAnns.length != tokParamTypes.length)
+ throw new NCE(s"@NCIntent error - unexpected annotations count
${tokParamAnns.size} for ${method2Str(m)}")
+
+ // Gets terms identifiers.
+ val termIds =
+ tokParamAnns.zipWithIndex.
+ map { case (anns, idx) ⇒
+ def mkArg: String = arg2Str(m, idx, ctxFirstParam)
+
+ val annsTerms = anns.filter(_.isInstanceOf[NCIntentTerm])
+
+ // Each method arguments (second and later) must have one
NCIntentTerm annotation.
+ annsTerms.length match {
+ case 1 ⇒
annsTerms.head.asInstanceOf[NCIntentTerm].value()
+
+ case 0 ⇒ throw new NCE(s"@NCIntentTerm error - " +
+ s"missed annotation ${class2Str(CLS_TERM)} for
$mkArg")
+ case _ ⇒ throw new NCE(s"@NCIntentTerm error - " +
+ s"too many annotations ${class2Str(CLS_TERM)} for
$mkArg")
+ }
+ }
+
+ val terms = intent.terms.toSeq
+
+ // Checks correctness of term IDs.
+ // Note we don't restrict them to be duplicated.
+ val intentTermIds = terms.filter(_.getId != null).map(_.getId)
+ val invalidIds = termIds.filter(id ⇒ !intentTermIds.contains(id))
+
+ if (invalidIds.nonEmpty)
+ throw new NCE(s"@NCIntentTerm error - invalid term identifiers
'${invalidIds.mkString(", ")}' for ${method2Str(m)}")
+
+ val paramGenTypes = getTokensSeq(m.getGenericParameterTypes)
+
+ require(tokParamTypes.length == paramGenTypes.length)
+
+ // Checks parameters.
+ checkTypes(m, tokParamTypes, paramGenTypes, ctxFirstParam)
+
+ // Checks limits.
+ val allLimits = terms.map(t ⇒ t.getId → (t.getMin, t.getMax)).toMap
+
+ checkMinMax(m, tokParamTypes, termIds.map(allLimits), ctxFirstParam)
+
+ // Prepares invocation method.
+ (ctx: NCIntentMatch) ⇒ {
+ invoke(
+ m,
+ obj,
+ (
+ (if (ctxFirstParam) Seq(ctx)
+ else Seq.empty) ++
+ prepareParams(m, tokParamTypes,
termIds.map(ctx.getTermTokens), ctxFirstParam)
+ ).toArray
+ )
+ }
+ }
+
+ /**
+ * Invokes the given method reflectively and returns its result as `NCResult`.
+ *
+ * When `m.canAccess(obj)` is `false` the method is made accessible for the call and
+ * `setAccessible(false)` is called afterwards. Inside the `try` block `flag` changes its meaning
+ * to "accessibility was changed here and has to be restored". `NCIntentSkip`, `NCRejection` and
+ * `NCE` thrown by the invoked method are rethrown unchanged; any other failure is wrapped into `NCE`.
+ *
+ * @param m Method to invoke.
+ * @param obj Object the method is invoked on.
+ * @param args Arguments passed to the method.
+ * @return Value returned by the method, cast to `NCResult`.
+ * @throws NCE If the invocation fails or the accessibility flag cannot be restored.
+ */
+ @throws[NCE]
+ private def invoke(m: Method, obj: Any, args: Array[AnyRef]): NCResult = {
+ var flag = m.canAccess(obj)
+
+ try {
+ if (!flag) {
+ m.setAccessible(true)
+
+ flag = true
+ }
+ else
+ flag = false
+
+ m.invoke(obj, args: _*).asInstanceOf[NCResult]
+ }
+ catch {
+ case e: InvocationTargetException ⇒
+ e.getTargetException match {
+ case e: NCIntentSkip ⇒ throw e
+ case e: NCRejection ⇒ throw e
+ case e: NCE ⇒ throw e
+ case e: Throwable ⇒ throw new NCE(s"Invocation error in
${method2Str(m)}", e)
+ }
+ case e: Throwable ⇒ throw new NCE(s"Invocation error in
${method2Str(m)}", e)
+ }
+ finally
+ if (flag)
+ try
+ m.setAccessible(false)
+ catch {
+ case e: SecurityException ⇒ throw new NCE(s"Access error
in ${method2Str(m)}", e)
+ }
+ }
+
+ /**
+ * Converts the tokens found for each term into values of the declared parameter types.
+ * Parameter classes and token lists are paired by position.
+ *
+ * Conversion by parameter class:
+ * - `NCToken`: the only token of the list (exactly one token is required);
+ * - array: array built from the list;
+ * - Scala `Seq`: the list wrapped with `asScala`; Scala `List`: the same converted with `toList`;
+ * - Java `List`: the list itself;
+ * - Scala `Option` / Java `Optional`: empty for no tokens, the token for exactly one token.
+ * Any other class raises `AssertionError`.
+ *
+ * @param m Callback method, used for error messages only.
+ * @param paramClss Declared classes of the token parameters.
+ * @param argsList Tokens found for each term, in parameter order.
+ * @param ctxFirstParam Passed to `arg2Str`; affects only the argument number in error messages.
+ * @return Argument values in parameter order.
+ * @throws NCE If the number of tokens does not fit the parameter type.
+ */
+ @throws[NCE]
+ private def prepareParams(
+ m: Method,
+ paramClss: Seq[Class[_]],
+ argsList: Seq[util.List[NCToken]],
+ ctxFirstParam: Boolean
+ ): Seq[AnyRef] =
+ paramClss.zip(argsList).zipWithIndex.map { case ((paramCls, argList),
i) ⇒
+ def mkArg: String = arg2Str(m, i, ctxFirstParam)
+
+ val toksCnt = argList.size()
+
+ // Single token.
+ if (paramCls == CLS_TOKEN) {
+ if (toksCnt != 1)
+ throw new NCE(s"@NCIntentTerm error - expected single
token, but found $toksCnt for $mkArg")
+
+ argList.get(0)
+ }
+ // Array of tokens.
+ else if (paramCls.isArray)
+ argList.asScala.toArray
+ // Scala and java list of tokens.
+ else if (paramCls == CLS_SCALA_SEQ)
+ argList.asScala
+ else if (paramCls == CLS_SCALA_LST)
+ argList.asScala.toList
+ else if (paramCls == CLS_JAVA_LST)
+ argList
+ // Scala and java optional token.
+ else if (paramCls == CLS_SCALA_OPT)
+ toksCnt match {
+ case 0 ⇒ None
+ case 1 ⇒ Some(argList.get(0))
+ case _ ⇒ throw new NCE(s"@NCIntentTerm error - too many
tokens $toksCnt for option $mkArg")
+ }
+ else if (paramCls == CLS_JAVA_OPT)
+ toksCnt match {
+ case 0 ⇒ util.Optional.empty()
+ case 1 ⇒ util.Optional.of(argList.get(0))
+ case _ ⇒ throw new NCE(s"@NCIntentTerm error - too many
tokens $toksCnt for optional $mkArg")
+ }
+ else
+ // Arguments types already checked.
+ throw new AssertionError(s"Unexpected type $paramCls for
$mkArg")
+ }
+
+ /**
+ * Checks that every token parameter of a callback method has a supported type.
+ *
+ * Accepted types are `NCToken`, an array whose component type is `NCToken`, and the classes
+ * listed in `COMP_CLS` when they are parameterized with exactly one type argument that is the
+ * `NCToken` class.
+ *
+ * @param m Callback method, used for error messages only.
+ * @param paramCls Declared classes of the token parameters.
+ * @param paramGenTypes Generic types of the same parameters; must have the same length as `paramCls`.
+ * @param ctxFirstParam Passed to `arg2Str`; affects only the argument number in error messages.
+ * @throws NCE If a parameter type is not supported.
+ */
+ @throws[NCE]
+ private def checkTypes(m: Method, paramCls: Seq[Class[_]], paramGenTypes:
Seq[Type], ctxFirstParam: Boolean): Unit = {
+ require(paramCls.length == paramGenTypes.length)
+
+ paramCls.zip(paramGenTypes).zipWithIndex.foreach { case ((pClass,
pGenType), i) ⇒
+ def mkArg: String = arg2Str(m, i, ctxFirstParam)
+
+ // Token.
+ if (pClass == CLS_TOKEN) {
+ // No-op.
+ }
+ else if (pClass.isArray) {
+ val compType = pClass.getComponentType
+
+ if (compType != CLS_TOKEN)
+ throw new NCE(s"@NCIntentTerm error - unexpected array
element type ${class2Str(compType)} for $mkArg")
+ }
+ // Tokens collection and optionals.
+ else if (COMP_CLS.contains(pClass))
+ pGenType match {
+ case pt: ParameterizedType ⇒
+ val actTypes = pt.getActualTypeArguments
+ val compTypes = if (actTypes == null) Seq.empty else
actTypes.toSeq
+
+ if (compTypes.length != 1)
+ throw new NCE(
+ s"@NCIntentTerm error - unexpected generic
types count ${compTypes.length} for $mkArg"
+ )
+
+ val compType = compTypes.head
+
+ compType match {
+ case _: Class[_] ⇒
+ val genClass =
compTypes.head.asInstanceOf[Class[_]]
+
+ if (genClass != CLS_TOKEN)
+ throw new NCE(
+ s"@NCIntentTerm error - unexpected
generic type ${class2Str(genClass)} for $mkArg"
+ )
+ case _ ⇒
+ throw new NCE(
+ s"@NCIntentTerm error - unexpected generic
type ${compType.getTypeName} for $mkArg"
+ )
+ }
+
+ case _ ⇒ throw new NCE(
+ s"@NCIntentTerm error - unexpected parameter type
${pGenType.getTypeName} for $mkArg"
+ )
+ }
+ // Other types.
+ else
+ throw new NCE(s"@NCIntentTerm error - unexpected parameter
type ${class2Str(pClass)} for $mkArg")
+ }
+ }
+
+ /**
+ * Checks that the quantifier of each term agrees with the type of the parameter bound to it.
+ *
+ * A `NCToken` parameter requires the `[1,1]` quantifier and no other parameter type may use it;
+ * a Scala `Option` or Java `Optional` parameter requires `[0,1]` and no other parameter type
+ * may use it.
+ *
+ * @param m Callback method, used for error messages only.
+ * @param paramCls Declared classes of the token parameters.
+ * @param limits `(min, max)` quantifier for each parameter; must have the same length as `paramCls`.
+ * @param ctxFirstParam Passed to `arg2Str`; affects only the argument number in error messages.
+ * @throws NCE If a quantifier and a parameter type do not agree.
+ */
+ @throws[NCE]
+ private def checkMinMax(m: Method, paramCls: Seq[Class[_]], limits:
Seq[(Int, Int)], ctxFirstParam: Boolean): Unit = {
+ require(paramCls.length == limits.length)
+
+ paramCls.zip(limits).zipWithIndex.foreach { case ((cls, (min, max)),
i) ⇒
+ def mkArg: String = arg2Str(m, i, ctxFirstParam)
+
+ // Argument is single token but defined as not single token.
+ if (cls == CLS_TOKEN && (min != 1 || max != 1))
+ throw new NCE(s"@NCIntentTerm error - term must have [1,1]
quantifier for $mkArg " +
+ s"because this argument is a single value.")
+ // Argument is not single token but defined as single token.
+ else if (cls != CLS_TOKEN && (min == 1 && max == 1))
+ throw new NCE(s"@NCIntentTerm error - term has [1,1]
quantifier for $mkArg " +
+ s"but this argument is not a single value.")
+ // Argument is optional but defined as not optional.
+ else if ((cls == CLS_SCALA_OPT || cls == CLS_JAVA_OPT) && (min !=
0 || max != 1))
+ throw new NCE(s"@NCIntentTerm error - term must have [0,1]
quantifier for $mkArg " +
+ s"because this argument is optional.")
+ // Argument is not optional but defined as optional.
+ else if ((cls != CLS_SCALA_OPT && cls != CLS_JAVA_OPT) && (min ==
0 && max == 1))
+ throw new NCE(s"@NCIntentTerm error - term has [0,1]
quantifier for $mkArg " +
+ s"but this argument is not optional.")
+ }
+ }
+
+ /**
+ * Collects intents and their callbacks from the methods declared in the model class.
+ *
+ * A method may carry at most one `NCIntent` or `NCIntentRef` annotation. For `NCIntent` the
+ * annotation value is compiled by `NCIntentDslCompiler`. For `NCIntentRef` the model must be a
+ * `NCModelFileAdapter`: its intents are compiled, checked for duplicate IDs and searched for the
+ * trimmed reference value. Methods without these annotations are ignored.
+ *
+ * NOTE(review): the adapter intents are compiled again for every method annotated with
+ * `NCIntentRef`.
+ *
+ * @param mdl Model whose class is scanned.
+ * @return Compiled intents mapped to the callbacks created by `prepareCallback`.
+ * @throws NCE If annotations are misused, a referenced intent is unknown or a callback is invalid.
+ */
+ @throws[NCE]
+ private def scanIntents(mdl: NCModel): Map[NCDslIntent, Callback] =
+ mdl.getClass.getDeclaredMethods.flatMap(m ⇒ {
+ // Direct in-the-class and referenced intents.
+ val clsArr = m.getAnnotationsByType(CLS_INTENT)
+ val refArr = m.getAnnotationsByType(CLS_INTENT_REF)
+
+ if (clsArr.length > 1 || refArr.length > 1 || (clsArr.nonEmpty &&
refArr.nonEmpty))
+ throw new NCE(s"Only one @NCIntent or @NCIntentRef annotation
is allowed in: ${method2Str(m)}")
+
+ val cls = m.getAnnotation(CLS_INTENT)
+
+ if (cls != null)
+ Some(NCIntentDslCompiler.compile(cls.value(), mdl.getId), m)
+ else {
+ val ref = m.getAnnotation(CLS_INTENT_REF)
+
+ if (ref != null)
+ mdl match {
+ case adapter: NCModelFileAdapter ⇒
+ val refId = ref.value().trim
+
+ val compiledIntents = adapter
+ .getIntents
+ .asScala
+ .map(NCIntentDslCompiler.compile(_, mdl.getId))
+
+ U.getDups(compiledIntents.toSeq.map(_.id)) match {
+ case ids if ids.nonEmpty ⇒ throw new
NCE(s"Duplicate intent IDs found for model from '${adapter.getOrigin}':
${ids.mkString(",")}")
+ case _ ⇒ ()
+ }
+
+ compiledIntents.find(_.id == refId) match {
+ case Some(intent) ⇒ Some(intent, m)
+ case None ⇒ throw new NCE(s"@IntentRef($refId)
references unknown intent ID '$refId' in ${method2Str(m)}.")
+ }
+
+ case _ ⇒ throw new NCE(s"@IntentRef annotation in
${method2Str(m)} can be used only " +
+ s"for models extending 'NCModelFileAdapter'.")
+ }
+ else
+ None
+ }
+ })
+ .map {
+ case (intent, m) ⇒ intent → prepareCallback(m, mdl, intent)
+ }
+ .toMap
+
+ /**
+ * Scans given model for intent samples.
+ *
+ * Methods declared in the model class are inspected for `NCIntentSample`, `NCIntent` and
+ * `NCIntentRef` annotations. Samples are kept only when the method also declares an intent and
+ * the sample list is not empty; the other combinations are logged as warnings. The intent ID is
+ * the ID of the compiled `NCIntent` value or the trimmed `NCIntentRef` value.
+ *
+ * Each sample is then lower-cased, has the characters from `SEPARATORS` surrounded with spaces,
+ * is split by space and stemmed; a warning is logged when the result shares no word with any
+ * expanded synonym of the model elements.
+ *
+ * NOTE(review): the warnings name `@NCTestSample` while the annotation read here is
+ * `NCIntentSample`, and the "No intents found" message never closes its `[` - confirm and fix.
+ * NOTE(review): synonyms are stemmed before being split into words while samples are split
+ * first; whether both sides stay comparable depends on `NCNlpPorterStemmer.stem` - confirm.
+ *
+ * @param mdl Model to scan.
+ * @return Sample texts keyed by intent ID.
+ */
+ @throws[NCE]
+ private def scanSamples(mdl: NCModel): Map[String, Seq[String]] = {
+ var annFound = false
+
+ val samples =
+ mdl.getClass.getDeclaredMethods.flatMap(method ⇒ {
+ def mkMethodName: String =
s"${method.getDeclaringClass.getName}#${method.getName}(...)"
+
+ val smpAnn = method.getAnnotation(CLS_SAMPLE)
+ val intAnn = method.getAnnotation(CLS_INTENT)
+ val refAnn = method.getAnnotation(CLS_INTENT_REF)
+
+ if (smpAnn != null || intAnn != null || refAnn != null) {
+ annFound = true
+
+ def mkIntentId(): String =
+ if (intAnn != null)
+ NCIntentDslCompiler.compile(intAnn.value(),
mdl.getId).id
+ else if (refAnn != null)
+ refAnn.value().trim
+ else
+ throw new AssertionError()
+
+ if (smpAnn != null) {
+ if (intAnn == null && refAnn == null) {
+ logger.warn(
+ "@NCTestSample annotation without
corresponding @NCIntent or @NCIntentRef annotations " +
+ s"[modelId=${mdl.getId},
callback=$mkMethodName]")
+
+ None
+ }
+ else {
+ val samples = smpAnn.value().toList
+
+ if (samples.isEmpty) {
+ logger.warn(
+ "@NCTestSample annotation is empty " +
+ s"[modelId=${mdl.getId},
callback=$mkMethodName]"
+ )
+
+ None
+ }
+ else
+ Some(mkIntentId() → samples)
+ }
+ }
+ else {
+ logger.warn(
+ "@NCTestSample annotation is missing " +
+ s"[modelId=${mdl.getId},
callback=$mkMethodName]"
+ )
+
+ None
+ }
+ }
+ else
+ None
+ }).toMap
+
+ if (!annFound)
+ logger.warn(s"No intents found [modelId=${mdl.getId}")
+
+ val parser = new NCMacroParser
+
+ mdl.getMacros.asScala.foreach { case (name, str) ⇒
parser.addMacro(name, str) }
+
+ val allSyns: Set[Seq[String]] =
+ mdl.getElements.
+ asScala.
+ flatMap(_.getSynonyms.asScala.flatMap(parser.expand)).
+ map(NCNlpPorterStemmer.stem).map(_.split(" ").toSeq).
+ toSet
+
+ samples.
+ flatMap { case (_, samples) ⇒ samples.map(_.toLowerCase) }.
+ map(s ⇒ s → SEPARATORS.foldLeft(s)((s, ch) ⇒
s.replaceAll(s"\\$ch", s" $ch "))).
+ foreach {
+ case (s, sNorm) ⇒
+ val seq: Seq[String] = sNorm.split("
").map(NCNlpPorterStemmer.stem)
+
+ if (!allSyns.exists(_.intersect(seq).nonEmpty))
+ logger.warn(s"Intent sample doesn't contain any direct
synonyms [modelId=${mdl.getId}, sample=$s]")
+ }
+
+ samples
+ }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCIntentScanner.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCIntentScanner.scala
deleted file mode 100644
index 9cffa81..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCIntentScanner.scala
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.probe.mgrs.deploy
-
-import java.lang.reflect.{InvocationTargetException, Method,
ParameterizedType, Type}
-import java.util
-import java.util.function.Function
-
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.makro.NCMacroParser
-import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
-import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.model.intent.impl.NCIntentDslCompiler
-import org.apache.nlpcraft.model.intent.utils.NCDslIntent
-
-import scala.collection.JavaConverters._
-import scala.collection._
-
-case class NCIntentSamplesScanResult(samples: Map[String, Seq[String]],
warnings: Seq[String])
-
-/**
- * Scanner for `NCIntent`, `NCIntentRef` and `NCIntentTerm` annotations.
- */
-object NCIntentScanner extends LazyLogging {
- type Callback = Function[NCIntentMatch, NCResult]
-
- private final val CLS_INTENT = classOf[NCIntent]
- private final val CLS_INTENT_REF = classOf[NCIntentRef]
- private final val CLS_TERM = classOf[NCIntentTerm]
- private final val CLS_QRY_RES = classOf[NCResult]
- private final val CLS_SLV_CTX = classOf[NCIntentMatch]
- private final val CLS_SAMPLE = classOf[NCIntentSample]
-
- // Java and scala lists.
- private final val CLS_SCALA_SEQ = classOf[Seq[_]]
- private final val CLS_SCALA_LST = classOf[List[_]]
- private final val CLS_SCALA_OPT = classOf[Option[_]]
- private final val CLS_JAVA_LST = classOf[util.List[_]]
- private final val CLS_JAVA_OPT = classOf[util.Optional[_]]
-
- private final val CLS_TOKEN = classOf[NCToken]
-
- private final val COMP_CLS: Set[Class[_]] = Set(
- CLS_SCALA_SEQ,
- CLS_SCALA_LST,
- CLS_SCALA_OPT,
- CLS_JAVA_LST,
- CLS_JAVA_OPT
- )
-
- private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
-
- /**
- *
- * @param cls
- * @return
- */
- private def class2Str(cls: Class[_]): String = if (cls == null) "null"
else s"'${cls.getSimpleName}'"
-
- /**
- *
- * @param m
- * @return
- */
- private def method2Str(m: Method): String = {
- val cls = m.getDeclaringClass.getSimpleName
- val name = m.getName
- val args = m.getParameters.map(_.getType.getSimpleName).mkString(", ")
-
- s"method '$cls#$name($args)'"
- }
-
- /**
- *
- * @param m
- * @param argIdx
- * @param cxtFirstParam
- */
- private def arg2Str(m: Method, argIdx: Int, cxtFirstParam: Boolean):
String =
- s"argument #${argIdx + (if (cxtFirstParam) 1 else 0)} of
${method2Str(m)}"
-
- /**
- *
- * @param m
- * @param obj
- * @param intent
- */
- @throws[NCE]
- private def prepareCallback(m: Method, obj: Any, intent: NCDslIntent):
Callback = {
- // Checks method result type.
- if (m.getReturnType != CLS_QRY_RES)
- throw new NCE(s"@NCIntent error - unexpected result type
${class2Str(m.getReturnType)} for ${method2Str(m)}")
-
- val allParamTypes = m.getParameterTypes.toSeq
-
- val ctxFirstParam = allParamTypes.nonEmpty && allParamTypes.head ==
CLS_SLV_CTX
-
- def getTokensSeq[T](data: Seq[T]): Seq[T] =
- if (data == null)
- Seq.empty
- else if (ctxFirstParam)
- data.drop(1)
- else
- data
-
- val allAnns = m.getParameterAnnotations
- val tokParamAnns = getTokensSeq(allAnns).filter(_ != null)
- val tokParamTypes = getTokensSeq(allParamTypes)
-
- // Checks tokens parameters annotations count.
- if (tokParamAnns.length != tokParamTypes.length)
- throw new NCE(s"@NCIntent error - unexpected annotations count
${tokParamAnns.size} for ${method2Str(m)}")
-
- // Gets terms identifiers.
- val termIds =
- tokParamAnns.zipWithIndex.
- map { case (anns, idx) ⇒
- def mkArg: String = arg2Str(m, idx, ctxFirstParam)
-
- val annsTerms = anns.filter(_.isInstanceOf[NCIntentTerm])
-
- // Each method arguments (second and later) must have one
NCIntentTerm annotation.
- annsTerms.length match {
- case 1 ⇒
annsTerms.head.asInstanceOf[NCIntentTerm].value()
-
- case 0 ⇒ throw new NCE(s"@NCIntentTerm error - " +
- s"missed annotation ${class2Str(CLS_TERM)} for
$mkArg")
- case _ ⇒ throw new NCE(s"@NCIntentTerm error - " +
- s"too many annotations ${class2Str(CLS_TERM)} for
$mkArg")
- }
- }
-
- val terms = intent.terms.toSeq
-
- // Checks correctness of term IDs.
- // Note we don't restrict them to be duplicated.
- val intentTermIds = terms.filter(_.getId != null).map(_.getId)
- val invalidIds = termIds.filter(id ⇒ !intentTermIds.contains(id))
-
- if (invalidIds.nonEmpty)
- throw new NCE(s"@NCIntentTerm error - invalid term identifiers
'${invalidIds.mkString(", ")}' for ${method2Str(m)}")
-
- val paramGenTypes = getTokensSeq(m.getGenericParameterTypes)
-
- require(tokParamTypes.length == paramGenTypes.length)
-
- // Checks parameters.
- checkTypes(m, tokParamTypes, paramGenTypes, ctxFirstParam)
-
- // Checks limits.
- val allLimits = terms.map(t ⇒ t.getId → (t.getMin, t.getMax)).toMap
-
- checkMinMax(m, tokParamTypes, termIds.map(allLimits), ctxFirstParam)
-
- // Prepares invocation method.
- (ctx: NCIntentMatch) ⇒ {
- invoke(
- m,
- obj,
- (
- (if (ctxFirstParam) Seq(ctx)
- else Seq.empty) ++
- prepareParams(m, tokParamTypes,
termIds.map(ctx.getTermTokens), ctxFirstParam)
- ).toArray
- )
- }
- }
-
- /**
- *
- * @param m
- * @param obj
- * @param args
- */
- @throws[NCE]
- private def invoke(m: Method, obj: Any, args: Array[AnyRef]): NCResult = {
- var flag = m.canAccess(obj)
-
- try {
- if (!flag) {
- m.setAccessible(true)
-
- flag = true
- }
- else
- flag = false
-
- m.invoke(obj, args: _*).asInstanceOf[NCResult]
- }
- catch {
- case e: InvocationTargetException ⇒
- e.getTargetException match {
- case e: NCIntentSkip ⇒ throw e
- case e: NCRejection ⇒ throw e
- case e: NCE ⇒ throw e
- case e: Throwable ⇒ throw new NCE(s"Invocation error in
${method2Str(m)}", e)
- }
- case e: Throwable ⇒ throw new NCE(s"Invocation error in
${method2Str(m)}", e)
- }
- finally
- if (flag)
- try
- m.setAccessible(false)
- catch {
- case e: SecurityException ⇒ throw new NCE(s"Access error
in ${method2Str(m)}", e)
- }
- }
-
- /**
- *
- * @param m
- * @param paramClss
- * @param argsList
- * @param ctxFirstParam
- */
- @throws[NCE]
- private def prepareParams(
- m: Method,
- paramClss: Seq[Class[_]],
- argsList: Seq[util.List[NCToken]],
- ctxFirstParam: Boolean
- ): Seq[AnyRef] =
- paramClss.zip(argsList).zipWithIndex.map { case ((paramCls, argList),
i) ⇒
- def mkArg: String = arg2Str(m, i, ctxFirstParam)
-
- val toksCnt = argList.size()
-
- // Single token.
- if (paramCls == CLS_TOKEN) {
- if (toksCnt != 1)
- throw new NCE(s"@NCIntentTerm error - expected single
token, but found $toksCnt for $mkArg")
-
- argList.get(0)
- }
- // Array of tokens.
- else if (paramCls.isArray)
- argList.asScala.toArray
- // Scala and java list of tokens.
- else if (paramCls == CLS_SCALA_SEQ)
- argList.asScala
- else if (paramCls == CLS_SCALA_LST)
- argList.asScala.toList
- else if (paramCls == CLS_JAVA_LST)
- argList
- // Scala and java optional token.
- else if (paramCls == CLS_SCALA_OPT)
- toksCnt match {
- case 0 ⇒ None
- case 1 ⇒ Some(argList.get(0))
- case _ ⇒ throw new NCE(s"@NCIntentTerm error - too many
tokens $toksCnt for option $mkArg")
- }
- else if (paramCls == CLS_JAVA_OPT)
- toksCnt match {
- case 0 ⇒ util.Optional.empty()
- case 1 ⇒ util.Optional.of(argList.get(0))
- case _ ⇒ throw new NCE(s"@NCIntentTerm error - too many
tokens $toksCnt for optional $mkArg")
- }
- else
- // Arguments types already checked.
- throw new AssertionError(s"Unexpected type $paramCls for
$mkArg")
- }
-
- /**
- *
- * @param m
- * @param paramCls
- * @param paramGenTypes
- * @param ctxFirstParam
- */
- @throws[NCE]
- private def checkTypes(m: Method, paramCls: Seq[Class[_]], paramGenTypes:
Seq[Type], ctxFirstParam: Boolean): Unit = {
- require(paramCls.length == paramGenTypes.length)
-
- paramCls.zip(paramGenTypes).zipWithIndex.foreach { case ((pClass,
pGenType), i) ⇒
- def mkArg: String = arg2Str(m, i, ctxFirstParam)
-
- // Token.
- if (pClass == CLS_TOKEN) {
- // No-op.
- }
- else if (pClass.isArray) {
- val compType = pClass.getComponentType
-
- if (compType != CLS_TOKEN)
- throw new NCE(s"@NCIntentTerm error - unexpected array
element type ${class2Str(compType)} for $mkArg")
- }
- // Tokens collection and optionals.
- else if (COMP_CLS.contains(pClass))
- pGenType match {
- case pt: ParameterizedType ⇒
- val actTypes = pt.getActualTypeArguments
- val compTypes = if (actTypes == null) Seq.empty else
actTypes.toSeq
-
- if (compTypes.length != 1)
- throw new NCE(
- s"@NCIntentTerm error - unexpected generic
types count ${compTypes.length} for $mkArg"
- )
-
- val compType = compTypes.head
-
- compType match {
- case _: Class[_] ⇒
- val genClass =
compTypes.head.asInstanceOf[Class[_]]
-
- if (genClass != CLS_TOKEN)
- throw new NCE(
- s"@NCIntentTerm error - unexpected
generic type ${class2Str(genClass)} for $mkArg"
- )
- case _ ⇒
- throw new NCE(
- s"@NCIntentTerm error - unexpected generic
type ${compType.getTypeName} for $mkArg"
- )
- }
-
- case _ ⇒ throw new NCE(
- s"@NCIntentTerm error - unexpected parameter type
${pGenType.getTypeName} for $mkArg"
- )
- }
- // Other types.
- else
- throw new NCE(s"@NCIntentTerm error - unexpected parameter
type ${class2Str(pClass)} for $mkArg")
- }
- }
-
- /**
- *
- * @param m
- * @param paramCls
- * @param limits
- * @param ctxFirstParam
- */
- @throws[NCE]
- private def checkMinMax(m: Method, paramCls: Seq[Class[_]], limits:
Seq[(Int, Int)], ctxFirstParam: Boolean): Unit = {
- require(paramCls.length == limits.length)
-
- paramCls.zip(limits).zipWithIndex.foreach { case ((cls, (min, max)),
i) ⇒
- def mkArg: String = arg2Str(m, i, ctxFirstParam)
-
- // Argument is single token but defined as not single token.
- if (cls == CLS_TOKEN && (min != 1 || max != 1))
- throw new NCE(s"@NCIntentTerm error - term must have [1,1]
quantifier for $mkArg " +
- s"because this argument is a single value.")
- // Argument is not single token but defined as single token.
- else if (cls != CLS_TOKEN && (min == 1 && max == 1))
- throw new NCE(s"@NCIntentTerm error - term has [1,1]
quantifier for $mkArg " +
- s"but this argument is not a single value.")
- // Argument is optional but defined as not optional.
- else if ((cls == CLS_SCALA_OPT || cls == CLS_JAVA_OPT) && (min !=
0 || max != 1))
- throw new NCE(s"@NCIntentTerm error - term must have [0,1]
quantifier for $mkArg " +
- s"because this argument is optional.")
- // Argument is not optional but defined as optional.
- else if ((cls != CLS_SCALA_OPT && cls != CLS_JAVA_OPT) && (min ==
0 && max == 1))
- throw new NCE(s"@NCIntentTerm error - term has [0,1]
quantifier for $mkArg " +
- s"but this argument is not optional.")
- }
- }
-
- /**
- *
- * @param mdl
- */
- @throws[NCE]
- def scan(mdl: NCModel): Map[NCDslIntent, Callback] =
- mdl.getClass.getDeclaredMethods.flatMap(m ⇒ {
- // Direct in-the-class and referenced intents.
- val clsArr = m.getAnnotationsByType(CLS_INTENT)
- val refArr = m.getAnnotationsByType(CLS_INTENT_REF)
-
- if (clsArr.length > 1 || refArr.length > 1 || (clsArr.nonEmpty &&
refArr.nonEmpty))
- throw new NCE(s"Only one @NCIntent or @NCIntentRef annotation
is allowed in: ${method2Str(m)}")
-
- val cls = m.getAnnotation(CLS_INTENT)
-
- if (cls != null)
- Some(NCIntentDslCompiler.compile(cls.value(), mdl.getId), m)
- else {
- val ref = m.getAnnotation(CLS_INTENT_REF)
-
- if (ref != null)
- mdl match {
- case adapter: NCModelFileAdapter ⇒
- val refId = ref.value().trim
-
- val compiledIntents = adapter
- .getIntents
- .asScala
- .map(NCIntentDslCompiler.compile(_, mdl.getId))
-
- U.getDups(compiledIntents.toSeq.map(_.id)) match {
- case ids if ids.nonEmpty ⇒ throw new
NCE(s"Duplicate intent IDs found for model from '${adapter.getOrigin}':
${ids.mkString(",")}")
- case _ ⇒ ()
- }
-
- compiledIntents.find(_.id == refId) match {
- case Some(intent) ⇒ Some(intent, m)
- case None ⇒ throw new NCE(s"@IntentRef($refId)
references unknown intent ID '$refId' in ${method2Str(m)}.")
- }
-
- case _ ⇒ throw new NCE(s"@IntentRef annotation in
${method2Str(m)} can be used only " +
- s"for models extending 'NCModelFileAdapter'.")
- }
- else
- None
- }
- })
- .map {
- case (intent, m) ⇒ intent → prepareCallback(m, mdl, intent)
- }
- .toMap
-
- /**
- * Scans given model for intent samples.
- *
- * @param mdl Model to scan.
- */
- @throws[NCE]
- def scanSamples(mdl: NCModel): Map[String, Seq[String]] = {
- var annFound = false
-
- val samples =
- mdl.getClass.getDeclaredMethods.flatMap(method ⇒ {
- def mkMethodName: String =
s"${method.getDeclaringClass.getName}#${method.getName}(...)"
-
- val smpAnn = method.getAnnotation(CLS_SAMPLE)
- val intAnn = method.getAnnotation(CLS_INTENT)
- val refAnn = method.getAnnotation(CLS_INTENT_REF)
-
- if (smpAnn != null || intAnn != null || refAnn != null) {
- annFound = true
-
- def mkIntentId(): String =
- if (intAnn != null)
- NCIntentDslCompiler.compile(intAnn.value(),
mdl.getId).id
- else if (refAnn != null)
- refAnn.value().trim
- else
- throw new AssertionError()
-
- if (smpAnn != null) {
- if (intAnn == null && refAnn == null) {
- logger.warn(
- "@NCTestSample annotation without
corresponding @NCIntent or @NCIntentRef annotations " +
- s"[modelId=${mdl.getId},
callback=$mkMethodName]")
-
- None
- }
- else {
- val samples = smpAnn.value().toList
-
- if (samples.isEmpty) {
- logger.warn(
- "@NCTestSample annotation is empty " +
- s"[modelId=${mdl.getId},
callback=$mkMethodName]"
- )
-
- None
- }
- else
- Some(mkIntentId() → samples)
- }
- }
- else {
- logger.warn(
- "@NCTestSample annotation is missing " +
- s"[modelId=${mdl.getId}, callback=$mkMethodName]"
- )
-
- None
- }
- }
- else
- None
- }).toMap
-
- if (!annFound)
- logger.warn(s"No intents found [modelId=${mdl.getId}")
-
- val parser = new NCMacroParser
-
- mdl.getMacros.asScala.foreach { case (name, str) ⇒
parser.addMacro(name, str) }
-
- val allSyns: Set[Seq[String]] =
- mdl.getElements.
- asScala.
- flatMap(_.getSynonyms.asScala.flatMap(parser.expand)).
- map(NCNlpPorterStemmer.stem).map(_.split(" ").toSeq).
- toSet
-
- samples.
- flatMap { case (_, samples) ⇒ samples.map(_.toLowerCase) }.
- map(s ⇒ s → SEPARATORS.foldLeft(s)((s, ch) ⇒
s.replaceAll(s"\\$ch", s" $ch "))).
- foreach {
- case (s, sNorm) ⇒
- val seq: Seq[String] = sNorm.split(" ").map(NCNlpPorterStemmer.stem)
-
- if (!allSyns.exists(_.intersect(seq).nonEmpty))
- logger.warn(s"Intent sample doesn't contain any direct
synonyms [modelId=${mdl.getId}, sample=$s]")
- }
-
- samples
- }
-}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 06130e7..106e822 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -21,7 +21,8 @@ import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.deploy.{NCModelWrapper, _}
+import org.apache.nlpcraft.probe.mgrs.deploy._
+import org.apache.nlpcraft.probe.mgrs.nlp.NCModelData
import scala.collection.convert.DecorateAsScala
import scala.util.control.Exception._
@@ -31,7 +32,7 @@ import scala.util.control.Exception._
*/
object NCModelManager extends NCService with DecorateAsScala {
// Deployed models keyed by their IDs.
- @volatile private var wrappers: Map[String, NCModelWrapper] = _
+ @volatile private var data: Map[String, NCModelData] = _
// Access mutex.
private final val mux = new Object()
@@ -41,14 +42,14 @@ object NCModelManager extends NCService with DecorateAsScala {
val tbl = NCAsciiTable("Model ID", "Name", "Ver.", "Elements",
"Synonyms")
mux.synchronized {
- wrappers = NCDeployManager.getModels.map(w ⇒ {
- w.proxy.onInit()
+ data = NCDeployManager.getModels.map(w ⇒ {
+ w.model.onInit()
- w.proxy.getId → w
+ w.model.getId → w
}).toMap
- wrappers.values.foreach(w ⇒ {
- val mdl = w.proxy
+ data.values.foreach(w ⇒ {
+ val mdl = w.model
val synCnt = w.synonyms.values.flatMap(_.values).flatten.size
@@ -62,11 +63,11 @@ object NCModelManager extends NCService with DecorateAsScala {
})
}
- tbl.info(logger, Some(s"Models deployed: ${wrappers.size}\n"))
+ tbl.info(logger, Some(s"Models deployed: ${data.size}\n"))
addTags(
span,
- "deployedModels" → wrappers.values.map(_.proxy.getId).mkString(",")
+ "deployedModels" → data.values.map(_.model.getId).mkString(",")
)
super.start()
@@ -92,8 +93,8 @@ object NCModelManager extends NCService with DecorateAsScala {
*/
override def stop(parent: Span = null): Unit = startScopedSpan("stop",
parent) { _ ⇒
mux.synchronized {
- if (wrappers != null)
- wrappers.values.foreach(m ⇒ discardModel(m.proxy))
+ if (data != null)
+ data.values.foreach(m ⇒ discardModel(m.model))
}
super.stop()
@@ -103,11 +104,9 @@ object NCModelManager extends NCService with DecorateAsScala {
*
* @return
*/
- def getAllModelWrappers(parent: Span = null): List[NCModelWrapper] =
+ def getAllModelsData(parent: Span = null): List[NCModelData] =
startScopedSpan("getAllModels", parent) { _ ⇒
- mux.synchronized {
- wrappers.values.toList
- }
+ mux.synchronized { data.values.toList }
}
/**
@@ -115,10 +114,8 @@ object NCModelManager extends NCService with DecorateAsScala {
* @param mdlId Model ID.
* @return
*/
- def getModelWrapper(mdlId: String, parent: Span = null):
Option[NCModelWrapper] =
+ def getModelData(mdlId: String, parent: Span = null): NCModelData =
startScopedSpan("getModel", parent, "modelId" → mdlId) { _ ⇒
- mux.synchronized {
- wrappers.get(mdlId)
- }
+ mux.synchronized { data.getOrElse(mdlId, throw new NCE(s"Model not
found: $mdlId")) }
}
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelWrapper.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCModelData.scala
similarity index 96%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelWrapper.scala
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCModelData.scala
index e50a347..22706df 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelWrapper.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCModelData.scala
@@ -1,4 +1,4 @@
-package org.apache.nlpcraft.probe.mgrs.deploy
+package org.apache.nlpcraft.probe.mgrs.nlp
import java.io.Serializable
import java.util
@@ -15,7 +15,7 @@ import scala.collection.{Map, Seq, mutable}
/**
*
- * @param proxy
+ * @param model
* @param solver
* @param synonyms
* @param synonymsDsl
@@ -24,8 +24,8 @@ import scala.collection.{Map, Seq, mutable}
* @param suspWordsStems
* @param elements
*/
-case class NCModelWrapper(
- proxy: NCModel,
+case class NCModelData(
+ model: NCModel,
solver: NCIntentSolver,
synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ ,
Seq[NCSynonym]]], // Fast access map.
synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ ,
Seq[NCSynonym]]], // Fast access map.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
index ede7298..09e06cc 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
@@ -23,7 +23,6 @@ import com.typesafe.scalalogging.LazyLogging
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.nlp._
import org.apache.nlpcraft.common.{NCService, _}
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
import scala.collection.Map
import scala.language.implicitConversions
@@ -36,11 +35,11 @@ abstract class NCProbeEnricher extends NCService with LazyLogging {
*
* Processes this NLP sentence.
*
- * @param mdl Model decorator.
+ * @param mdlData Model decorator.
* @param ns NLP sentence to enrich.
* @param senMeta Sentence metadata.
* @param parent Span parent.
*/
@throws[NCE]
- def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, senMeta: Map[String,
Serializable], parent: Span): Unit
+ def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta: Map[String,
Serializable], parent: Span): Unit
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 8acbb5a..f30e729 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -315,14 +315,14 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
logger.info(s"REJECT response $msgName sent
[srvReqId=$srvReqId, response=${errMsg.get}]")
}
- val w = NCModelManager.getModelWrapper(mdlId, span).getOrElse(throw
new NCE(s"Model not found: $mdlId"))
+ val mdlData = NCModelManager.getModelData(mdlId, span)
var errData: Option[(String, Int)] = None
val validNlpSens =
nlpSens.flatMap(nlpSen ⇒
try {
- NCValidateManager.preValidate(w, nlpSen, span)
+ NCValidateManager.preValidate(mdlData, nlpSen, span)
Some(nlpSen)
}
@@ -359,14 +359,14 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
val sensSeq = validNlpSens.flatMap(nlpSen ⇒ {
// Independent of references.
- NCDictionaryEnricher.enrich(w, nlpSen, senMeta, span)
- NCSuspiciousNounsEnricher.enrich(w, nlpSen, senMeta, span)
- NCStopWordEnricher.enrich(w, nlpSen, senMeta, span)
+ NCDictionaryEnricher.enrich(mdlData, nlpSen, senMeta, span)
+ NCSuspiciousNounsEnricher.enrich(mdlData, nlpSen, senMeta, span)
+ NCStopWordEnricher.enrich(mdlData, nlpSen, senMeta, span)
case class Holder(enricher: NCProbeEnricher, getNotes: () ⇒
Seq[NCNlpSentenceNote])
def get(name: String, e: NCProbeEnricher): Option[Holder] =
- if (w.proxy.getEnabledBuiltInTokens.contains(name))
+ if (mdlData.model.getEnabledBuiltInTokens.contains(name))
Some(Holder(e, () ⇒ nlpSen.flatten.filter(_.noteType ==
name)))
else
None
@@ -392,7 +392,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
def get(): Seq[NCNlpSentenceNote] = h.getNotes().sortBy(p
⇒ (p.tokenIndexes.head, p.noteType))
val notes1 = get()
- h → h.enricher.enrich(w, nlpSen, senMeta, span)
+ h → h.enricher.enrich(mdlData, nlpSen, senMeta, span)
val notes2 = get()
@@ -432,7 +432,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
}).toMap
// Loop has sense if model is complex (has user defined
parsers or DSL based synonyms)
- continue = NCModelEnricher.isComplex(w) && res.exists { case
(_, same) ⇒ !same }
+ continue = NCModelEnricher.isComplex(mdlData) && res.exists {
case (_, same) ⇒ !same }
if (DEEP_DEBUG)
if (continue) {
@@ -462,7 +462,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
// Final validation before execution.
try
- sensSeq.foreach(NCValidateManager.postValidate(w, _, span))
+ sensSeq.foreach(NCValidateManager.postValidate(mdlData, _, span))
catch {
case e: NCValidateException ⇒
val (errMsg, errCode) = getError(e.code)
@@ -485,13 +485,13 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
val meta = mutable.HashMap.empty[String, Any] ++ senMeta
val req = NCRequestImpl(meta, srvReqId)
- var senVars = w.makeVariants(srvReqId, sensSeq)
+ var senVars = mdlData.makeVariants(srvReqId, sensSeq)
// Sentence variants can be filtered by model.
val fltSenVars: Seq[(NCVariant, Int)] =
senVars.
zipWithIndex.
- flatMap { case (variant, i) ⇒ if
(w.proxy.onParsedVariant(variant)) Some(variant, i) else None }
+ flatMap { case (variant, i) ⇒ if
(mdlData.model.onParsedVariant(variant)) Some(variant, i) else None }
senVars = fltSenVars.map(_._1)
val allVars = senVars.flatMap(_.asScala)
@@ -526,7 +526,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
// Create model query context.
val ctx: NCContext = new NCContext {
override lazy val getRequest: NCRequest = req
- override lazy val getModel: NCModel = w.proxy
+ override lazy val getModel: NCModel = mdlData.model
override lazy val getServerRequestId: String = srvReqId
override lazy val getConversation: NCConversation = new
NCConversation {
@@ -544,7 +544,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
logKey = U.mkLogHolderKey(srvReqId)
- val meta = w.proxy.getMetadata
+ val meta = mdlData.model.getMetadata
meta.synchronized {
meta.put(logKey, logHldr)
@@ -570,7 +570,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
def onFinish(): Unit = {
if (logKey != null)
- w.proxy.getMetadata.remove(logKey)
+ mdlData.model.getMetadata.remove(logKey)
span.end()
}
@@ -580,16 +580,16 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
// Execute model query asynchronously.
U.asFuture(
_ ⇒ {
- var res = w.proxy.onContext(ctx)
+ var res = mdlData.model.onContext(ctx)
start = System.currentTimeMillis()
- if (res == null && w.solver != null)
+ if (res == null && mdlData.solver != null)
startScopedSpan("intentMatching", span) { _ ⇒
- res = w.solver.solve(solverIn, span)
+ res = mdlData.solver.solve(solverIn, span)
}
- if (res == null && w.solver == null)
+ if (res == null && mdlData.solver == null)
throw new IllegalStateException("No intents and no results
from model callbacks.")
recordStats(M_USER_LATENCY_MS → (System.currentTimeMillis() -
start))
@@ -623,7 +623,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
if (e.getCause != null)
logger.info(s"Rejection cause:", e.getCause)
- val res = w.proxy.onRejection(solverIn.intentMatch, e)
+ val res =
mdlData.model.onRejection(solverIn.intentMatch, e)
if (res != null)
respondWithResult(res, None)
@@ -652,7 +652,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
logger.error(s"Unexpected error for server request ID:
$srvReqId", e)
- val res = w.proxy.onError(ctx, e)
+ val res = mdlData.model.onError(ctx, e)
if (res != null)
respondWithResult(res, None)
@@ -678,7 +678,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
"resBody" → res.getBody
)
- val res0 = w.proxy.onResult(solverIn.intentMatch, res)
+ val res0 = mdlData.model.onResult(solverIn.intentMatch,
res)
respondWithResult(if (res0 != null) res0 else res, if
(logHldr != null) Some(logHldr.toJson) else None)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
index bf49bf7..8619b91 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
@@ -24,8 +24,7 @@ import org.apache.nlpcraft.common.nlp._
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.dict._
import org.apache.nlpcraft.common.{NCService, _}
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import scala.collection.Map
@@ -54,10 +53,10 @@ object NCDictionaryEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
- "modelId" → mdl.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
ns.foreach(t ⇒ {
// Dictionary.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 5c3d71e..e9c1ff0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -25,8 +25,7 @@ import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.numeric.{NCNumeric, NCNumericManager}
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote,
NCNlpSentenceToken}
import org.apache.nlpcraft.common.{NCE, NCService}
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import scala.collection.JavaConverters._
import scala.collection.{Map, Seq, mutable}
@@ -236,10 +235,10 @@ object NCLimitEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
- "modelId" → mdl.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
val notes = mutable.HashSet.empty[NCNlpSentenceNote]
val numsMap =
NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 4551bc0..f9174b0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -24,10 +24,9 @@ import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, _}
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
import org.apache.nlpcraft.probe.mgrs.NCSynonym
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
import scala.collection.JavaConverters._
import scala.collection.convert.DecorateAsScala
@@ -298,15 +297,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
*/
private def alreadyMarked(toks: Seq[NCNlpSentenceToken], elemId: String):
Boolean = toks.forall(_.isTypeOf(elemId))
- def isComplex(mdl: NCModelWrapper): Boolean = mdl.synonymsDsl.nonEmpty ||
!mdl.proxy.getParsers.isEmpty
+ def isComplex(mdl: NCModelData): Boolean = mdl.synonymsDsl.nonEmpty ||
!mdl.model.getParsers.isEmpty
@throws[NCE]
- override def enrich(w: NCModelWrapper, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
- "modelId" → w.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { span ⇒
- val jiggleFactor = w.proxy.getJiggleFactor
+ val jiggleFactor = mdlData.model.getJiggleFactor
val cache = mutable.HashSet.empty[Seq[Int]]
val matches = ArrayBuffer.empty[ElementMatch]
@@ -353,7 +352,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
var seq: Seq[Seq[Complex]] = null
// Attempt to match each element.
- for (elm ← w.elements.values if !alreadyMarked(toks,
elm.getId)) {
+ for (elm ← mdlData.elements.values if
!alreadyMarked(toks, elm.getId)) {
var found = false
def addMatch(
@@ -366,21 +365,21 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
}
// Optimization - plain synonyms can be used only
on first iteration
- if (w.synonyms.nonEmpty && !ns.exists(_.isUser))
- for (syn ← fastAccess(w.synonyms, elm.getId,
toks.length) if !found)
+ if (mdlData.synonyms.nonEmpty &&
!ns.exists(_.isUser))
+ for (syn ← fastAccess(mdlData.synonyms,
elm.getId, toks.length) if !found)
if (syn.isMatch(toks))
addMatch(elm, toks, syn, Seq.empty)
- if (w.synonymsDsl.nonEmpty) {
+ if (mdlData.synonymsDsl.nonEmpty) {
found = false
if (collapsedSens == null)
- collapsedSens =
w.makeVariants(ns.srvReqId, ns.clone().collapse()).map(_.asScala)
+ collapsedSens =
mdlData.makeVariants(ns.srvReqId, ns.clone().collapse()).map(_.asScala)
if (seq == null)
seq = convert(ns, collapsedSens, toks)
- for (comb ← seq; syn ←
fastAccess(w.synonymsDsl, elm.getId, comb.length) if !found)
+ for (comb ← seq; syn ←
fastAccess(mdlData.synonymsDsl, elm.getId, comb.length) if !found)
if (syn.isMatch(comb.map(_.data)))
addMatch(elm, toks, syn,
comb.filter(_.isToken).map(_.token))
}
@@ -393,7 +392,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
startScopedSpan("jiggleProc", span,
"srvReqId" → ns.srvReqId,
- "modelId" → w.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
// Iterate over depth-limited permutations of the original
sentence with and without stopwords.
jiggle(ns, jiggleFactor).foreach(procPerm)
@@ -414,7 +413,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
for ((m, idx) ← matches.zipWithIndex) {
if (DEEP_DEBUG)
logger.trace(
- s"Model '${w.proxy.getId}' element found (${idx + 1}
of $matchCnt) [" +
+ s"Model '${mdlData.model.getId}' element found (${idx
+ 1} of $matchCnt) [" +
s"elementId=${m.element.getId}, " +
s"synonym=${m.synonym}, " +
s"tokens=${tokString(m.tokens)}" +
@@ -430,14 +429,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
mark(ns, elem = elm, toks = m.tokens, direct = direct, syn =
Some(syn), metaOpt = None, parts = m.parts)
}
- val parsers = w.proxy.getParsers
+ val parsers = mdlData.model.getParsers
for (parser ← parsers.asScala) {
parser.onInit()
startScopedSpan("customParser", span,
"srvReqId" → ns.srvReqId,
- "modelId" → w.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
def to(t: NCNlpSentenceToken): NCCustomWord =
new NCCustomWord {
@@ -459,7 +458,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val res = parser.parse(
NCRequestImpl(senMeta, ns.srvReqId),
- w.proxy,
+ mdlData.model,
ns.map(to).asJava,
ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
val noteId = n.noteType
@@ -495,7 +494,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
if (!alreadyMarked(matchedToks, elemId))
mark(
ns,
- elem = w.elements.getOrElse(elemId, throw
new NCE(s"Custom model parser returned unknown element ID: $elemId")),
+ elem = mdlData.elements.getOrElse(elemId,
throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
toks = matchedToks,
direct = true,
syn = None,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index 4bd2b03..b5ade6c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -24,8 +24,7 @@ import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote,
NCNlpSentenceToken}
import org.apache.nlpcraft.common.{NCE, NCService}
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import scala.collection.JavaConverters._
import scala.collection.{Map, Seq, mutable}
@@ -138,10 +137,10 @@ object NCRelationEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
- "modelId" → mdl.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
// Tries to grab tokens direct way.
// Example: A, B, C ⇒ ABC, AB, BC .. (AB will be processed first)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index e4cb56f..c0e65e5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -24,8 +24,7 @@ import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote,
NCNlpSentenceToken}
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
@@ -415,10 +414,10 @@ object NCSortEnricher extends NCProbeEnricher {
toks.length == toks2.length || toks.count(isImportant) ==
toks2.count(isImportant)
}
- override def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, meta:
Map[String, Serializable], parent: Span): Unit =
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, meta:
Map[String, Serializable], parent: Span): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
- "modelId" → mdl.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
val notes = mutable.HashSet.empty[NCNlpSentenceNote]
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index e293e73..cd2a434 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -23,8 +23,7 @@ import io.opencensus.trace.Span
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
import org.apache.nlpcraft.common.{NCE, NCService, U}
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import scala.annotation.tailrec
import scala.collection.{Map, Seq}
@@ -176,12 +175,12 @@ object NCStopWordEnricher extends NCProbeEnricher {
/**
* Marks as stopwords, words with POS from configured list, which also
placed before another stop words.
*/
- private def processCommonStops(mdl: NCModelWrapper, ns: NCNlpSentence):
Unit = {
+ private def processCommonStops(mdl: NCModelData, ns: NCNlpSentence): Unit
= {
/**
* Marks as stopwords, words with POS from configured list, which
also placed before another stop words.
*/
@tailrec
- def processCommonStops0(mdl: NCModelWrapper, ns: NCNlpSentence): Unit
= {
+ def processCommonStops0(mdl: NCModelData, ns: NCNlpSentence): Unit = {
val max = ns.size - 1
var stop = true
@@ -206,18 +205,19 @@ object NCStopWordEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit = {
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit = {
def mark(stems: Set[String], f: Boolean): Unit =
ns.filter(t ⇒ stems.contains(t.stem)).foreach(t ⇒
ns.fixNote(t.getNlpNote, "stopWord" → f))
- startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "modelId"
→ mdl.proxy.getId, "txt" → ns.text) { _ ⇒
-
- mark(mdl.exclStopWordsStems, f = false)
- mark(mdl.addStopWordsStems, f = true)
+ startScopedSpan(
+ "enrich", parent, "srvReqId" → ns.srvReqId, "modelId" →
mdlData.model.getId, "txt" → ns.text
+ ) { _ ⇒
+ mark(mdlData.exclStopWordsStems, f = false)
+ mark(mdlData.addStopWordsStems, f = true)
processGeo(ns)
processDate(ns)
processNums(ns)
- processCommonStops(mdl, ns)
+ processCommonStops(mdlData, ns)
}
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
index 5d234db..cf6c742 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
@@ -22,8 +22,7 @@ import java.io.Serializable
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.{NCE, NCService}
import org.apache.nlpcraft.common.nlp._
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
-import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
+import org.apache.nlpcraft.probe.mgrs.nlp.{NCModelData, NCProbeEnricher}
import scala.collection.Map
@@ -40,11 +39,11 @@ object NCSuspiciousNounsEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelWrapper, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdlData: NCModelData, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
- "modelId" → mdl.proxy.getId,
+ "modelId" → mdlData.model.getId,
"txt" → ns.text) { _ ⇒
- ns.filter(t ⇒ mdl.suspWordsStems.contains(t.stem)).foreach(t ⇒
ns.fixNote(t.getNlpNote, "suspNoun" → true))
+ ns.filter(t ⇒ mdlData.suspWordsStems.contains(t.stem)).foreach(t ⇒
ns.fixNote(t.getNlpNote, "suspNoun" → true))
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/validate/NCValidateManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/validate/NCValidateManager.scala
index 0412247..fe1b074 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/validate/NCValidateManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/validate/NCValidateManager.scala
@@ -22,7 +22,7 @@ import io.opencensus.trace.Span
import org.apache.tika.langdetect.OptimaizeLangDetector
import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.common.nlp.NCNlpSentence
-import org.apache.nlpcraft.probe.mgrs.deploy.NCModelWrapper
+import org.apache.nlpcraft.probe.mgrs.nlp.NCModelData
/**
* Probe pre/post enrichment validator.
@@ -51,12 +51,12 @@ object NCValidateManager extends NCService with LazyLogging
{
* @param parent Parent tracing span.
*/
@throws[NCValidateException]
- def preValidate(w: NCModelWrapper, ns: NCNlpSentence, parent: Span =
null): Unit =
+ def preValidate(w: NCModelData, ns: NCNlpSentence, parent: Span = null):
Unit =
startScopedSpan("validate", parent,
"srvReqId" → ns.srvReqId,
"txt" → ns.text,
- "modelId" → w.proxy.getId) { _ ⇒
- val mdl = w.proxy
+ "modelId" → w.model.getId) { _ ⇒
+ val mdl = w.model
if (!mdl.isNotLatinCharsetAllowed &&
!ns.text.matches("""[\s\w\p{Punct}]+"""))
throw NCValidateException("ALLOW_NON_LATIN_CHARSET")
@@ -77,12 +77,12 @@ object NCValidateManager extends NCService with LazyLogging
{
* @param parent Optional parent span.
*/
@throws[NCValidateException]
- def postValidate(w: NCModelWrapper, ns: NCNlpSentence, parent: Span =
null): Unit =
+ def postValidate(w: NCModelData, ns: NCNlpSentence, parent: Span = null):
Unit =
startScopedSpan("validate", parent,
"srvReqId" → ns.srvReqId,
"txt" → ns.text,
- "modelId" → w.proxy.getId) { _ ⇒
- val mdl = w.proxy
+ "modelId" → w.model.getId) { _ ⇒
+ val mdl = w.model
val types = ns.flatten.filter(!_.isNlp).map(_.noteType).distinct
val overlapNotes = ns.map(tkn ⇒ types.flatMap(tp ⇒
tkn.getNotes(tp))).filter(_.size > 1).flatten