This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
new 55cba61 WIP.
55cba61 is described below
commit 55cba61d448fff7a7a37cfd8df8a538abfa8519f
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Aug 16 12:28:07 2020 +0300
WIP.
---
.../scala/org/apache/nlpcraft/model/NCElement.java | 4 +-
.../org/apache/nlpcraft/model/NCModelView.java | 4 +-
.../apache/nlpcraft/model/impl/NCTokenImpl.scala | 3 +-
.../probe/mgrs/conn/NCConnectionManager.scala | 3 +
.../probe/mgrs/deploy/NCDeployManager.scala | 19 +++
.../nlpcraft/probe/mgrs/deploy/NCModelHolder.scala | 5 +-
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 4 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 148 ++++++++++-----------
.../nlpcraft/server/rest/NCBasicRestApi.scala | 2 +-
.../server/suggestion/NCSuggestionsManager.scala | 18 ++-
10 files changed, 118 insertions(+), 92 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index d120941..39de184 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -149,10 +149,10 @@ public interface NCElement extends NCMetadata,
Serializable {
* ]
* </pre>
*
- * @return Element's metadata or {@code null} if none provided. Default
implementation return {@code null}.
+ * @return Element's metadata or empty collection if none provided.
Default implementation return empty collection. TODO:
*/
default Map<String, Object> getMetadata() {
- return null;
+ return Collections.emptyMap();
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index ce3dcdd..a0dcf3c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -669,7 +669,7 @@ public interface NCModelView extends NCMetadata {
* }
* </pre>
*
- * @return Optional user defined model metadata.
+ * @return Optional user defined model metadata. TODO: cannot be null
*/
default Map<String, Object> getMetadata() {
return DFLT_METADATA;
@@ -808,7 +808,7 @@ public interface NCModelView extends NCMetadata {
* }
* </pre>
*
- * @return Custom user parsers for model elements or {@code null} if not
used (default).
+ * @return Custom user parsers for model elements or {@code null} if not
used (default). TODO: cannot be null!
*/
default List<NCCustomParser> getParsers() {
return Collections.emptyList();
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 629d1b7..52d1d8d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -139,8 +139,7 @@ private[nlpcraft] object NCTokenImpl {
// Special synthetic meta data element.
md.put("nlpcraft:nlp:freeword", false)
- if (elm.getMetadata != null)
- elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k,
v.asInstanceOf[java.io.Serializable]) }
+ elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k,
v.asInstanceOf[java.io.Serializable]) }
new NCTokenImpl(
mdl.model,
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index 838bccb..906c9f2 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -233,6 +233,9 @@ object NCConnectionManager extends NCService {
NCModelManager.getAllModels().map(m ⇒ {
val mdl = m.model
+ require(m.intentsSamples != null)
+ // Model already validated.
+
// util.HashSet created to avoid scala collections
serialization error.
// Seems to be a Scala bug.
(
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index f6d5b21..8bda420 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -80,6 +80,15 @@ object NCDeployManager extends NCService with
DecorateAsScala {
*/
@throws[NCE]
private def wrap(mdl: NCModel): NCModelHolder = {
+ checkCollection("additionalStopWords", mdl.getAdditionalStopWords)
+ checkCollection("elements", mdl.getElements)
+ checkCollection("enabledBuiltInTokens", mdl.getEnabledBuiltInTokens)
+ checkCollection("excludedStopWords", mdl.getExcludedStopWords)
+ checkCollection("parsers", mdl.getParsers)
+ checkCollection("suspiciousWords", mdl.getSuspiciousWords)
+ checkCollection("macros", mdl.getMacros)
+ checkCollection("metadata", mdl.getMetadata)
+
// Scan for intent annotations in the model class.
val intents = NCIntentScanner.scan(mdl)
@@ -107,6 +116,16 @@ object NCDeployManager extends NCService with
DecorateAsScala {
/**
*
+ * @param name
+ * @param col
+ */
+ @throws[NCE]
+ private def checkCollection(name: String, col: Any): Unit =
+ if (col == null)
+ throw new NCE(s"Collection '$name' can be empty but cannot be
null")
+
+ /**
+ *
* @param clsName Factory class name.
*/
@throws[NCE]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
index 1a94481..4786571 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
@@ -24,4 +24,7 @@ import org.apache.nlpcraft.model.NCModel
* @param model
* @param intentSamples
*/
-case class NCModelHolder(model: NCModel, intentSamples: Map[String,
Seq[String]])
+case class NCModelHolder(model: NCModel, intentSamples: Map[String,
Seq[String]]) {
+ require(model != null)
+ require(intentSamples != null)
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 9f67045..56940d5 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -67,11 +67,9 @@ object NCModelManager extends NCService with DecorateAsScala
{
checkModelConfig(h.model)
val parser = new NCMacroParser
- val macros = h.model.getMacros
// Initialize macro parser.
- if (macros != null)
- macros.asScala.foreach(t ⇒ parser.addMacro(t._1, t._2))
+ h.model.getMacros.asScala.foreach(t ⇒ parser.addMacro(t._1, t._2))
models += h.model.getId → verifyAndDecorate(h, parser)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6845150..9ae77f9 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -297,8 +297,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
*/
private def alreadyMarked(toks: Seq[NCNlpSentenceToken], elemId: String):
Boolean = toks.forall(_.isTypeOf(elemId))
- def isComplex(mdl: NCModelDecorator): Boolean =
- mdl.synonymsDsl.nonEmpty || (mdl.model.getParsers != null &&
!mdl.model.getParsers.isEmpty)
+ def isComplex(mdl: NCModelDecorator): Boolean = mdl.synonymsDsl.nonEmpty
|| !mdl.model.getParsers.isEmpty
@throws[NCE]
override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit =
@@ -432,81 +431,80 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
val parsers = mdl.model.getParsers
- if (parsers != null)
- for (parser ← parsers.asScala) {
- parser.onInit()
-
- startScopedSpan("customParser", span,
- "srvReqId" → ns.srvReqId,
- "modelId" → mdl.model.getId,
- "txt" → ns.text) { _ ⇒
- def to(t: NCNlpSentenceToken): NCCustomWord =
- new NCCustomWord {
- override def getNormalizedText: String =
t.normText
- override def getOriginalText: String =
t.origText
- override def getStartCharIndex: Int =
t.startCharIndex
- override def getEndCharIndex: Int =
t.endCharIndex
- override def getPos: String = t.pos
- override def getPosDescription: String =
t.posDesc
- override def getLemma: String = t.lemma
- override def getStem: String = t.stem
- override def isStopWord: Boolean = t.isStopWord
- override def isBracketed: Boolean =
t.isBracketed
- override def isQuoted: Boolean = t.isQuoted
- override def isKnownWord: Boolean =
t.isKnownWord
- override def isSwearWord: Boolean =
t.isSwearWord
- override def isEnglish: Boolean = t.isEnglish
+ for (parser ← parsers.asScala) {
+ parser.onInit()
+
+ startScopedSpan("customParser", span,
+ "srvReqId" → ns.srvReqId,
+ "modelId" → mdl.model.getId,
+ "txt" → ns.text) { _ ⇒
+ def to(t: NCNlpSentenceToken): NCCustomWord =
+ new NCCustomWord {
+ override def getNormalizedText: String = t.normText
+ override def getOriginalText: String = t.origText
+ override def getStartCharIndex: Int =
t.startCharIndex
+ override def getEndCharIndex: Int = t.endCharIndex
+ override def getPos: String = t.pos
+ override def getPosDescription: String = t.posDesc
+ override def getLemma: String = t.lemma
+ override def getStem: String = t.stem
+ override def isStopWord: Boolean = t.isStopWord
+ override def isBracketed: Boolean = t.isBracketed
+ override def isQuoted: Boolean = t.isQuoted
+ override def isKnownWord: Boolean = t.isKnownWord
+ override def isSwearWord: Boolean = t.isSwearWord
+ override def isEnglish: Boolean = t.isEnglish
+ }
+
+ val res = parser.parse(
+ NCRequestImpl(senMeta, ns.srvReqId),
+ mdl.model,
+ ns.map(to).asJava,
+ ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
+ val noteId = n.noteType
+ val words = ns.filter(t ⇒ t.index >= n.tokenFrom
&& t.index <= n.tokenTo).map(to).asJava
+ val md = n.asMetadata()
+
+ new NCCustomElement() {
+ override def getElementId: String = noteId
+ override def getWords: util.List[NCCustomWord]
= words
+ override def getMetadata: util.Map[String,
AnyRef] =
+ md.map(p ⇒ p._1 →
p._2.asInstanceOf[AnyRef]).asJava
}
-
- val res = parser.parse(
- NCRequestImpl(senMeta, ns.srvReqId),
- mdl.model,
- ns.map(to).asJava,
- ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
- val noteId = n.noteType
- val words = ns.filter(t ⇒ t.index >=
n.tokenFrom && t.index <= n.tokenTo).map(to).asJava
- val md = n.asMetadata()
-
- new NCCustomElement() {
- override def getElementId: String = noteId
- override def getWords:
util.List[NCCustomWord] = words
- override def getMetadata: util.Map[String,
AnyRef] =
- md.map(p ⇒ p._1 →
p._2.asInstanceOf[AnyRef]).asJava
- }
- }).asJava
- )
-
- if (res != null)
- res.asScala.foreach(e ⇒ {
- val elemId = e.getElementId
- val words = e.getWords
-
- if (elemId == null)
- throw new NCE(s"Custom model parser cannot
return 'null' element ID.")
-
- if (words == null || words.isEmpty)
- throw new NCE(s"Custom model parser cannot
return empty custom tokens [elementId=$elemId]")
-
- val matchedToks = words.asScala.map(w ⇒
- ns.find(t ⇒
- t.startCharIndex ==
w.getStartCharIndex && t.endCharIndex == w.getEndCharIndex
- ).getOrElse(throw new
AssertionError(s"Custom model parser returned an invalid custom token: $w"))
+ }).asJava
+ )
+
+ if (res != null)
+ res.asScala.foreach(e ⇒ {
+ val elemId = e.getElementId
+ val words = e.getWords
+
+ if (elemId == null)
+ throw new NCE(s"Custom model parser cannot
return 'null' element ID.")
+
+ if (words == null || words.isEmpty)
+ throw new NCE(s"Custom model parser cannot
return empty custom tokens [elementId=$elemId]")
+
+ val matchedToks = words.asScala.map(w ⇒
+ ns.find(t ⇒
+ t.startCharIndex == w.getStartCharIndex &&
t.endCharIndex == w.getEndCharIndex
+ ).getOrElse(throw new AssertionError(s"Custom
model parser returned an invalid custom token: $w"))
+ )
+
+ if (!alreadyMarked(matchedToks, elemId))
+ mark(
+ ns,
+ elem = mdl.elements.getOrElse(elemId,
throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
+ toks = matchedToks,
+ direct = true,
+ syn = None,
+ metaOpt = Some(e.getMetadata.asScala),
+ parts = Seq.empty
)
-
- if (!alreadyMarked(matchedToks, elemId))
- mark(
- ns,
- elem = mdl.elements.getOrElse(elemId,
throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
- toks = matchedToks,
- direct = true,
- syn = None,
- metaOpt = Some(e.getMetadata.asScala),
- parts = Seq.empty
- )
- })
- }
-
- parser.onDiscard()
+ })
}
+
+ parser.onDiscard()
+ }
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
index 41c5d58..0c6a34c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
@@ -661,7 +661,7 @@ class NCBasicRestApi extends NCRestApi with LazyLogging
with NCOpenCensusTrace w
val admin = authenticateAsAdmin(req.acsTok)
if (!NCProbeManager.getAllProbes(admin.companyId,
span).exists(_.models.exists(_.id == req.mdlId)))
- throw new NCE(s"Probe not found: ${req.mdlId}")
+ throw new NCE(s"Probe not found for model: ${req.mdlId}")
val res: Map[String, Seq[Suggestion]] =
NCSuggestionsManager.suggest(req.mdlId, req.minScore,
span).
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
index 24bc2f1..b966937 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
@@ -139,20 +139,26 @@ object NCSuggestionsManager extends NCService {
require(mdl.macros != null, "Macros cannot be null")
require(mdl.intentsSamples.forall { case (_, samples) ⇒
samples.nonEmpty}, "Samples cannot be empty")
- if (mdl.intentsSamples.map { case (_, samples) ⇒ samples.size
}.sum < MIN_CNT_MODEL)
+ val allSamplesCnt = mdl.intentsSamples.map { case (_, samples) ⇒
samples.size }.sum
+
+ if (allSamplesCnt < MIN_CNT_MODEL) {
+ // TODO: text
logger.warn(
- s"Model: '$mdl' has too small synonyms count. " +
- "Try to increase their count to improve synonyms
suggestions quality."
+ s"Model: '$mdlId' has too small intents samples count:
$allSamplesCnt. " +
+ s"Potentially is can be not enough for suggestions service
high quality work. " +
+ s"Try to increase their count at least to $MIN_CNT_MODEL."
)
- else {
+ } else {
val ids =
mdl.intentsSamples.
filter { case (_, samples) ⇒ samples.size <
MIN_CNT_INTENT }.
map { case (intentId, _) ⇒ intentId }
if (ids.nonEmpty)
- logger.warn(s"Models '$mdl' has intents:
[${ids.mkString(", ")}] with too small synonyms count." +
- "Try to increase their count to improve synonyms
suggestions quality."
+ // TODO: text
+ logger.warn(s"Models '$mdlId' has intents:
[${ids.mkString(", ")}] with too small intents samples count." +
+ s"Potentially is can be not enough for suggestions
service high quality work. " +
+ s"Try to increase their count at least to
$MIN_CNT_INTENT."
)
}