This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
new 11f0c98 WIP.
11f0c98 is described below
commit 11f0c98b71377079aff9d6f64b210fc8dddfc562
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Aug 28 15:05:40 2020 +0300
WIP.
---
.../model/intent/impl/NCIntentScanner.scala | 28 +++-------------
.../test/impl/NCTestAutoModelValidatorImpl.scala | 8 ++++-
.../inspectors/NCInspectorIntents.scala | 38 ++++++++++++++++++++--
.../NCInspectorSynonymsSuggestions.scala | 2 +-
4 files changed, 47 insertions(+), 29 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
index 617be92..7b56e31 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
@@ -29,6 +29,8 @@ import org.apache.nlpcraft.model.intent.utils.NCDslIntent
import scala.collection.JavaConverters._
import scala.collection._
+case class NCIntentSamplesScanResult(samples: Map[String, Seq[String]],
warnings: Seq[String])
+
/**
* Scanner for `NCIntent`, `NCIntentRef` and `NCIntentTerm` annotations.
*/
@@ -420,33 +422,11 @@ object NCIntentScanner extends LazyLogging {
/**
* Scans given model for intent samples.
- *
- * @param mdl Model to scan.
- */
- @throws[NCE]
- def scanIntentsSamples(mdl: NCModel): Map[String, Seq[String]] = {
- val (res, warns) = validateIntentsSamples0(mdl)
-
- warns.foreach(w ⇒ logger.warn(w))
-
- res
- }
-
- /**
- * Scans given model for intent samples.
- *
- * @param mdl Model to scan.
- */
- @throws[NCE]
- def validateIntentsSamples(mdl: NCModel): Seq[String] =
validateIntentsSamples0(mdl)._2
-
- /**
- * Scans given model for intent samples.
*
* @param mdl Model to scan.
*/
@throws[NCE]
- private def validateIntentsSamples0(mdl: NCModel): (Map[String,
Seq[String]], Seq[String]) = {
+ def scanIntentsSamples(mdl: NCModel): NCIntentSamplesScanResult = {
var annFound = false
val warns = mutable.ArrayBuffer.empty[String]
@@ -502,6 +482,6 @@ object NCIntentScanner extends LazyLogging {
if (!annFound)
warns += s"Model '${mdl.getId}' doesn't have any intents."
- (res, warns)
+ NCIntentSamplesScanResult(res, warns)
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
index 55bcf4d..3230490 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
@@ -61,7 +61,13 @@ private [test] object NCTestAutoModelValidatorImpl extends
LazyLogging {
val samples =
classes.
map(_.getDeclaredConstructor().newInstance()).
- map(mdl ⇒ mdl.getId →
NCIntentScanner.scanIntentsSamples(mdl).toMap).
+ map(mdl ⇒ {
+ val res = NCIntentScanner.scanIntentsSamples(mdl)
+
+ res.warnings.foreach(w ⇒ logger.warn(w))
+
+ mdl.getId → res.samples.toMap
+ }).
toMap.
filter(_._2.nonEmpty)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
index ee30dce..bf0eb7e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
@@ -19,16 +19,48 @@ package
org.apache.nlpcraft.probe.mgrs.inspections.inspectors
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.inspections.{NCInspection, NCInspector}
+import org.apache.nlpcraft.common.makro.NCMacroParser
+import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
import org.apache.nlpcraft.common.{NCE, NCService}
import org.apache.nlpcraft.model.intent.impl.NCIntentScanner
import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
+import scala.collection.JavaConverters._
+import scala.collection._
+
object NCInspectorIntents extends NCService with NCInspector {
+ private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
+
override def inspect(mdlId: String, prevLayerInspection:
Option[NCInspection], parent: Span = null): NCInspection =
startScopedSpan("inspect", parent) { _ ⇒
- val warns = NCIntentScanner.validateIntentsSamples(
- NCModelManager.getModel(mdlId).getOrElse(throw new NCE(s"Model
not found: '$mdlId'")).model.proxy
- )
+ val mdl = NCModelManager.getModel(mdlId).getOrElse(throw new
NCE(s"Model not found: '$mdlId'")).model
+
+ val res = NCIntentScanner.scanIntentsSamples(mdl.proxy)
+
+ val warns = mutable.ArrayBuffer.empty[String] ++ res.warnings
+
+ val parser = new NCMacroParser
+
+ mdl.getMacros.asScala.foreach { case (name, str) ⇒
parser.addMacro(name, str) }
+
+ val allSyns: Set[Seq[String]] =
+ mdl.getElements.
+ asScala.
+ flatMap(_.getSynonyms.asScala.flatMap(parser.expand)).
+ map(NCNlpPorterStemmer.stem).map(_.split(" ").toSeq).
+ toSet
+
+ res.samples.
+ flatMap { case (_, samples) ⇒ samples.map(_.toLowerCase) }.
+ // Note that we don't use the system tokenizer, because
ContextWordServer doesn't have this tokenizer.
+ // We simply split sample text on spaces, after padding each SEPARATORS
character with spaces so that it becomes a separate word.
+ map(s ⇒ s → SEPARATORS.foldLeft(s)((s, ch) ⇒
s.replaceAll(s"\\$ch", s" $ch "))).
+ foreach { case (s, sNorm) ⇒
+ val seq: Seq[String] = sNorm.split("
").map(NCNlpPorterStemmer.stem)
+
+ if (!allSyns.exists(_.intersect(seq).nonEmpty))
+ warns += s"Sample: '$s' doesn't contain synonyms"
+ }
NCInspection(errors = None, warnings = if (warns.isEmpty) None
else Some(warns), suggestions = None, data = None)
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
index dadcc28..15c6512 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
@@ -36,7 +36,7 @@ object NCInspectorSynonymsSuggestions extends NCService with
NCInspector {
data.put("macros", mdl.getMacros)
data.put("elementsSynonyms", mdl.getElements.asScala.map(p ⇒
p.getId → p.getSynonyms).toMap.asJava)
- data.put("intentsSamples",
NCIntentScanner.scanIntentsSamples(mdl.proxy).map(p ⇒ p._1 →
p._2.asJava).asJava)
+ data.put("intentsSamples",
NCIntentScanner.scanIntentsSamples(mdl.proxy).samples.map(p ⇒ p._1 →
p._2.asJava).asJava)
NCInspection(errors = None, warnings = None, suggestions = None,
data = Some(data))
}