This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
     new 11f0c98  WIP.
11f0c98 is described below

commit 11f0c98b71377079aff9d6f64b210fc8dddfc562
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Aug 28 15:05:40 2020 +0300

    WIP.
---
 .../model/intent/impl/NCIntentScanner.scala        | 28 +++-------------
 .../test/impl/NCTestAutoModelValidatorImpl.scala   |  8 ++++-
 .../inspectors/NCInspectorIntents.scala            | 38 ++++++++++++++++++++--
 .../NCInspectorSynonymsSuggestions.scala           |  2 +-
 4 files changed, 47 insertions(+), 29 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
index 617be92..7b56e31 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentScanner.scala
@@ -29,6 +29,8 @@ import org.apache.nlpcraft.model.intent.utils.NCDslIntent
 import scala.collection.JavaConverters._
 import scala.collection._
 
+case class NCIntentSamplesScanResult(samples: Map[String, Seq[String]], 
warnings: Seq[String])
+
 /**
   * Scanner for `NCIntent`, `NCIntentRef` and `NCIntentTerm` annotations.
   */
@@ -420,33 +422,11 @@ object NCIntentScanner extends LazyLogging {
 
     /**
       * Scans given model for intent samples.
-     *
-      * @param mdl Model to scan.
-      */
-    @throws[NCE]
-    def scanIntentsSamples(mdl: NCModel): Map[String, Seq[String]] = {
-        val (res, warns) = validateIntentsSamples0(mdl)
-
-        warns.foreach(w ⇒ logger.warn(w))
-
-        res
-    }
-
-    /**
-      * Scans given model for intent samples.
-      *
-      * @param mdl Model to scan.
-      */
-    @throws[NCE]
-    def validateIntentsSamples(mdl: NCModel): Seq[String] = 
validateIntentsSamples0(mdl)._2
-
-    /**
-      * Scans given model for intent samples.
       *
       * @param mdl Model to scan.
       */
     @throws[NCE]
-    private def validateIntentsSamples0(mdl: NCModel): (Map[String, 
Seq[String]], Seq[String]) = {
+    def scanIntentsSamples(mdl: NCModel): NCIntentSamplesScanResult = {
         var annFound = false
 
         val warns = mutable.ArrayBuffer.empty[String]
@@ -502,6 +482,6 @@ object NCIntentScanner extends LazyLogging {
         if (!annFound)
             warns += s"Model '${mdl.getId}' doesn't have any intents."
 
-        (res, warns)
+        NCIntentSamplesScanResult(res, warns)
     }
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
index 55bcf4d..3230490 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
@@ -61,7 +61,13 @@ private [test] object NCTestAutoModelValidatorImpl extends 
LazyLogging {
         val samples =
             classes.
                 map(_.getDeclaredConstructor().newInstance()).
-                map(mdl ⇒ mdl.getId → 
NCIntentScanner.scanIntentsSamples(mdl).toMap).
+                map(mdl ⇒ {
+                    val res = NCIntentScanner.scanIntentsSamples(mdl)
+
+                    res.warnings.foreach(w ⇒ logger.warn(w))
+
+                    mdl.getId → res.samples.toMap
+                }).
                 toMap.
                 filter(_._2.nonEmpty)
 
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
index ee30dce..bf0eb7e 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorIntents.scala
@@ -19,16 +19,48 @@ package 
org.apache.nlpcraft.probe.mgrs.inspections.inspectors
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common.inspections.{NCInspection, NCInspector}
+import org.apache.nlpcraft.common.makro.NCMacroParser
+import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
 import org.apache.nlpcraft.common.{NCE, NCService}
 import org.apache.nlpcraft.model.intent.impl.NCIntentScanner
 import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
 
+import scala.collection.JavaConverters._
+import scala.collection._
+
 object NCInspectorIntents extends NCService with NCInspector {
+    private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
+
     override def inspect(mdlId: String, prevLayerInspection: 
Option[NCInspection], parent: Span = null): NCInspection =
         startScopedSpan("inspect", parent) { _ ⇒
-            val warns = NCIntentScanner.validateIntentsSamples(
-                NCModelManager.getModel(mdlId).getOrElse(throw new NCE(s"Model 
not found: '$mdlId'")).model.proxy
-            )
+            val mdl = NCModelManager.getModel(mdlId).getOrElse(throw new 
NCE(s"Model not found: '$mdlId'")).model
+
+            val res = NCIntentScanner.scanIntentsSamples(mdl.proxy)
+
+            val warns = mutable.ArrayBuffer.empty[String] ++ res.warnings
+
+            val parser = new NCMacroParser
+
+            mdl.getMacros.asScala.foreach { case (name, str) ⇒ 
parser.addMacro(name, str) }
+
+            val allSyns: Set[Seq[String]] =
+                mdl.getElements.
+                    asScala.
+                    flatMap(_.getSynonyms.asScala.flatMap(parser.expand)).
+                    map(NCNlpPorterStemmer.stem).map(_.split(" ").toSeq).
+                    toSet
+
+            res.samples.
+                flatMap { case (_, samples) ⇒ samples.map(_.toLowerCase) }.
+                // Note that we don't use system tokenizer, because 
ContextWordServer doesn't have this tokenizer.
+                // We just split examples words with spaces. Also we divide 
SEPARATORS as separated words.
+                map(s ⇒ s → SEPARATORS.foldLeft(s)((s, ch) ⇒ 
s.replaceAll(s"\\$ch", s" $ch "))).
+                foreach { case (s, sNorm) ⇒
+                    val seq: Seq[String] = sNorm.split(" 
").map(NCNlpPorterStemmer.stem)
+
+                    if (!allSyns.exists(_.intersect(seq).nonEmpty))
+                        warns += s"Sample: '$s' doesn't contain synonyms"
+                }
 
             NCInspection(errors = None, warnings = if (warns.isEmpty) None 
else Some(warns), suggestions = None, data = None)
         }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
index dadcc28..15c6512 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/inspections/inspectors/NCInspectorSynonymsSuggestions.scala
@@ -36,7 +36,7 @@ object NCInspectorSynonymsSuggestions extends NCService with 
NCInspector {
 
             data.put("macros", mdl.getMacros)
             data.put("elementsSynonyms", mdl.getElements.asScala.map(p ⇒ 
p.getId → p.getSynonyms).toMap.asJava)
-            data.put("intentsSamples", 
NCIntentScanner.scanIntentsSamples(mdl.proxy).map(p ⇒ p._1 → 
p._2.asJava).asJava)
+            data.put("intentsSamples", 
NCIntentScanner.scanIntentsSamples(mdl.proxy).samples.map(p ⇒ p._1 → 
p._2.asJava).asJava)
 
             NCInspection(errors = None, warnings = None, suggestions = None, 
data = Some(data))
         }

Reply via email to