This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-41-1 by this push:
new aa6399d WIP.
aa6399d is described below
commit aa6399d7f334d8412f956c58b5f9a95d863dace8
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Sep 9 14:52:47 2020 +0300
WIP.
---
.../probe/mgrs/deploy/NCDeployManager.scala | 3 +-
.../probe/mgrs/deploy/NCModelWrapper.scala | 95 ++++++++++++++++++++++
2 files changed, 96 insertions(+), 2 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 2fb3e52..571de80 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -142,8 +142,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
val exclStopWords = checkAndStemmatize(mdl.getExcludedStopWords,
"Excluded stopword")
val suspWords = checkAndStemmatize(mdl.getSuspiciousWords, "Suspicious
word")
- // TODO: skh
- //checkStopwordsDups(addStopWords, exclStopWords)
+ checkStopwordsDups(addStopWords, exclStopWords)
val syns = mutable.HashSet.empty[SynonymHolder]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelWrapper.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelWrapper.scala
new file mode 100644
index 0000000..d83cdfb
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelWrapper.scala
@@ -0,0 +1,95 @@
+package org.apache.nlpcraft.probe.mgrs.deploy
+
+import java.io.Serializable
+import java.util
+
+import org.apache.nlpcraft.common.TOK_META_ALIASES_KEY
+import org.apache.nlpcraft.common.nlp.NCNlpSentence
+import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCVariantImpl}
+import org.apache.nlpcraft.model.intent.impl.NCIntentSolver
+import org.apache.nlpcraft.model.{NCElement, NCModel, NCVariant}
+import org.apache.nlpcraft.probe.mgrs.NCSynonym
+
+import scala.collection.JavaConverters._
+import scala.collection.{Seq, mutable}
+
+/**
+ * Groups a model with its intent solver, synonym lookup maps, word-stem sets and element map.
+ * @param proxy Model (`NCModel`) held by this wrapper.
+ * @param solver Intent solver (`NCIntentSolver`) held alongside the model.
+ * @param synonyms Synonyms keyed by element ID, then by synonym length.
+ * @param synonymsDsl Same layout as `synonyms`; presumably the DSL-based synonyms (TODO confirm).
+ * @param addStopWordsStems Additional stopword stems, judging by the name (TODO confirm against caller).
+ * @param exclStopWordsStems Excluded stopword stems, judging by the name (TODO confirm against caller).
+ * @param suspWordsStems Suspicious word stems, judging by the name (TODO confirm against caller).
+ * @param elements Model elements keyed by element ID.
+ */
+case class NCModelWrapper(
+ proxy: NCModel,
+ solver: NCIntentSolver,
+ synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ ,
Seq[NCSynonym]]], // Fast access map.
+ synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ ,
Seq[NCSynonym]]], // Fast access map.
+ addStopWordsStems: Set[String],
+ exclStopWordsStems: Set[String],
+ suspWordsStems: Set[String],
+ elements: Map[String /*Element ID*/ , NCElement]
+) {
+ /**
+ * Makes variants for given sentences; returns one `NCVariant` per sentence that is kept.
+ * Part tokens are attached to their user tokens; a sentence is dropped if any of its non-"nlpcraft:nlp" tokens matches (by ID and char range) a recorded part.
+ * @param srvReqId Server request ID, passed to every created `NCTokenImpl`.
+ * @param sens Sentences; each one is converted token by token.
+ */
+ def makeVariants(srvReqId: String, sens: Seq[NCNlpSentence]):
Seq[NCVariant] = {
+ val seq = sens.map(_.toSeq.map(nlpTok ⇒ NCTokenImpl(this, srvReqId,
nlpTok) → nlpTok))
+ val toks = seq.map(_.map { case (tok, _) ⇒ tok })
+
+ case class Key(id: String, from: Int, to: Int) // Token identity: ID plus start/end char indexes.
+
+ val keys2Toks = toks.flatten.map(t ⇒ Key(t.getId, t.getStartCharIndex,
t.getEndCharIndex) → t).toMap
+ val partsKeys = mutable.HashSet.empty[Key] // Keys of every token used as a part of some user token.
+
+ seq.flatten.foreach { case (tok, tokNlp) ⇒
+ if (tokNlp.isUser) {
+ val userNotes = tokNlp.filter(_.isUser)
+
+ require(userNotes.size == 1) // Exactly one user note is expected per user token.
+
+ val optList: Option[util.List[util.HashMap[String,
Serializable]]] = userNotes.head.dataOpt("parts")
+
+ optList match {
+ case Some(list) ⇒
+ val keys =
+ list.asScala.map(m ⇒
+ Key(
+ m.get("id").asInstanceOf[String],
+
m.get("startcharindex").asInstanceOf[Integer],
+ m.get("endcharindex").asInstanceOf[Integer]
+ )
+ )
+ val parts = keys.map(keys2Toks) // Map lookup: throws NoSuchElementException if a key has no matching token.
+
+ parts.zip(list.asScala).foreach { case (part, map) ⇒
+ map.get(TOK_META_ALIASES_KEY) match {
+ case null ⇒ // No-op.
+ case aliases ⇒
part.getMetadata.put(TOK_META_ALIASES_KEY, aliases.asInstanceOf[Object])
+ }
+ }
+
+ tok.setParts(parts)
+ partsKeys ++= keys
+
+ case None ⇒ // No-op.
+ }
+ }
+ }
+
+ // We can't collapse parts earlier, because we need them here
(setParts method, few lines above.)
+ toks.filter(sen ⇒
+ !sen.exists(t ⇒
+ t.getId != "nlpcraft:nlp" &&
+ partsKeys.contains(Key(t.getId, t.getStartCharIndex,
t.getEndCharIndex))
+ )
+ ).map(p ⇒ new NCVariantImpl(p.asJava))
+ }
+}