This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new 1440cfd WIP.
1440cfd is described below
commit 1440cfd9eda0c5a98bfb5a9f6df825a2a3460c1a
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Apr 14 22:23:03 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 9 +-
.../scala/org/apache/nlpcraft/common/package.scala | 2 +-
.../org/apache/nlpcraft/common/util/NCUtils.scala | 12 +-
.../nlpcraft/examples/sql/sql_model_init.yaml | 4 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 191 +++++++++------------
5 files changed, 95 insertions(+), 123 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index f2965cb..792ad1b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -39,17 +39,20 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
  * @param text Normalized text.
  * @param enabledBuiltInToks Enabled built-in tokens.
  * @param tokens Initial buffer.
- * @param deletedNotes Deleted overridden notes with their tokens.
+ * @param firstProbePhase Flag (processing state issue).
+ * @param deletedNotes Deleted overridden notes with their tokens (processing state issue).
+ * @param initNlpNotes Initial NLP tokens (processing state issue).
+ * @param nlpTokens NLP tokens (processing state issue).
  */
 class NCNlpSentence(
     val srvReqId: String,
     val text: String,
     val enabledBuiltInToks: Set[String],
     override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
+    var firstProbePhase: Boolean = true,
     private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
     private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
-    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty,
-    var firstProbePhase: Boolean = true
+    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty
 ) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
@transient
private var hash: java.lang.Integer = _
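
A note on the constructor change above: the new public 'firstProbePhase' flag
now precedes the private processing-state parameters. A minimal construction
sketch (hypothetical argument values; only the signature shown above is
assumed) using named arguments, which keeps call sites immune to such
parameter reorderings:

    val sen = new NCNlpSentence(
        srvReqId = "req-1",                       // Hypothetical request ID.
        text = "show all orders",                 // Normalized input text.
        enabledBuiltInToks = Set("nlpcraft:num"), // Hypothetical built-in token ID.
        firstProbePhase = true                    // New flag; private state params keep their defaults.
    )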
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala
index 74a0e3e..c4d8bad 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/package.scala
@@ -36,7 +36,7 @@ package object common {
     final val U = NCUtils

     // Internal deep debug flag (more verbose tracing).
-    final val DEEP_DEBUG = true
+    final val DEEP_DEBUG = false

     // Model and token **internal** metadata keys.
     final val TOK_META_ALIASES_KEY = "__NLPCRAFT_TOK_META_ALIASES"
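
DEEP_DEBUG is flipped back to 'false' here for normal builds; it gates the
extra tracing used by NCModelEnricher below, along the lines of (taken from
the enricher changes in this commit):

    if (DEEP_DEBUG)
        logger.trace(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]")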
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index 13a1c89..5d25c25 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -1424,14 +1424,12 @@ object NCUtils extends LazyLogging {
* @param e
*/
def prettyError(logger: Logger, title: String, e: Throwable): Unit = {
-        e.printStackTrace()
-
         // Keep the full trace in the 'trace' log level.
-//        logger.trace(title, e)
-//
-//        prettyErrorImpl(new PrettyErrorLogger {
-//            override def log(s: String): Unit = logger.error(s)
-//        }, title, e)
+        logger.trace(title, e)
+
+        prettyErrorImpl(new PrettyErrorLogger {
+            override def log(s: String): Unit = logger.error(s)
+        }, title, e)
     }
/**
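
The prettyError() change above restores logger-based reporting: the full
stack trace now goes to the TRACE level and the condensed, formatted output
to ERROR, instead of dumping to stderr via printStackTrace(). A hypothetical
call site (illustration only; 'startProbe' and the message text are made up):

    try startProbe()
    catch {
        case e: NCE ⇒ U.prettyError(logger, "Failed to start probe:", e)
    }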
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
index d8cfc3e..9438cfa 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
@@ -1422,5 +1422,5 @@ swearWordsAllowed: false
noNounsAllowed: true
noUserTokensAllowed: true
dupSynonymsAllowed: true
-permutateSynonyms: false
-sparse: false
+permutateSynonyms: true
+sparse: true
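
The two flags flipped above loosen synonym matching for this example model:
with 'sparse: true' a multi-word synonym may match non-contiguous tokens
(e.g., a synonym "buy car" could match "buy a new car"), and with
'permutateSynonyms: true' reordered token sequences are considered as well.
This example is illustrative; the exact semantics are implemented by the
sparse/continuous matching paths in NCModelEnricher below.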
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 79bcf3f..d85f429 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -376,7 +376,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     private def mkComplexes(mdl: NCProbeModel, ns: NCNlpSentence): ComplexHolder = {
         val complexesWords = ns.map(Complex(_))
-        val complexes: Seq[ComplexSeq] =
+        val complexes =
             NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).
                 map(_.asScala).
                 par.
@@ -456,8 +456,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             mark(ns, elm, res, direct, continuous, syn = Some(syn), parts)
         }

-        if (DEEP_DEBUG && ok)
-            println(
+        if (DEEP_DEBUG)
+            logger.trace(
                 s"${if (ok) "Added" else "Skipped"} element [" +
                     s"id=${elm.getId}, " +
                     s"type=$typ, " +
@@ -470,130 +470,101 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
            )
    }

+    @throws[NCE]
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
-    /**
-     *
-     * @param mdl
-     * @param ns
-     * @param combosToks
-     * @param simpleEnabled
-     * @param idlEnabled
-     * @param req
-     * @param ch
-     * @param span
-     */
-    private def execute(
-        mdl: NCProbeModel,
-        ns: NCNlpSentence,
-        combosToks: Seq[Seq[NlpToken]],
-        simpleEnabled: Boolean,
-        idlEnabled: Boolean,
-        req: NCRequest,
-        ch: ⇒ ComplexHolder,
-        span: Span
-    ): Unit =
-        startScopedSpan("execute", span, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
-            if (DEEP_DEBUG)
-                println(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]")
-
-            val contCache = mutable.HashMap.empty ++ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
-            lazy val idlCache = mutable.HashSet.empty[Seq[Complex]]
-
-            for (toks ← combosToks) {
-                val tokIdxs = toks.map(_.index)
-                lazy val tokStems = toks.map(_.stem).mkString(" ")
-
-                // Attempt to match each element.
-                for (
-                    elm ← mdl.elements.values;
-                    elemId = elm.getId
-                    if
-                        !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
-                        !alreadyMarked(ns, elemId, toks, tokIdxs)
-                ) {
-                    // 1. SIMPLE.
-                    if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(elemId) else !mdl.hasIdlSynonyms(elemId))) {
-                        // 1.1 Continuous.
-                        var found = false
-
-                        fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
-                            case Some(h) ⇒
-                                def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
-                                    syns.get(tokStems) match {
-                                        case Some(s) ⇒
-                                            found = true
-                                            add(ns, contCache,"simple continuous", elm, toks, tokIdxs, s)
-                                        case None ⇒ notFound()
-                                    }
-
-                                def tryScan(syns: Seq[Synonym]): Unit =
-                                    for (s ← syns if !found)
-                                        if (s.isMatch(toks)) {
-                                            found = true
-                                            add(ns, contCache, "simple continuous scan", elm, toks, tokIdxs, s)
+        startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒
+            val req = NCRequestImpl(senMeta, ns.srvReqId)
+            val combToks = combos(ns)
+            lazy val ch = mkComplexes(mdl, ns)
+
+            def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
+                startScopedSpan("execute", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
+                    if (DEEP_DEBUG)
+                        logger.trace(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]")
+
+                    val contCache = mutable.HashMap.empty ++ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
+                    lazy val idlCache = mutable.HashSet.empty[Seq[Complex]]
+
+                    for (
+                        toks ← combToks;
+                        tokIdxs = toks.map(_.index);
+                        elm ← mdl.elements.values;
+                        elemId = elm.getId
+                        if
+                            !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
+                            !alreadyMarked(ns, elemId, toks, tokIdxs)
+                    ) {
+                        // 1. SIMPLE.
+                        if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(elemId) else !mdl.hasIdlSynonyms(elemId))) {
+                            lazy val tokStems = toks.map(_.stem).mkString(" ")
+
+                            // 1.1 Continuous.
+                            var found = false
+
+                            fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
+                                case Some(h) ⇒
+                                    def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
+                                        syns.get(tokStems) match {
+                                            case Some(s) ⇒
+                                                found = true
+                                                add(ns, contCache,"simple continuous", elm, toks, tokIdxs, s)
+                                            case None ⇒ notFound()
                                         }
-                                tryMap(
-                                    h.txtDirectSynonyms,
-                                    () ⇒ {
-                                        tryScan(h.notTxtDirectSynonyms)
+                                    def tryScan(syns: Seq[Synonym]): Unit =
+                                        for (s ← syns if !found)
+                                            if (s.isMatch(toks)) {
+                                                found = true
+                                                add(ns, contCache, "simple continuous scan", elm, toks, tokIdxs, s)
+                                            }
-                                        if (!found)
-                                            tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
-                                    }
-                                )
-                            case None ⇒ // No-op.
-                        }
+                                    tryMap(
+                                        h.txtDirectSynonyms,
+                                        () ⇒ {
+                                            tryScan(h.notTxtDirectSynonyms)
-                        // 1.2 Sparse.
-                        if (!found)
-                            for (s ← get(mdl.sparseSynonyms, elemId))
-                                s.sparseMatch(toks) match {
-                                    case Some(res) ⇒ add(ns, contCache, "simple sparse", elm, res, tokIdxs, s)
-                                    case None ⇒ // No-op.
-                                }
-                    }
+                                            if (!found)
+                                                tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
+                                        }
+                                    )
+                                case None ⇒ // No-op.
+                            }
-                        // 2. IDL.
-                        if (idlEnabled) {
-                            lazy val idlCombs = mkComplexCombinations(ch, toks, idlCache.toSet)
+                            // 1.2 Sparse.
+                            if (!found)
+                                for (s ← get(mdl.sparseSynonyms, elemId))
+                                    s.sparseMatch(toks) match {
+                                        case Some(res) ⇒ add(ns, contCache, "simple sparse", elm, res, tokIdxs, s)
+                                        case None ⇒ // No-op.
+                                    }
+                        }
-                            for (s ← get(mdl.idlSynonyms, elemId); comb ← idlCombs)
-                                s.idlMatch(comb.map(_.data), req) match {
-                                    case Some(res) ⇒
-                                        val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+                        // 2. IDL.
+                        if (idlEnabled)
+                            for (s ← get(mdl.idlSynonyms, elemId); comb ← mkComplexCombinations(ch, toks, idlCache.toSet))
+                                s.idlMatch(comb.map(_.data), req) match {
+                                    case Some(res) ⇒
+                                        val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
-                                        add(ns, contCache, typ, elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+                                        add(ns, contCache, typ, elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
-                                        idlCache += comb
-                                    case None ⇒ // No-op.
-                                }
+                                        idlCache += comb
+                                    case None ⇒ // No-op.
+                                }
                    }
                }
-            }
-        }
-
-    @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
-        require(isStarted)
-
-        startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒
-            val req = NCRequestImpl(senMeta, ns.srvReqId)
-            val combToks = combos(ns)
-
-            lazy val h = mkComplexes(mdl, ns)
            if (ns.firstProbePhase) {
                ns.firstProbePhase = false
                if (mdl.hasNoIdlSynonyms)
-                    execute(mdl, ns, combToks, simpleEnabled = true, idlEnabled = false, req, h, parent)
-                execute(mdl, ns, combToks, simpleEnabled = mdl.hasNoIdlSynonyms, mdl.hasIdlSynonyms, req, h, parent)
-            }
-            else {
-                if (mdl.hasIdlSynonyms)
-                    execute(mdl, ns, combToks, simpleEnabled = false, idlEnabled = true, req, h, parent)
+                    execute(simpleEnabled = true, idlEnabled = false)
+                execute(simpleEnabled = mdl.hasNoIdlSynonyms, idlEnabled = mdl.hasIdlSynonyms)
            }
+            else if (mdl.hasIdlSynonyms)
+                execute(simpleEnabled = false, idlEnabled = true)

            processParsers(mdl, ns, span, req)
        }
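
The main refactoring in this last hunk: execute() is no longer a standalone
private method threading eight parameters through every call, but a local
function inside enrich() that captures the shared context (mdl, ns, req, ch)
from the enclosing scope, so call sites pass only the two varying flags. A
standalone sketch of that pattern (hypothetical names, not code from this
commit):

    object NestedFnSketch {
        // Before: a private method carrying all context as parameters on every call.
        // After: the invariant context is captured once; only the flags vary per call.
        def enrich(mdlId: String, text: String): Unit = {
            val req = s"req-for-$mdlId"              // Shared context, built once.
            lazy val combos = text.split(' ').toSeq  // Computed lazily, like 'ch' above.

            def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
                println(s"$req: simple=$simpleEnabled, idl=$idlEnabled, toks=${combos.size}")

            execute(simpleEnabled = true, idlEnabled = false)
            execute(simpleEnabled = false, idlEnabled = true)
        }

        def main(args: Array[String]): Unit = enrich("m1", "show all orders")
    }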