This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new 261758b WIP.
261758b is described below
commit 261758b289951cb856661f83ef6dd0ed49e0d37d
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Apr 5 21:04:51 2021 +0300
WIP.
---
.../org/apache/nlpcraft/common/util/NCUtils.scala | 13 ++---
.../apache/nlpcraft/model/impl/NCTokenLogger.scala | 2 +-
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 17 ++++---
.../probe/mgrs/deploy/NCDeployManager.scala | 3 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 37 ++++++++------
.../model/NCEnricherNestedModelSpec.scala | 56 ++++++++++++++++++++--
.../model/NCEnricherNestedModelSpec2.scala | 4 +-
7 files changed, 97 insertions(+), 35 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index 141e813..23ca22b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -1424,12 +1424,13 @@ object NCUtils extends LazyLogging {
* @param e
*/
def prettyError(logger: Logger, title: String, e: Throwable): Unit = {
- // Keep the full trace in the 'trace' log level.
- logger.trace(title, e)
-
- prettyErrorImpl(new PrettyErrorLogger {
- override def log(s: String): Unit = logger.error(s)
- }, title, e)
+ e.printStackTrace()
+// // Keep the full trace in the 'trace' log level.
+// logger.trace(title, e)
+//
+// prettyErrorImpl(new PrettyErrorLogger {
+// override def log(s: String): Unit = logger.error(s)
+// }, title, e)
}
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index a99a43a..7c13efb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -339,7 +339,7 @@ object NCTokenLogger extends LazyLogging {
if (note.tokenFrom < note.tokenTo)
s"$v ${s"<${note.tokenFrom} to ${note.tokenTo}>"}"
else
- s"$v"
+ s"${if (v.isEmpty) "<>" else v}"
}
private def mkCells(hs: Seq[NoteMetadata], t: NCNlpSentenceToken):
Seq[String] = {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 4b2639e..6960fec 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -91,7 +91,6 @@ class NCProbeSynonym(
private def trySparseMatch0[T](toks: Seq[T], isMatch: (T,
NCProbeSynonymChunk) ⇒ Boolean, getIndex: T ⇒ Int): Option[Seq[T]] = {
require(toks != null)
require(toks.nonEmpty)
- require(this.size > 1)
lazy val buf = mutable.ArrayBuffer.empty[T]
var state = 0
@@ -106,14 +105,19 @@ class NCProbeSynonym(
else
!buf.contains(t) && isMatch(t, chunk)
}) match {
- case Some(t) ⇒ buf += t
+ case Some(t) ⇒
+ if (!perm && buf.nonEmpty && getIndex(t) <=
getIndex(buf.last))
+ state = -1
+ else
+ buf += t
case None ⇒ state = -1
}
- if (state != -1 &&
- buf.contains(toks.head) &&
- buf.contains(toks.last) &&
- (perm || buf.tail.zipWithIndex.forall { case (t, idx) ⇒
getIndex(t) > getIndex(buf(idx)) }) &&
+
+ if (state != -1
+ &&
+// buf.contains(toks.head) &&
+// buf.contains(toks.last) &&
{
val remained = toks.filter(t ⇒ !buf.contains(t))
@@ -123,6 +127,7 @@ class NCProbeSynonym(
Some(buf)
else
None
+
}
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index a9f4d4a..06fe040 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -195,9 +195,10 @@ object NCDeployManager extends NCService with
DecorateAsScala {
val syns = mutable.HashSet.empty[SynonymHolder]
+ // TODO: Sparse for nonDSL
def ok(b: Boolean, exp: Boolean): Boolean = if (exp) b else !b
def filter(dsl: Boolean, sparse: Boolean): Set[SynonymHolder] =
- syns.toSet.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl) &&
ok(s.syn.size > 1 && s.sparse, sparse))
+ syns.toSet.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl) &&
ok(s.sparse, sparse))
var cnt = 0
val maxCnt = mdl.getMaxTotalSynonyms
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 2dd6391..102d48c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -21,6 +21,7 @@ import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.model.impl.NCTokenLogger
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
import
org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind,
TEXT}
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
@@ -279,12 +280,23 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
* @param comb
* @param syn
*/
- private def getParts(comb: Seq[Complex], syn: NCProbeSynonym):
Seq[TokenData] =
+ private def getPartsComplex(comb: Seq[Complex], syn: NCProbeSynonym):
Seq[TokenData] =
comb.zip(syn.map(_.kind)).flatMap {
case (complex, kind) ⇒ if (complex.isToken) Some(complex.token →
kind)
else None
}
+ /**
+ *
+ * @param comb
+ * @param syn
+ */
+ private def getPartsContent(comb: Seq[NCDslContent], syn: NCProbeSynonym):
Seq[TokenData] =
+ comb.zip(syn.map(_.kind)).flatMap {
+ case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get →
kind)
+ else None
+ }
+
private def mkCache(): mutable.Map[String, ArrayBuffer[Seq[NlpToken]]] =
mutable.HashMap.empty[
String,
@@ -295,13 +307,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
(
tows.filter(_.isRight).map(_.right.get) ++
tows.filter(_.isLeft).map(_.left.get).
- flatMap(w ⇒
- ns.filter(
- t ⇒
- t.startCharIndex >= w.getStartCharIndex &&
- t.endCharIndex <= w.getEndCharIndex
- )
- )
+ flatMap(w ⇒ ns.filter(t ⇒
t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
).sortBy(_.startCharIndex)
@throws[NCE]
@@ -474,11 +480,10 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
// 3. DSL, sparse.
if (sparseEnabled)
- for (syn ← mdl.sparseSynonymsDsl.getOrElse(elemId,
Seq.empty); complex ← complexes if !foundSparse) {
- val comb = complex.tokensComplexes
-
+ for (syn ← mdl.sparseSynonymsDsl.getOrElse(elemId,
Seq.empty); (_, seq) ← dslCombs; comb ← seq if !foundSparse) {
syn.trySparseMatch(comb.map(_.data), req)
match {
- case Some(towsRes) ⇒
addSparse(convert(towsRes, ns), syn, getParts(comb, syn))
+ case Some(towsRes) ⇒
+ addSparse(convert(towsRes, ns), syn,
getPartsContent(towsRes, syn))
case None ⇒ // No-op.
}
}
@@ -489,9 +494,10 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
(len, seq) ← dslCombs;
syn ← fastAccess(mdl.nonSparseSynonymsDsl,
elemId, len).getOrElse(Seq.empty);
comb ← seq if !foundNotSparse
- )
+ ) {
if (syn.isMatch(comb.map(_.data), req))
- addNotSparse(syn, getParts(comb, syn))
+ addNotSparse(syn, getPartsComplex(comb,
syn))
+ }
}
}
}
@@ -520,8 +526,9 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
val matchCnt = matchesNorm.size
+ // TODO:matchesNorm
// Add notes for all remaining (non-intersecting) matches.
- for ((m, idx) ← matchesNorm.zipWithIndex) {
+ for ((m, idx) ← matches.zipWithIndex) {
if (DEEP_DEBUG)
logger.trace(
s"Model '${mdl.model.getId}' element found (${idx + 1}
of $matchCnt) [" +
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 54f42f4..fa9b3c7 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import org.apache.nlpcraft.model.NCElement
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel,
NCEnricherBaseSpec, NCTestUserToken ⇒ usr}
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel,
NCEnricherBaseSpec, NCTestUserToken ⇒ usr, NCTestNlpToken ⇒ nlp}
import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
import org.junit.jupiter.api.Test
@@ -27,7 +27,10 @@ import java.util
/**
* Nested Elements test model.
*/
-class NCNestedTestModel extends NCDefaultTestModel {
+class NCNestedTestModel1 extends NCDefaultTestModel {
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = false
+
override def getElements: util.Set[NCElement] =
Set(
NCTestElement("x1", "{test|_} ^^{tok_id() == 'nlpcraft:date'}^^"),
@@ -42,8 +45,8 @@ class NCNestedTestModel extends NCDefaultTestModel {
/**
* Nested elements model enricher test.
*/
-@NCTestEnvironment(model = classOf[NCNestedTestModel], startClient = true)
-class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
+@NCTestEnvironment(model = classOf[NCNestedTestModel1], startClient = true)
+class NCEnricherNestedModelSpec1 extends NCEnricherBaseSpec {
@Test
def test(): Unit =
runBatch(
@@ -62,3 +65,48 @@ class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
)
)
}
+
+class NCNestedTestModel2 extends NCNestedTestModel1 {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
+/**
+ * Nested elements model enricher test.
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel2], startClient = true)
+class NCEnricherNestedModelSpec2 extends NCEnricherNestedModelSpec1 {
+ @Test
+ def test2(): Unit =
+ runBatch(
+ _ ⇒ checkExists(
+ "test tomorrow",
+ usr(text = "test tomorrow", id = "x1")
+ ),
+ _ ⇒ checkExists(
+ "tomorrow test",
+ usr(text = "tomorrow test", id = "x1")
+ ),
+ _ ⇒ checkExists(
+ "test xxx tomorrow",
+ usr(text = "test tomorrow", id = "x1"),
+ nlp(text = "xxx"),
+ ),
+ _ ⇒ checkExists(
+ "y the y",
+ usr(text = "y y", id = "y3"),
+ nlp(text = "the", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "y xxx y",
+ usr(text = "y y", id = "y3"),
+ nlp(text = "xxx")
+ ),
+ _ ⇒ checkExists(
+ "aaa y xxx y",
+ nlp(text = "aaa"),
+ usr(text = "y y", id = "y3"),
+ nlp(text = "xxx")
+ )
+ )
+}
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
index ede9153..82b6686 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
@@ -47,9 +47,9 @@ class NCNestedTestModel21 extends
NCModelAdapter("nlpcraft.nested2.test.mdl", "N
class NCEnricherNestedModelSpec21 extends NCTestContext {
@Test
def test(): Unit = {
- checkIntent("word", "onE1")
+ //checkIntent("word", "onE1")
checkIntent("10 word", "onE1")
- checkIntent("11 12 word", "onNumAndE1")
+ //checkIntent("11 12 word", "onNumAndE1")
}
}