This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-431
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-431 by this push:
new 2175d4d WIP.
2175d4d is described below
commit 2175d4dfb1af74fab8dba66a749326b1ea53935e
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Aug 27 14:17:18 2021 +0300
WIP.
---
.../scala/org/apache/nlpcraft/model/NCElement.java | 5 +++
.../apache/nlpcraft/model/NCModelFileAdapter.java | 10 +++++
.../org/apache/nlpcraft/model/NCModelView.java | 7 ++++
.../nlpcraft/model/impl/json/NCElementJson.java | 8 ++++
.../nlpcraft/model/impl/json/NCModelJson.java | 7 ++++
.../nlpcraft/probe/mgrs/cmd/NCCommandManager.scala | 6 +++
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 44 +++++++++++-----------
.../probe/mgrs/sentence/NCSentenceManager.scala | 20 ++++++----
.../scala/org/apache/nlpcraft/NCTestElement.scala | 17 ++++++++-
.../model/NCEnricherNestedModelSpec6.scala | 16 ++++----
10 files changed, 103 insertions(+), 37 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 9f5872a..b5b6cbd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -382,4 +382,9 @@ public interface NCElement extends NCMetadata, Serializable
{
default Optional<Boolean> isSparse() {
return Optional.empty();
}
+
+ // TODO: add Javadoc. Optional element-level 'greedy' flag; empty by default, in which case callers fall back to the model-level isGreedy() value.
+ default Optional<Boolean> isGreedy() {
+ return Optional.empty();
+ }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index c313bf7..efa2b68 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -357,6 +357,11 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
return nvl(js.isSparse(), proxy.isSparse());
}
+ @Override
+ public Optional<Boolean> isGreedy() {
+ return nvl(js.isGreedy(), proxy.isGreedy());
+ }
+
private<T> Optional<T> nvl(T t, T dflt) {
return Optional.of(t != null ? t : dflt);
}
@@ -484,6 +489,11 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
}
@Override
+ public boolean isGreedy() {
+ return proxy.isGreedy();
+ }
+
+ @Override
public Map<String, Object> getMetadata() {
return metadata;
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index c213098..80cd161 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -168,6 +168,9 @@ public interface NCModelView extends NCMetadata {
*/
boolean DFLT_IS_SPARSE = false;
+ // TODO: add Javadoc. Default value returned by the isGreedy() method of this interface.
+ boolean DFLT_IS_GREEDY = true;
+
/**
* Default value for {@link #getMaxElementSynonyms()} method.
*/
@@ -818,6 +821,10 @@ public interface NCModelView extends NCMetadata {
return DFLT_IS_SPARSE;
}
+ default boolean isGreedy() {
+ return DFLT_IS_GREEDY;
+ }
+
/**
* Gets optional user defined model metadata that can be set by the
developer and accessed later.
* By default, it returns an empty map. Note that this metadata is mutable
and can be
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index addca45..be7b995 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -36,6 +36,8 @@ public class NCElementJson {
private Boolean isPermutateSynonyms;
// Can be null.
private Boolean isSparse;
+ // Can be null.
+ private Boolean isGreedy;
public String getParentId() {
return parentId;
@@ -97,4 +99,10 @@ public class NCElementJson {
public void setSparse(Boolean sparse) {
isSparse = sparse;
}
+ public Boolean isGreedy() {
+ return isGreedy;
+ }
+ public void setGreedy(Boolean greedy) {
+ isGreedy = greedy;
+ }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index d2459d3..f332e08 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -58,6 +58,7 @@ public class NCModelJson {
private int maxTotalSynonyms = DFLT_MAX_TOTAL_SYNONYMS;
private boolean isPermutateSynonyms = DFLT_IS_PERMUTATE_SYNONYMS;
private boolean isSparse = DFLT_IS_SPARSE;
+ private boolean isGreedy = DFLT_IS_GREEDY;
private int maxElementSynonyms = DFLT_MAX_TOTAL_SYNONYMS;
private boolean maxSynonymsThresholdError =
DFLT_MAX_SYNONYMS_THRESHOLD_ERROR;
private long conversationTimeout = DFLT_CONV_TIMEOUT_MS;
@@ -202,6 +203,12 @@ public class NCModelJson {
public boolean isSparse() {
return isSparse;
}
+ public boolean isGreedy() {
+ return isGreedy;
+ }
+ public void setGreedy(boolean greedy) {
+ isGreedy = greedy;
+ }
public void setSparse(boolean sparse) {
isSparse = sparse;
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
index 1d923ce..431097c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
@@ -242,6 +242,7 @@ object NCCommandManager extends NCService {
override def getMaxTotalSynonyms: Int
= mdl.getMaxTotalSynonyms
override def isNoUserTokensAllowed:
Boolean = mdl.isNoUserTokensAllowed
override def isSparse: Boolean =
mdl.isSparse
+ override def isGreedy: Boolean =
mdl.isGreedy
override def getMetadata:
util.Map[String, AnyRef] = mdl.getMetadata
override def getAdditionalStopWords:
util.Set[String] = mdl.getAdditionalStopWords
override def getExcludedStopWords:
util.Set[String] = mdl.getExcludedStopWords
@@ -269,6 +270,9 @@ object NCCommandManager extends NCService {
// New method instead of
`isSparse`
def getSparse: lang.Boolean
+
+ // New method instead of
`isGreedy`
+ def getGreedy: lang.Boolean
}
val elm: NCElement =
@@ -288,10 +292,12 @@ object NCCommandManager extends NCService {
// Hidden.
override def
isPermutateSynonyms: Optional[lang.Boolean] = null
override def isSparse:
Optional[lang.Boolean] = null
+ override def isGreedy:
Optional[lang.Boolean] = null
// Wrapped.
override def
getPermutateSynonyms: lang.Boolean = e.isPermutateSynonyms.orElse(null)
override def getSparse:
lang.Boolean = e.isSparse.orElse(null)
+ override def getGreedy:
lang.Boolean = e.isGreedy.orElse(null)
}
elm
}).asJava
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 1f81711..51e624f 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -151,7 +151,7 @@ object NCModelEnricher extends NCProbeEnricher {
/**
*
* @param ns
- * @param elem
+ * @param elemId
* @param toks
* @param direct
* @param syn
@@ -160,7 +160,7 @@ object NCModelEnricher extends NCProbeEnricher {
*/
private def mark(
ns: Sentence,
- elem: NCElement,
+ elemId: String,
toks: Seq[NlpToken],
direct: Boolean,
syn: Option[Synonym] = None,
@@ -189,7 +189,7 @@ object NCModelEnricher extends NCProbeEnricher {
val idxs = toks.map(_.index).sorted
- val note = NlpNote(idxs, elem.getId, params: _*)
+ val note = NlpNote(idxs, elemId, params: _*)
toks.foreach(_.add(note))
@@ -269,7 +269,7 @@ object NCModelEnricher extends NCProbeEnricher {
if (!alreadyMarked(ns, elmId, matchedToks,
matchedToks.map(_.index).sorted))
mark(
ns,
- elem = mdl.elements.getOrElse(elmId, throw new
NCE(s"Custom model parser returned unknown element: $elmId")),
+ elemId = elmId,
toks = matchedToks,
direct = true,
metaOpt = Some(e.getMetadata.asScala.toMap)
@@ -410,27 +410,28 @@ object NCModelEnricher extends NCProbeEnricher {
dbgType: String,
ns: Sentence,
contCache: Cache,
- elem: NCElement,
+ elemId: String,
+ greedy: Boolean,
elemToks: Seq[NlpToken],
sliceToksIdxs: Seq[Int],
syn: Synonym,
- parts: Seq[TokType] = Seq.empty)
- : Unit = {
+ parts: Seq[TokType] = Seq.empty
+ ): Unit = {
val resIdxs = elemToks.map(_.index)
val resIdxsSorted = resIdxs.sorted
if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted))
- contCache(elem.getId) += sliceToksIdxs
+ contCache(elemId) += sliceToksIdxs
- val ok = !alreadyMarked(ns, elem.getId, elemToks, sliceToksIdxs)
+ val ok = !greedy || !alreadyMarked(ns, elemId, elemToks, sliceToksIdxs)
if (ok)
- mark(ns, elem, elemToks, direct = syn.isDirect &&
U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
+ mark(ns, elemId, elemToks, direct = syn.isDirect &&
U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
- if (DEEP_DEBUG)
- logger.trace(
+// if (DEEP_DEBUG)
+ logger.info(
s"${if (ok) "Added" else "Skipped"} element [" +
- s"id=${elem.getId}, " +
+ s"id=$elemId, " +
s"type=$dbgType, " +
s"text='${elemToks.map(_.origText).mkString(" ")}', " +
s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
@@ -466,10 +467,11 @@ object NCModelEnricher extends NCProbeEnricher {
toks <- combToks;
idxs = toks.map(_.index);
e <- mdl.elements.values;
- eId = e.getId
+ eId = e.getId;
+ greedy = e.isGreedy.orElse(mdl.model.isGreedy)
if
- !contCache(eId).exists(_.containsSlice(idxs)) &&
- !alreadyMarked(ns, eId, toks, idxs)
+ !greedy ||
+ !contCache(eId).exists(_.containsSlice(idxs)) &&
!alreadyMarked(ns, eId, toks, idxs)
) {
// 1. SIMPLE.
if (simpleEnabled && (if (idlEnabled)
mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) {
@@ -485,7 +487,7 @@ object NCModelEnricher extends NCProbeEnricher {
syns.get(tokStems) match {
case Some(s) =>
found = true
- add("simple continuous",
ns, contCache, e, toks, idxs, s)
+ add("simple continuous",
ns, contCache, eId, greedy, toks, idxs, s)
case None => notFound()
}
@@ -493,7 +495,7 @@ object NCModelEnricher extends NCProbeEnricher {
for (s <- syns if !found)
if (s.isMatch(toks)) {
found = true
- add("simple continuous
scan", ns, contCache, e, toks, idxs, s)
+ add("simple continuous
scan", ns, contCache, eId, greedy, toks, idxs, s)
}
tryMap(
@@ -512,7 +514,7 @@ object NCModelEnricher extends NCProbeEnricher {
if (!found && mdl.hasSparseSynonyms)
for (s <- get(mdl.sparseSynonyms, eId))
s.sparseMatch(toks) match {
- case Some(res) => add("simple sparse",
ns, contCache, e, res, idxs, s)
+ case Some(res) => add("simple sparse",
ns, contCache, eId, greedy, res, idxs, s)
case None => // No-op.
}
}
@@ -534,7 +536,7 @@ object NCModelEnricher extends NCProbeEnricher {
data = comb.map(_.data)
)
if (s.isMatch(data, req)) {
- add("IDL continuous", ns, contCache,
e, toks, idxs, s, toParts(data, s))
+ add("IDL continuous", ns, contCache,
eId, greedy, toks, idxs, s, toParts(data, s))
idlCache += comb
@@ -551,7 +553,7 @@ object NCModelEnricher extends NCProbeEnricher {
case Some(res) =>
val typ = if (s.sparse) "IDL
sparse" else "IDL continuous"
- add(typ, ns, contCache, e,
toTokens(res, ns), idxs, s, toParts(res, s))
+ add(typ, ns, contCache, eId,
greedy, toTokens(res, ns), idxs, s, toParts(res, s))
idlCache += comb
case None => // No-op.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 74ead87..17d4f6c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -30,7 +30,7 @@ import java.util
import java.util.{List => JList}
import scala.collection.mutable
import scala.collection.parallel.CollectionConverters._
-import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava,
SetHasAsJava}
+import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava,
SetHasAsJava, SetHasAsScala}
import scala.language.implicitConversions
/**
@@ -732,18 +732,24 @@ object NCSentenceManager extends NCService {
)
)
+
def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] =
s.flatten.filter(!_.isNlp)
// Drops similar sentences (with same notes structure). Keeps with
more found.
+ val notGreedyElems: Set[String] = mdl.getElements.asScala.flatMap(e =>
if (!e.isGreedy.orElse(mdl.isGreedy)) Some(e.getId) else None).toSet // IDs of elements that are NOT greedy (element flag, else model default); groups containing them keep all their sentence variants.
+
sens =
sens.groupBy(notNlpNotes(_).groupBy(_.noteType).keys.toSeq.sorted.distinct).
- flatMap(p => {
- val m: Map[NCNlpSentence, Int] = p._2.map(p => p ->
notNlpNotes(p).size).toMap
+ flatMap { case (types, sensSeq) =>
+ if (types.exists(notGreedyElems.contains))
+ sensSeq
+ else {
+ val m: Map[NCNlpSentence, Int] = sensSeq.map(p => p ->
notNlpNotes(p).size).toMap
- val max = m.values.max
+ val max = m.values.max
- m.filter(_._2 == max).keys
- }).
- toSeq
+ m.filter(_._2 == max).keys
+ }
+ }.toSeq
sens =
sens.filter(s => {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
index 3598d2e..12bb325 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
@@ -19,7 +19,8 @@ package org.apache.nlpcraft
import org.apache.nlpcraft.model.{NCElement, NCValue}
-import java.util
+import java.{lang, util}
+import java.util.Optional
import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
import scala.language.implicitConversions
@@ -29,9 +30,23 @@ import scala.language.implicitConversions
case class NCTestElement(id: String, syns: String*) extends NCElement {
private val values = new util.ArrayList[NCValue]
+ var metadata = super.getMetadata
+ var description = super.getDescription
+ var parentId = super.getParentId
+ var permutateSynonyms = super.isPermutateSynonyms
+ var sparse = super.isSparse
+ var greedy = super.isGreedy
+
override def getId: String = id
override def getSynonyms: util.List[String] = (syns :+ id).asJava
override def getValues: util.List[NCValue] = values
+
+ override def getMetadata: util.Map[String, AnyRef] = metadata
+ override def getDescription: String = description
+ override def getParentId: String = parentId
+ override def isPermutateSynonyms: Optional[lang.Boolean] =
permutateSynonyms
+ override def isSparse: Optional[lang.Boolean] = sparse
+ override def isGreedy: Optional[lang.Boolean] = greedy
}
/**
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
index 4a80e48..8670848 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
@@ -17,28 +17,27 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
-import org.apache.nlpcraft.model.{NCElement, NCModelAdapter}
-
import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch,
NCModelAdapter, NCResult}
import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
import org.junit.jupiter.api.Test
import java.util
+import java.util.Optional
+import scala.jdk.CollectionConverters.SetHasAsJava
+
+class NCNestedTestModel6 extends NCModelAdapter("nlpcraft.nested5.test.mdl",
"Test Model", "1.0") {
+ override def getAbstractTokens: util.Set[String] = Set("a", "b",
"any").asJava
-class NCNestedTestModel6 extends NCModelAdapter(
- "nlpcraft.nested5.test.mdl", "Nested Data Test Model", "1.0"
-) {
override def getElements: util.Set[NCElement] =
Set(
NCTestElement("a"),
NCTestElement("b"),
- NCTestElement("any", "{//[a-zA-Z0-9]+//}[1, 3]"),
- NCTestElement("compose", "^^[aRef]{tok_id() == 'a'}^^
^^[anRef]{tok_id() == 'any'}^^ ^^[bRef]{tok_id() == 'b'}^^"),
+ { val e = NCTestElement("any", "{//[a-zA-Z0-9]+//}[1, 3]");
e.greedy = Optional.of(false); e },
+ NCTestElement("compose", "^^[aRef]{tok_id() == 'a'}^^
^^[anRef]{tok_id() == 'any'}^^ ^^[bRef]{tok_id() == 'b'}^^")
)
@NCIntent("intent=compose term(city)={tok_id() == 'compose'}")
private def onCompose(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
}
-
/**
*
*/
@@ -48,5 +47,6 @@ class NCEnricherNestedModelSpec6 extends NCTestContext {
def test(): Unit = {
checkIntent("a t1 t2 t3 b", "compose")
checkIntent("a t1 t2 b", "compose")
+ checkIntent("a t1 b", "compose")
}
}