This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-431
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-431 by this push:
     new 2175d4d  WIP.
2175d4d is described below

commit 2175d4dfb1af74fab8dba66a749326b1ea53935e
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Aug 27 14:17:18 2021 +0300

    WIP.
---
 .../scala/org/apache/nlpcraft/model/NCElement.java |  5 +++
 .../apache/nlpcraft/model/NCModelFileAdapter.java  | 10 +++++
 .../org/apache/nlpcraft/model/NCModelView.java     |  7 ++++
 .../nlpcraft/model/impl/json/NCElementJson.java    |  8 ++++
 .../nlpcraft/model/impl/json/NCModelJson.java      |  7 ++++
 .../nlpcraft/probe/mgrs/cmd/NCCommandManager.scala |  6 +++
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 44 +++++++++++-----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 20 ++++++----
 .../scala/org/apache/nlpcraft/NCTestElement.scala  | 17 ++++++++-
 .../model/NCEnricherNestedModelSpec6.scala         | 16 ++++----
 10 files changed, 103 insertions(+), 37 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 9f5872a..b5b6cbd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -382,4 +382,9 @@ public interface NCElement extends NCMetadata, Serializable 
{
     default Optional<Boolean> isSparse() {
         return Optional.empty();
     }
+
+    // TODO: add Javadoc. Element-level greedy flag; empty (the default) defers to the model-level `isGreedy()`.
+    default Optional<Boolean> isGreedy() {
+        return Optional.empty();
+    }
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index c313bf7..efa2b68 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -357,6 +357,11 @@ abstract public class NCModelFileAdapter extends 
NCModelAdapter {
                             return nvl(js.isSparse(), proxy.isSparse());
                         }
 
+                        @Override
+                        public Optional<Boolean> isGreedy() {
+                            return nvl(js.isGreedy(), proxy.isGreedy());
+                        }
+
                         private<T> Optional<T> nvl(T t, T dflt) {
                             return Optional.of(t != null ? t : dflt);
                         }
@@ -484,6 +489,11 @@ abstract public class NCModelFileAdapter extends 
NCModelAdapter {
     }
 
     @Override
+    public boolean isGreedy() {
+        return proxy.isGreedy();
+    }
+
+    @Override
     public Map<String, Object> getMetadata() {
         return metadata;
     }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index c213098..80cd161 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -168,6 +168,9 @@ public interface NCModelView extends NCMetadata {
      */
     boolean DFLT_IS_SPARSE = false;
 
+    // TODO: add Javadoc. Default value for {@link #isGreedy()} method.
+    boolean DFLT_IS_GREEDY = true;
+
     /**
      * Default value for {@link #getMaxElementSynonyms()} method.
      */
@@ -818,6 +821,10 @@ public interface NCModelView extends NCMetadata {
         return DFLT_IS_SPARSE;
     }
 
+    default boolean isGreedy() {
+        return DFLT_IS_GREEDY;
+    }
+
     /**
      * Gets optional user defined model metadata that can be set by the 
developer and accessed later.
      * By default, it returns an empty map. Note that this metadata is mutable 
and can be
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index addca45..be7b995 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -36,6 +36,8 @@ public class NCElementJson {
     private Boolean isPermutateSynonyms;
     // Can be null.
     private Boolean isSparse;
+    // Can be null.
+    private Boolean isGreedy;
 
     public String getParentId() {
         return parentId;
@@ -97,4 +99,10 @@ public class NCElementJson {
     public void setSparse(Boolean sparse) {
         isSparse = sparse;
     }
+    public Boolean isGreedy() {
+        return isGreedy;
+    }
+    public void setGreedy(Boolean greedy) {
+        isGreedy = greedy;
+    }
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index d2459d3..f332e08 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -58,6 +58,7 @@ public class NCModelJson {
     private int maxTotalSynonyms = DFLT_MAX_TOTAL_SYNONYMS;
     private boolean isPermutateSynonyms = DFLT_IS_PERMUTATE_SYNONYMS;
     private boolean isSparse = DFLT_IS_SPARSE;
+    private boolean isGreedy = DFLT_IS_GREEDY;
     private int maxElementSynonyms = DFLT_MAX_TOTAL_SYNONYMS;
     private boolean maxSynonymsThresholdError = 
DFLT_MAX_SYNONYMS_THRESHOLD_ERROR;
     private long conversationTimeout = DFLT_CONV_TIMEOUT_MS;
@@ -202,6 +203,12 @@ public class NCModelJson {
     public boolean isSparse() {
         return isSparse;
     }
+    public boolean isGreedy() {
+        return isGreedy;
+    }
+    public void setGreedy(boolean greedy) {
+        isGreedy = greedy;
+    }
     public void setSparse(boolean sparse) {
         isSparse = sparse;
     }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
index 1d923ce..431097c 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/cmd/NCCommandManager.scala
@@ -242,6 +242,7 @@ object NCCommandManager extends NCService {
                                         override def getMaxTotalSynonyms: Int 
= mdl.getMaxTotalSynonyms
                                         override def isNoUserTokensAllowed: 
Boolean = mdl.isNoUserTokensAllowed
                                         override def isSparse: Boolean = 
mdl.isSparse
+                                        override def isGreedy: Boolean = 
mdl.isGreedy
                                         override def getMetadata: 
util.Map[String, AnyRef] = mdl.getMetadata
                                         override def getAdditionalStopWords: 
util.Set[String] = mdl.getAdditionalStopWords
                                         override def getExcludedStopWords: 
util.Set[String] = mdl.getExcludedStopWords
@@ -269,6 +270,9 @@ object NCCommandManager extends NCService {
 
                                                 // New method instead of 
`isSparse`
                                                 def getSparse: lang.Boolean
+
+                                                // New method instead of 
`isGreedy`
+                                                def getGreedy: lang.Boolean
                                             }
 
                                             val elm: NCElement =
@@ -288,10 +292,12 @@ object NCCommandManager extends NCService {
                                                     // Hidden.
                                                     override def 
isPermutateSynonyms: Optional[lang.Boolean] = null
                                                     override def isSparse: 
Optional[lang.Boolean] = null
+                                                    override def isGreedy: 
Optional[lang.Boolean] = null
 
                                                     // Wrapped.
                                                     override def 
getPermutateSynonyms: lang.Boolean = e.isPermutateSynonyms.orElse(null)
                                                     override def getSparse: 
lang.Boolean = e.isSparse.orElse(null)
+                                                    override def getGreedy: 
lang.Boolean = e.isGreedy.orElse(null)
                                                 }
                                             elm
                                         }).asJava
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 1f81711..51e624f 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -151,7 +151,7 @@ object NCModelEnricher extends NCProbeEnricher {
     /**
       *
       * @param ns
-      * @param elem
+      * @param elemId
       * @param toks
       * @param direct
       * @param syn
@@ -160,7 +160,7 @@ object NCModelEnricher extends NCProbeEnricher {
       */
     private def mark(
         ns: Sentence,
-        elem: NCElement,
+        elemId: String,
         toks: Seq[NlpToken],
         direct: Boolean,
         syn: Option[Synonym] = None,
@@ -189,7 +189,7 @@ object NCModelEnricher extends NCProbeEnricher {
 
         val idxs = toks.map(_.index).sorted
 
-        val note = NlpNote(idxs, elem.getId, params: _*)
+        val note = NlpNote(idxs, elemId, params: _*)
 
         toks.foreach(_.add(note))
 
@@ -269,7 +269,7 @@ object NCModelEnricher extends NCProbeEnricher {
                         if (!alreadyMarked(ns, elmId, matchedToks, 
matchedToks.map(_.index).sorted))
                             mark(
                                 ns,
-                                elem = mdl.elements.getOrElse(elmId, throw new 
NCE(s"Custom model parser returned unknown element: $elmId")),
+                                elemId = elmId,
                                 toks = matchedToks,
                                 direct = true,
                                 metaOpt = Some(e.getMetadata.asScala.toMap)
@@ -410,27 +410,28 @@ object NCModelEnricher extends NCProbeEnricher {
         dbgType: String,
         ns: Sentence,
         contCache: Cache,
-        elem: NCElement,
+        elemId: String,
+        greedy: Boolean,
         elemToks: Seq[NlpToken],
         sliceToksIdxs: Seq[Int],
         syn: Synonym,
-        parts: Seq[TokType] = Seq.empty)
-    : Unit = {
+        parts: Seq[TokType] = Seq.empty
+    ): Unit = {
         val resIdxs = elemToks.map(_.index)
         val resIdxsSorted = resIdxs.sorted
 
         if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted))
-            contCache(elem.getId) += sliceToksIdxs
+            contCache(elemId) += sliceToksIdxs
 
-        val ok = !alreadyMarked(ns, elem.getId, elemToks, sliceToksIdxs)
+        val ok = !greedy || !alreadyMarked(ns, elemId, elemToks, sliceToksIdxs)
 
         if (ok)
-            mark(ns, elem, elemToks, direct = syn.isDirect && 
U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
+            mark(ns, elemId, elemToks, direct = syn.isDirect && 
U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
 
-        if (DEEP_DEBUG)
-            logger.trace(
+//        if (DEEP_DEBUG)
+            logger.info(
                 s"${if (ok) "Added" else "Skipped"} element [" +
-                    s"id=${elem.getId}, " +
+                    s"id=$elemId, " +
                     s"type=$dbgType, " +
                     s"text='${elemToks.map(_.origText).mkString(" ")}', " +
                     s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
@@ -466,10 +467,11 @@ object NCModelEnricher extends NCProbeEnricher {
                         toks <- combToks;
                         idxs = toks.map(_.index);
                         e <- mdl.elements.values;
-                        eId = e.getId
+                        eId = e.getId;
+                        greedy = e.isGreedy.orElse(mdl.model.isGreedy)
                         if
-                            !contCache(eId).exists(_.containsSlice(idxs)) &&
-                            !alreadyMarked(ns, eId, toks, idxs)
+                            !greedy ||
+                            !contCache(eId).exists(_.containsSlice(idxs))  && 
!alreadyMarked(ns, eId, toks, idxs)
                     ) {
                         // 1. SIMPLE.
                         if (simpleEnabled && (if (idlEnabled) 
mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) {
@@ -485,7 +487,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                             syns.get(tokStems) match {
                                                 case Some(s) =>
                                                     found = true
-                                                    add("simple continuous", 
ns, contCache, e, toks, idxs, s)
+                                                    add("simple continuous", 
ns, contCache, eId, greedy, toks, idxs, s)
                                                 case None => notFound()
                                             }
 
@@ -493,7 +495,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                             for (s <- syns if !found)
                                                 if (s.isMatch(toks)) {
                                                     found = true
-                                                    add("simple continuous 
scan", ns, contCache, e, toks, idxs, s)
+                                                    add("simple continuous 
scan", ns, contCache, eId, greedy, toks, idxs, s)
                                                 }
 
                                         tryMap(
@@ -512,7 +514,7 @@ object NCModelEnricher extends NCProbeEnricher {
                             if (!found && mdl.hasSparseSynonyms)
                                 for (s <- get(mdl.sparseSynonyms, eId))
                                     s.sparseMatch(toks) match {
-                                        case Some(res) => add("simple sparse", 
ns, contCache, e, res, idxs, s)
+                                        case Some(res) => add("simple sparse", 
ns, contCache, eId, greedy, res, idxs, s)
                                         case None => // No-op.
                                     }
                         }
@@ -534,7 +536,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     data = comb.map(_.data)
                                 )
                                     if (s.isMatch(data, req)) {
-                                        add("IDL continuous", ns, contCache, 
e, toks, idxs, s, toParts(data, s))
+                                        add("IDL continuous", ns, contCache, 
eId, greedy, toks, idxs, s, toParts(data, s))
 
                                         idlCache += comb
 
@@ -551,7 +553,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                         case Some(res) =>
                                             val typ = if (s.sparse) "IDL 
sparse" else "IDL continuous"
 
-                                            add(typ, ns, contCache, e, 
toTokens(res, ns), idxs, s, toParts(res, s))
+                                            add(typ, ns, contCache, eId, 
greedy, toTokens(res, ns), idxs, s, toParts(res, s))
 
                                             idlCache += comb
                                         case None => // No-op.
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 74ead87..17d4f6c 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -30,7 +30,7 @@ import java.util
 import java.util.{List => JList}
 import scala.collection.mutable
 import scala.collection.parallel.CollectionConverters._
-import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava, 
SetHasAsJava}
+import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava, 
SetHasAsJava, SetHasAsScala}
 import scala.language.implicitConversions
 
 /**
@@ -732,18 +732,24 @@ object NCSentenceManager extends NCService {
             )
         )
 
+
         def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] = 
s.flatten.filter(!_.isNlp)
 
         // Drops similar sentences (with same notes structure). Keeps with 
more found.
+        val notGreedyElems: Set[String] = mdl.getElements.asScala.flatMap(e => 
if (!e.isGreedy.orElse(mdl.isGreedy)) Some(e.getId) else None).toSet // IDs of elements whose effective greedy flag (element value, else model default) is false.
+
         sens = 
sens.groupBy(notNlpNotes(_).groupBy(_.noteType).keys.toSeq.sorted.distinct).
-            flatMap(p => {
-                val m: Map[NCNlpSentence, Int] = p._2.map(p => p -> 
notNlpNotes(p).size).toMap
+            flatMap { case (types, sensSeq) =>
+                if (types.exists(notGreedyElems.contains))
+                    sensSeq
+                else {
+                    val m: Map[NCNlpSentence, Int] = sensSeq.map(p => p -> 
notNlpNotes(p).size).toMap
 
-                val max = m.values.max
+                    val max = m.values.max
 
-                m.filter(_._2 == max).keys
-            }).
-            toSeq
+                    m.filter(_._2 == max).keys
+                }
+            }.toSeq
 
         sens =
             sens.filter(s => {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
index 3598d2e..12bb325 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
@@ -19,7 +19,8 @@ package org.apache.nlpcraft
 
 import org.apache.nlpcraft.model.{NCElement, NCValue}
 
-import java.util
+import java.{lang, util}
+import java.util.Optional
 import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
 import scala.language.implicitConversions
 
@@ -29,9 +30,23 @@ import scala.language.implicitConversions
 case class NCTestElement(id: String, syns: String*) extends NCElement {
     private val values = new util.ArrayList[NCValue]
 
+    var metadata = super.getMetadata
+    var description = super.getDescription
+    var parentId = super.getParentId
+    var permutateSynonyms = super.isPermutateSynonyms
+    var sparse = super.isSparse
+    var greedy = super.isGreedy
+
     override def getId: String = id
     override def getSynonyms: util.List[String] = (syns :+ id).asJava
     override def getValues: util.List[NCValue] = values
+
+    override def getMetadata: util.Map[String, AnyRef] = metadata
+    override def getDescription: String = description
+    override def getParentId: String = parentId
+    override def isPermutateSynonyms: Optional[lang.Boolean] = 
permutateSynonyms
+    override def isSparse: Optional[lang.Boolean] = sparse
+    override def isGreedy: Optional[lang.Boolean] = greedy
 }
 
 /**
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
index 4a80e48..8670848 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec6.scala
@@ -17,28 +17,27 @@
 
 package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
 
-import org.apache.nlpcraft.model.{NCElement, NCModelAdapter}
-
 import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch, 
NCModelAdapter, NCResult}
 import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
 import org.junit.jupiter.api.Test
 
 import java.util
+import java.util.Optional
+import scala.jdk.CollectionConverters.SetHasAsJava
+
+class NCNestedTestModel6 extends NCModelAdapter("nlpcraft.nested5.test.mdl", 
"Test Model", "1.0") {
+    override def getAbstractTokens: util.Set[String] = Set("a", "b", 
"any").asJava
 
-class NCNestedTestModel6 extends NCModelAdapter(
-    "nlpcraft.nested5.test.mdl", "Nested Data Test Model", "1.0"
-) {
     override def getElements: util.Set[NCElement] =
         Set(
             NCTestElement("a"),
             NCTestElement("b"),
-            NCTestElement("any", "{//[a-zA-Z0-9]+//}[1, 3]"),
-            NCTestElement("compose", "^^[aRef]{tok_id() == 'a'}^^ 
^^[anRef]{tok_id() == 'any'}^^ ^^[bRef]{tok_id() == 'b'}^^"),
+            { val e = NCTestElement("any", "{//[a-zA-Z0-9]+//}[1, 3]"); 
e.greedy = Optional.of(false); e },
+            NCTestElement("compose", "^^[aRef]{tok_id() == 'a'}^^ 
^^[anRef]{tok_id() == 'any'}^^ ^^[bRef]{tok_id() == 'b'}^^")
         )
     @NCIntent("intent=compose term(city)={tok_id() == 'compose'}")
     private def onCompose(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
 }
-
 /**
   *
   */
@@ -48,5 +47,6 @@ class NCEnricherNestedModelSpec6 extends NCTestContext {
     def test(): Unit = {
         checkIntent("a t1 t2 t3 b", "compose")
         checkIntent("a t1 t2 b", "compose")
+        checkIntent("a t1 b", "compose")
     }
 }

Reply via email to