This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-287 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 2b8bef53b14aa3821ea3323a639f292dd0cfe5bf Author: Sergey Kamov <[email protected]> AuthorDate: Thu Apr 15 11:42:20 2021 +0300 WIP. --- .../org/apache/nlpcraft/common/util/NCUtils.scala | 6 ++- .../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 10 +---- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 51 ++++++++-------------- 3 files changed, 24 insertions(+), 43 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala index 5d25c25..636878b 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala @@ -2131,10 +2131,12 @@ object NCUtils extends LazyLogging { def isContinuous(idxs: Seq[Int]): Boolean = { require(idxs.nonEmpty) - idxs.size match { + val list = idxs.view + + list.size match { case 0 ⇒ throw new AssertionError() case 1 ⇒ true - case _ ⇒ idxs.zip(idxs.tail).forall { case (x, y) ⇒ x + 1 == y } + case _ ⇒ list.zip(list.tail).forall { case (x, y) ⇒ x + 1 == y } } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala index 207bc02..b944ccc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala @@ -134,15 +134,7 @@ class NCProbeSynonym( state = -1 } - if ( - state != -1 && - all.size == res.size - && - ( - !shouldBeNeighbors || - U.isIncreased(res.map(getIndex).sorted) - ) - ) + if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).sorted))) Some(res) else None diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index d85f429..353687b 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -156,16 +156,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { * @param toks * @param direct * @param syn - * @param metaOpt * @param parts - * @param continuous + * @param metaOpt */ private def mark( ns: NCNlpSentence, elem: NCElement, toks: Seq[NlpToken], direct: Boolean, - continuous: java.lang.Boolean, syn: Option[Synonym] = None, parts: Seq[TokType] = Seq.empty, metaOpt: Option[Map[String, Object]] = None @@ -175,10 +173,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { // For system elements. params += "direct" → direct.asInstanceOf[AnyRef] - // Internal usage. - params += "sortedTokensIndexes" → toks.map(_.index).sorted.asJava - params += "continuous" → continuous - syn match { case Some(s) ⇒ if (s.isValueSynonym) @@ -287,15 +281,12 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { ) // Checks element's tokens. - val idxs = matchedToks.map(_.index).sorted - - if (!alreadyMarked(ns, elemId, matchedToks, idxs)) + if (!alreadyMarked(ns, elemId, matchedToks, matchedToks.map(_.index).sorted)) mark( ns, elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")), toks = matchedToks, direct = true, - U.isContinuous(idxs), metaOpt = Some(e.getMetadata.asScala) ) }) @@ -443,9 +434,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { : Unit = { val resIdxs = res.map(_.index) - val continuous = U.isContinuous(resIdxs.sorted) - - if (continuous && resIdxs == allToksIdxs) + if (U.isContinuous(resIdxs.sorted) && resIdxs == allToksIdxs) contCache(elm.getId) += allToksIdxs val ok = !alreadyMarked(ns, elm.getId, res, allToksIdxs) @@ -453,7 +442,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { if (ok) { val direct = syn.isDirect && U.isIncreased(resIdxs) - mark(ns, elm, res, direct, continuous, syn = Some(syn), parts) + mark(ns, elm, res, direct, syn = Some(syn), parts) } if (DEEP_DEBUG) @@ -464,7 +453,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { s"text='${res.map(_.origText).mkString(" ")}', " + s"indexes=${resIdxs.mkString("[", ",", "]")}, " + s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " + - s"continuous=$continuous, " + s"synonym=$syn" + s"]" ) @@ -572,27 +560,26 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { /** * - * @param ns * @param elemId * @param toks * @param allSortedSliceIdxs */ private def alreadyMarked(ns: NCNlpSentence, elemId: String, toks: Seq[NlpToken], allSortedSliceIdxs: Seq[Int]): Boolean = { lazy val toksIdxsSorted = toks.map(_.index).sorted - lazy val isCont = U.isContinuous(toksIdxsSorted) - - ns.flatten.exists( - n ⇒ - n.noteType == elemId && - { - lazy val nToksIdxsSorted = n.data[JList[Int]]("sortedTokensIndexes").asScala - - n.data[Boolean]("continuous") && allSortedSliceIdxs.containsSlice(nToksIdxsSorted) || - { - nToksIdxsSorted == toksIdxsSorted || - isCont && U.isContinuous(nToksIdxsSorted) && nToksIdxsSorted.containsSlice(toksIdxsSorted) - } - } - ) + + allSortedSliceIdxs.map(ns).forall(_.exists(n ⇒ n.noteType == elemId && n.isContiguous)) || + toks.exists(_.exists(n ⇒ + n.noteType == elemId && + ( + (n.isContiguous && + (allSortedSliceIdxs.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted)) + ) + || + ( + n.tokenIndexes == toksIdxsSorted || + n.tokenIndexes.containsSlice(toksIdxsSorted) && U.isContinuous(toksIdxsSorted) && U.isContinuous(n.tokenIndexes) + ) + ) + )) } } \ No newline at end of file
