This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 6e8deb8ff5c08c1e764c0ed313fe77f51c7eaccf Author: Sergey Kamov <[email protected]> AuthorDate: Tue Apr 13 13:04:02 2021 +0300 WIP. --- .../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 12 +++++----- .../probe/mgrs/deploy/NCDeployManager.scala | 2 +- .../nlpcraft/probe/mgrs/model/NCModelManager.scala | 6 ++--- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 26 +++++----------------- 4 files changed, 15 insertions(+), 31 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala index bc41b96..7a7d794 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala @@ -228,9 +228,11 @@ class NCProbeSynonym( } override def toString(): String = mkString(" ") - + // Orders synonyms from least to most significant. override def compare(that: NCProbeSynonym): Int = { + require(sparse == that.sparse, s"Invalid comparing [this=$this, that=$that]") + def compareIsValueSynonym(): Int = isValueSynonym match { case true if !that.isValueSynonym ⇒ 1 @@ -257,14 +259,10 @@ class NCProbeSynonym( 1 else if (!isDirect && that.isDirect) -1 - else if (sparse && !that.sparse) - 1 - else if (!sparse && that.sparse) - -1 else if (permute && !that.permute) - 1 - else if (!permute && that.permute) -1 + else if (!permute && that.permute) + 1 else // Both direct or indirect. 
isTextOnly match { case true if !that.isTextOnly ⇒ 1 diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala index d908b62..aeda6c7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala @@ -276,7 +276,7 @@ object NCDeployManager extends NCService with DecorateAsScala { add(seq, isDirect = seq == chunks, perm = true, sparse = hasDsl(seq)) ) else - add(chunks, isDirect = true, perm = false, sparse = hasDsl(chunks) || (sparseFlag && chunks.size > 1)) + add(chunks, isDirect = true, perm = permuteFlag, sparse = hasDsl(chunks) || (sparseFlag && chunks.size > 1)) } /** diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala index 9970e19..aa1dbf4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala @@ -58,9 +58,9 @@ object NCModelManager extends NCService with DecorateAsScala { data.values.foreach(w ⇒ { val mdl = w.model - println("w.directSynonyms="+w.continuousSynonyms.getOrElse("col:orders_shipped_date", Map.empty).mkString("\n")) - println("w.sparseSynonyms="+w.sparseSynonyms.getOrElse("col:orders_shipped_date", Seq.empty).mkString("\n")) - println("w.synonymsDsl="+w.dslSynonyms.getOrElse("col:orders_shipped_date", Seq.empty).mkString("\n")) + println("w.directSynonyms="+w.continuousSynonyms.getOrElse("col:orders_order_date", Map.empty).mkString("\n")) + println("w.sparseSynonyms="+w.sparseSynonyms.getOrElse("col:orders_order_date", Seq.empty).mkString("\n")) + println("w.synonymsDsl="+w.dslSynonyms.getOrElse("col:orders_order_date", Seq.empty).mkString("\n")) 
println val synСontCnt = w.continuousSynonyms.flatMap(_._2.map(_._2.count)).sum diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index 4d78847..adf1358 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -295,7 +295,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { ).getOrElse(throw new AssertionError(s"Custom model parser returned an invalid custom token: $w")) ) - // Checks element's tokens. val idxs = matchedToks.map(_.index) val continuous = U.isContinuous(idxs.sorted) @@ -350,23 +349,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { toks.flatten.exists(n ⇒ n.noteType == elemId && { - val res = if (n.data("continuous").asInstanceOf[Boolean]) - true + false else { if (continuous) false else n.data("allToksIndexes").asInstanceOf[JList[Int]].asScala.containsSlice(allToksIndexes) } - -// println(s"n=$n") -// println(s"res=$res, continuous=$continuous, toksIdxs=${toks.map(_.index)}, all="+n.data("allToksIndexes")) -// println - - - res - } ) @@ -515,7 +505,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { val continuous = U.isContinuous(resIdxs.sorted) - if (continuous) + if (continuous && resIdxs == allToksIdxs) contCache(elm.getId) += allToksIdxs val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId) @@ -528,15 +518,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { if (DEEP_DEBUG) println( - s"Found element [" + + s"${if (added) "Added" else "Skipped"} element [" + s"id=${elm.getId}, " + s"type=$typ, " + s"text='${res.map(_.origText).mkString(" ")}', " + s"indexes=${resIdxs.mkString("[", ",", "]")}, " + 
s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " + s"continuous=$continuous, " + - s"synonym=$s, " + - s"added=$added" + + s"synonym=$s" + s"]" ) } @@ -549,10 +538,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { // Attempt to match each element. for ( elm ← mdl.elements.values; - elemId = elm.getId; - contProc = contCache(elemId).exists(_.containsSlice(tokIdxs)) + elemId = elm.getId if - !contProc && + !contCache(elemId).exists(_.containsSlice(tokIdxs)) && // Checks whole tokens slice. !alreadyMarked(toks, tokIdxs, continuous = true, elemId) ) { @@ -624,8 +612,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { } } } - - } if (DEEP_DEBUG)
