[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

sergeykamov Sun, 19 Sep 2021 04:07:15 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
     new 8f0b0d9  WIP.
8f0b0d9 is described below

commit 8f0b0d9c94df53e8bd1a056516a10e2407b0a84c
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 14:07:02 2021 +0300

    WIP.
---
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |  2 +-
 .../apache/nlpcraft/model/impl/NCTokenImpl.scala   | 31 ++++++++++------------
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  5 ++--
 3 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 45fc3a8..1574787 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -89,7 +89,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
         this.noteType == n.noteType &&
         this.wordIndexes.size == n.wordIndexes.size &&
         this.wordIndexes.zip(n.wordIndexes).map(p => p._1 - p._2).distinct.size == 1 &&
-        this.clone(Seq(0), Seq(0)) == n.clone(Seq(0), Seq(0))
+        this.filter(p => !SKIP_CLONE.contains(p._1)) == n.filter(p => !SKIP_CLONE.contains(p._1))
 
     /**
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 4b2f251..1bd9add 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -17,17 +17,16 @@
 
 package org.apache.nlpcraft.model.impl
 
-import java.io.{Serializable => JSerializable}
-import java.util.Collections
-import java.util.{List => JList}
-
 import org.apache.nlpcraft.common._
 import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeModel
 
+import java.io.{Serializable => JSerializable}
+import java.lang
+import java.util.{Collections, List => JList}
 import scala.collection.mutable
-import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, MapHasAsScala, SeqHasAsJava}
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
 
 /**
   *
@@ -49,9 +48,9 @@ private[nlpcraft] class NCTokenImpl(
     value: String,
     startCharIndex: Int,
     endCharIndex: Int,
-    meta: Map[String, Object],
+    meta: java.util.Map[String, Object],
     isAbstractProp: Boolean
-) extends NCMetadataAdapter(new java.util.HashMap(mutable.HashMap(meta.toSeq:_ *).asJava)) with NCToken with JSerializable {
+) extends NCMetadataAdapter(meta) with NCToken with JSerializable {
     require(mdl != null)
     require(srvReqId != null)
     require(id != null)
@@ -105,12 +104,12 @@ private[nlpcraft] object NCTokenImpl {
         // nlpcraft:nlp and some optional (after collapsing).
         require(tok.size <= 2, s"Unexpected token [size=${tok.size}, token=$tok]")
 
-        val md = mutable.HashMap.empty[String, JSerializable]
+        val md = new java.util.HashMap[String, AnyRef]()
 
         tok.foreach(n => {
             val id = n.noteType.toLowerCase
 
-            n.asMetadata().foreach { case (k, v) => md += s"$id:$k" -> v}
+            n.asMetadata().foreach { case (k, v) => md.put(s"$id:$k", v.asInstanceOf[AnyRef]) }
         })
 
         val usrNotes = tok.filter(_.isUser)
@@ -118,8 +117,6 @@ private[nlpcraft] object NCTokenImpl {
         // No overlapping allowed at this point.
         require(usrNotes.size <= 1, s"Unexpected elements notes: $usrNotes")
 
-        def convertMeta(): ScalaMeta = md.toMap.map(p => p._1 -> p._2.asInstanceOf[AnyRef])
-
         usrNotes.headOption match {
             case Some(usrNote) =>
                 require(mdl.elements.contains(usrNote.noteType), s"Element is not found: ${usrNote.noteType}")
@@ -139,9 +136,9 @@ private[nlpcraft] object NCTokenImpl {
                 }
 
                 // Special synthetic meta data element.
-                md.put("nlpcraft:nlp:freeword", false)
+                md.put("nlpcraft:nlp:freeword", java.lang.Boolean.FALSE)
 
-                elm.getMetadata.asScala.foreach { case (k, v) => md.put(k, v.asInstanceOf[JSerializable]) }
+                md.putAll(elm.getMetadata)
 
                 new NCTokenImpl(
                     mdl.model,
@@ -153,7 +150,7 @@ private[nlpcraft] object NCTokenImpl {
                     value = usrNote.dataOpt("value").orNull,
                     startCharIndex = tok.startCharIndex,
                     endCharIndex = tok.endCharIndex,
-                    meta = convertMeta(),
+                    meta = md,
                     isAbstractProp = mdl.model.getAbstractTokens.contains(elm.getId)
                 )
 
@@ -162,10 +159,10 @@ private[nlpcraft] object NCTokenImpl {
 
                 val note = tok.toSeq.minBy(n => if (n.isNlp) 1 else 0)
 
-                val isStop: Boolean = md("nlpcraft:nlp:stopword").asInstanceOf[Boolean]
+                val isStop = md.get("nlpcraft:nlp:stopword").asInstanceOf[Boolean]
 
                 // Special synthetic meta data element.
-                md.put("nlpcraft:nlp:freeword", !isStop && note.isNlp)
+                md.put("nlpcraft:nlp:freeword", lang.Boolean.valueOf(!isStop && note.isNlp))
 
                 new NCTokenImpl(
                     mdl.model,
@@ -177,7 +174,7 @@ private[nlpcraft] object NCTokenImpl {
                     value = null,
                     startCharIndex = tok.startCharIndex,
                     endCharIndex = tok.endCharIndex,
-                    meta = convertMeta(),
+                    meta = md,
                     isAbstractProp = mdl.model.getAbstractTokens.contains(note.noteType)
                 )
         }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6c0f6f2..37ca1f4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -454,7 +454,7 @@ object NCModelEnricher extends NCProbeEnricher {
       * @param h
       * @param toks
       */
-    private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = {
+    private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: mutable.HashSet[Seq[Complex]]): Seq[Seq[Complex]] = {
         val idxs = toks.flatMap(_.wordIndexes).toSet
 
         h.complexes.par.
@@ -537,7 +537,6 @@ object NCModelEnricher extends NCProbeEnricher {
             val combToks = combosTokens(ns.toSeq)
             lazy val ch = mkComplexes(mdl, ns)
 
-
             def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
                 startScopedSpan(
                     "execute", span, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text
@@ -610,7 +609,7 @@ object NCModelEnricher extends NCProbeEnricher {
                         // 2. IDL.
                         if (idlEnabled) {
                             val allSyns = get(mdl.idlSynonyms, eId)
-                            lazy val allCombs = mkCombinations(ch, toks, idlCache.toSet)
+                            lazy val allCombs = mkCombinations(ch, toks, idlCache)
 
                             // 2.1 Continuous.
                             if (!mdl.hasSparseSynonyms) {

[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

Reply via email to