This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new 8f0b0d9 WIP.
8f0b0d9 is described below
commit 8f0b0d9c94df53e8bd1a056516a10e2407b0a84c
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 14:07:02 2021 +0300
WIP.
---
.../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 2 +-
.../apache/nlpcraft/model/impl/NCTokenImpl.scala | 31 ++++++++++------------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 5 ++--
3 files changed, 17 insertions(+), 21 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 45fc3a8..1574787 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -89,7 +89,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
this.noteType == n.noteType &&
this.wordIndexes.size == n.wordIndexes.size &&
this.wordIndexes.zip(n.wordIndexes).map(p => p._1 - p._2).distinct.size == 1 &&
- this.clone(Seq(0), Seq(0)) == n.clone(Seq(0), Seq(0))
+ this.filter(p => !SKIP_CLONE.contains(p._1)) == n.filter(p => !SKIP_CLONE.contains(p._1))
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 4b2f251..1bd9add 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -17,17 +17,16 @@
package org.apache.nlpcraft.model.impl
-import java.io.{Serializable => JSerializable}
-import java.util.Collections
-import java.util.{List => JList}
-
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
+import java.io.{Serializable => JSerializable}
+import java.lang
+import java.util.{Collections, List => JList}
import scala.collection.mutable
-import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, MapHasAsScala, SeqHasAsJava}
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
/**
*
@@ -49,9 +48,9 @@ private[nlpcraft] class NCTokenImpl(
value: String,
startCharIndex: Int,
endCharIndex: Int,
- meta: Map[String, Object],
+ meta: java.util.Map[String, Object],
isAbstractProp: Boolean
-) extends NCMetadataAdapter(new java.util.HashMap(mutable.HashMap(meta.toSeq:_ *).asJava)) with NCToken with JSerializable {
+) extends NCMetadataAdapter(meta) with NCToken with JSerializable {
require(mdl != null)
require(srvReqId != null)
require(id != null)
@@ -105,12 +104,12 @@ private[nlpcraft] object NCTokenImpl {
// nlpcraft:nlp and some optional (after collapsing).
require(tok.size <= 2, s"Unexpected token [size=${tok.size}, token=$tok]")
- val md = mutable.HashMap.empty[String, JSerializable]
+ val md = new java.util.HashMap[String, AnyRef]()
tok.foreach(n => {
val id = n.noteType.toLowerCase
- n.asMetadata().foreach { case (k, v) => md += s"$id:$k" -> v}
+ n.asMetadata().foreach { case (k, v) => md.put(s"$id:$k", v.asInstanceOf[AnyRef]) }
})
val usrNotes = tok.filter(_.isUser)
@@ -118,8 +117,6 @@ private[nlpcraft] object NCTokenImpl {
// No overlapping allowed at this point.
require(usrNotes.size <= 1, s"Unexpected elements notes: $usrNotes")
- def convertMeta(): ScalaMeta = md.toMap.map(p => p._1 -> p._2.asInstanceOf[AnyRef])
-
usrNotes.headOption match {
case Some(usrNote) =>
require(mdl.elements.contains(usrNote.noteType), s"Element is not found: ${usrNote.noteType}")
@@ -139,9 +136,9 @@ private[nlpcraft] object NCTokenImpl {
}
// Special synthetic meta data element.
- md.put("nlpcraft:nlp:freeword", false)
+ md.put("nlpcraft:nlp:freeword", java.lang.Boolean.FALSE)
- elm.getMetadata.asScala.foreach { case (k, v) => md.put(k, v.asInstanceOf[JSerializable]) }
+ md.putAll(elm.getMetadata)
new NCTokenImpl(
mdl.model,
@@ -153,7 +150,7 @@ private[nlpcraft] object NCTokenImpl {
value = usrNote.dataOpt("value").orNull,
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
- meta = convertMeta(),
+ meta = md,
isAbstractProp = mdl.model.getAbstractTokens.contains(elm.getId)
)
@@ -162,10 +159,10 @@ private[nlpcraft] object NCTokenImpl {
val note = tok.toSeq.minBy(n => if (n.isNlp) 1 else 0)
- val isStop: Boolean = md("nlpcraft:nlp:stopword").asInstanceOf[Boolean]
+ val isStop = md.get("nlpcraft:nlp:stopword").asInstanceOf[Boolean]
// Special synthetic meta data element.
- md.put("nlpcraft:nlp:freeword", !isStop && note.isNlp)
+ md.put("nlpcraft:nlp:freeword", lang.Boolean.valueOf(!isStop && note.isNlp))
new NCTokenImpl(
mdl.model,
@@ -177,7 +174,7 @@ private[nlpcraft] object NCTokenImpl {
value = null,
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
- meta = convertMeta(),
+ meta = md,
isAbstractProp = mdl.model.getAbstractTokens.contains(note.noteType)
)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6c0f6f2..37ca1f4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -454,7 +454,7 @@ object NCModelEnricher extends NCProbeEnricher {
* @param h
* @param toks
*/
- private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = {
+ private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: mutable.HashSet[Seq[Complex]]): Seq[Seq[Complex]] = {
val idxs = toks.flatMap(_.wordIndexes).toSet
h.complexes.par.
@@ -537,7 +537,6 @@ object NCModelEnricher extends NCProbeEnricher {
val combToks = combosTokens(ns.toSeq)
lazy val ch = mkComplexes(mdl, ns)
-
def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
startScopedSpan(
"execute", span, "srvReqId" -> ns.srvReqId, "mdlId" ->
mdl.model.getId, "txt" -> ns.text
@@ -610,7 +609,7 @@ object NCModelEnricher extends NCProbeEnricher {
// 2. IDL.
if (idlEnabled) {
val allSyns = get(mdl.idlSynonyms, eId)
- lazy val allCombs = mkCombinations(ch, toks, idlCache.toSet)
+ lazy val allCombs = mkCombinations(ch, toks, idlCache)
// 2.1 Continuous.
if (!mdl.hasSparseSynonyms) {