This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-471
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-471 by this push:
new 490c722 WIP.
490c722 is described below
commit 490c72205c2f0a7fffa1b8780be907fc73809566
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Dec 28 16:55:49 2021 +0300
WIP.
---
.../opennlp/impl/NCOpenNlpEntityParserImpl.scala | 48 +++++++++++-----------
1 file changed, 23 insertions(+), 25 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala
index 4aac412..ecfa978 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala
@@ -39,44 +39,42 @@ object NCOpenNlpEntityParserImpl {
 /**
   *
   */
-class NCOpenNlpEntityParserImpl(is: InputStream, res: String) extends NCEntityParser with LazyLogging:
+class NCOpenNlpEntityParserImpl(is: InputStream, res: String) extends NCEntityParser with LazyLogging :
     @volatile private var finder: NameFinderME = _
 
+    private case class Holder(start: Int, end: Int, name: String, probability: Double)
+
     override def start(): Unit =
         finder = new NameFinderME(new TokenNameFinderModel(NCUtils.getStream(res)))
         logger.trace(s"Loaded resource: $res")
 
     override def stop(): Unit = finder = null
 
-    override def parse(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): JList[NCEntity] =
-        val toksSeq = toks.asScala
-        val words = toksSeq.toArray.map(_.getOriginalText)
-
-        case class Holder(start: Int, end: Int, name: String, probability: Double)
-
-        val hs = this.synchronized {
+    private def find(words: Array[String]): Seq[Holder] =
+        this.synchronized {
             try
-                finder.find(words).map(p => Holder(p.getStart, p.getEnd - 1, p.getType, p.getProb) ).toSeq
+                finder.find(words).map(p => Holder(p.getStart, p.getEnd - 1, p.getType, p.getProb)).toSeq
             finally
                 finder.clearAdaptiveData()
         }
 
-        val ents = new util.ArrayList[NCEntity]()
+    override def parse(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): JList[NCEntity] =
+        val toksIdxs = toks.asScala.zipWithIndex
 
-        if hs.nonEmpty then
-            val toksIdxs = toks.asScala.zipWithIndex
+        find(toksIdxs.map { case (t, _) => t.getOriginalText }.toArray).flatMap(h =>
+            def calcIndex(getHolderIndex: Holder => Int): Int =
+                toksIdxs.find { case (_, idx) => idx == getHolderIndex(h) } match
+                    case Some((_, idx)) => idx
+                    case None => -1
 
-            for ((h, hIdx) <- hs.zipWithIndex)
-                def calcIndex(getHolderIndex: Holder => Int) =
-                    toksIdxs.find { case (_, idx) => idx == getHolderIndex(h) } match
-                        case Some((_, idx)) => idx
-                        case None => -1
+            val i1 = calcIndex(_.start)
+            lazy val i2 = calcIndex(_.end)
 
-                val i1 = calcIndex(_.start)
-                lazy val i2 = calcIndex(_.end)
+            if i1 != -1 && i2 != -1 then
+                Some(
+                    new NCPropertyMapAdapter with NCEntity {
+                        put(s"opennlp:${h.name}:probability", h.probability)
 
-                if i1 != -1 && i2 != -1 then
-                    val ent = new NCPropertyMapAdapter with NCEntity {
                         override def getTokens: JList[NCToken] =
                             toksIdxs.flatMap { case (t, idx) => if idx >= i1 && idx <= i2 then Some(t) else None }.asJava
                         override def getRequestId: String = req.getRequestId
@@ -84,7 +82,7 @@ class NCOpenNlpEntityParserImpl(is: InputStream, res: String) extends NCEntityPa
                         override def getIndex: Int = 0 // TODO:
                         override def getGuid: String = NCUtils.genUUID().toString
                     }
-                    ent.put(s"opennlp:${h.name}:probability", h.probability)
-                    ents.add(ent);
-
-        util.Collections.unmodifiableList(ents) // TODO: should we wrap?
\ No newline at end of file
+                )
+            else
+                None
+        ).asJava
\ No newline at end of file
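
For reference, below is a minimal standalone sketch (not part of this patch) of the OpenNLP name-finder calls that the synchronized find() helper above wraps. The model file name, the @main entry point and the sample tokens are hypothetical placeholders.

    import java.io.FileInputStream
    import opennlp.tools.namefind.{NameFinderME, TokenNameFinderModel}

    @main def openNlpNerSketch(): Unit =
        // Load a pre-trained OpenNLP token-name-finder model (file name is a placeholder).
        val is = new FileInputStream("en-ner-location.bin")
        val finder =
            try new NameFinderME(new TokenNameFinderModel(is))
            finally is.close()

        // NameFinderME.find() expects an already tokenized sentence.
        val words = Array("A", "trip", "to", "San", "Francisco")
        try
            for span <- finder.find(words) do
                // Span.getEnd is exclusive, which is why the parser stores getEnd - 1 as an inclusive index.
                println(s"${span.getType}: tokens [${span.getStart}, ${span.getEnd - 1}], probability ${span.getProb}")
        finally
            // NameFinderME accumulates adaptive data; the parser clears it after every call.
            finder.clearAdaptiveData()

The patched parse() applies the same flow to the request tokens and then maps each returned span back onto the NCToken indices via calcIndex.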