This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 22c6bd2 WIP.
22c6bd2 is described below
commit 22c6bd2c1582d069056858c3d405be93817bd855
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 29 23:11:55 2021 +0300
WIP.
---
.../semantic/impl/NCSemanticEntityParserImpl.scala | 50 +++++++++-------------
1 file changed, 21 insertions(+), 29 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index ded73cc..5ebd99e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -47,30 +47,26 @@ object NCSemanticEntityParserImpl:
(for (n <- toks.size until 0 by -1) yield
toks.sliding(n)).flatten.map(p => p)
private def startsAndEnds(fix: String, s: String): Boolean =
s.startsWith(fix) && s.endsWith(fix)
- private def mkChunk(stemmer: NCStemmer, chunk: String): NCSynonymChunk = {
+ private def mkChunk(stemmer: NCStemmer, chunk: String): NCSynonymChunk =
def stripSuffix(fix: String, s: String): String = s.slice(fix.length,
s.length - fix.length)
// Regex synonym.
- if (startsAndEnds(REGEX_FIX, chunk)) {
+ if startsAndEnds(REGEX_FIX, chunk) then
val ptrn = stripSuffix(REGEX_FIX, chunk)
- if (ptrn.nonEmpty) {
+ if ptrn.nonEmpty then
try
NCSynonymChunk(kind = REGEX, text = chunk, regex =
Pattern.compile(ptrn))
- catch {
+ catch
case e: PatternSyntaxException =>
throw new NCException(s"Invalid regex synonym syntax
detected [" +
s"chunk=$chunk" +
s"]", e)
- }
- }
else
throw new NCException(s"Empty regex synonym detected [" +
s"chunk=$chunk" +
s"]")
- }
else
NCSynonymChunk(kind = TEXT, text = chunk, stem =
stemmer.stem(chunk))
- }
private def getPieces(toks: Seq[NCToken]): Seq[Piece] =
combos(toks).map(combo => {
@@ -120,12 +116,11 @@ class NCSemanticEntityParserImpl(stemmer: NCStemmer, macros: Map[String, String]
case class Holder(elemId: String, synonyms: Seq[NCSynonym])
- val all = mutable.ArrayBuffer.empty[Holder]
+ val buf = mutable.ArrayBuffer.empty[Holder]
- elements.foreach(e => {
+ elements.foreach(e =>
if e.getSynonyms != null then
val syns = e.getSynonyms.asScala
-
val susp = syns.filter(syn => !syn.contains("//") &&
SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
if susp.nonEmpty then
@@ -137,7 +132,7 @@ class NCSemanticEntityParserImpl(stemmer: NCStemmer, macros: Map[String, String]
)
// TODO: NCSynonym + trim for lines etc
- all += Holder(
+ buf += Holder(
e.getId,
syns.
flatMap(p.expand).
@@ -145,38 +140,35 @@ class NCSemanticEntityParserImpl(stemmer: NCStemmer, macros: Map[String, String]
// TODO:
toSeq.map(chunks => NCSynonym(false, false, null,
chunks))
)
- })
+ )
sortedSyns =
- all.groupBy(_.synonyms.size).map {
- case (len, hs) =>
+ buf.groupBy(_.synonyms.size).map {
+ (len, hs) =>
len -> hs.groupBy(_.elemId).map { case (id, seq) => id ->
seq.flatMap(_.synonyms).toSeq.sorted }
}
override def stop(): Unit = sortedSyns = null
- override def parse(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): JList[NCEntity] =
+ override def parse(req: NCRequest, cfg: NCModelConfig, toksList:
JList[NCToken]): JList[NCEntity] =
val cache = mutable.HashSet.empty[Seq[Int]]
- val entities = mutable.ArrayBuffer.empty[NCEntity]
+ val ents = mutable.ArrayBuffer.empty[NCEntity]
+ val toks = toksList.asScala.toSeq
- def tryMatch(base: Seq[NCToken], toks: Seq[NCToken]): Unit =
+ for (piece <- getPieces(toks); extra <- Seq(piece.main) ++ piece.extra)
val idxs = toks.map(_.getIndex)
- if (cache.add(idxs))
- for ((elemId, syns) <- sortedSyns.getOrElse(toks.size,
Seq.empty))
+ if cache.add(idxs) then
+ for ((id, syns) <- sortedSyns.getOrElse(toks.size, Seq.empty))
var found = false
for (s <- syns if !found)
- if (s.isMatch(toks))
+ if s.isMatch(toks) then
found = true
- entities +=
+ ents +=
new NCPropertyMapAdapter with NCEntity:
- override def getTokens: JList[NCToken] =
base.asJava
+ override def getTokens: JList[NCToken] =
piece.main.asJava
override def getRequestId: String =
req.getRequestId
- override def getId: String = elemId
-
- val value = getPieces(toks.asScala.toSeq)
- for (piece <- value; extra <- Seq(piece.main) ++ piece.extra)
- tryMatch(piece.main, extra)
+ override def getId: String = id
- entities.toSeq.asJava
\ No newline at end of file
+ ents.toSeq.asJava
\ No newline at end of file