This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-483-1-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483-1-1 by this push:
new 649904e WIP.
649904e is described below
commit 649904ed3210525eb09e6f5219d2c56923357041
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Mar 11 14:18:08 2022 +0300
WIP.
---
.../parser/impl/NCSemanticEntityParserImpl.scala | 31 ++++++++++++----------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala
index 9fdad68..7de4cde 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala
@@ -228,9 +228,11 @@ class NCSemanticEntityParserImpl(
Map.empty
val cache = mutable.HashSet.empty[Seq[Int]] // Variants (tokens
without stopwords) can be repeated.
+
case class Holder(elemId: String, tokens: Seq[NCToken], value:
Option[String]):
private val idxs = tokens.map(_.getIndex).toSet
def isSuperSet(toks: Seq[NCToken]): Boolean = idxs.size >
toks.size && toks.map(_.getIndex).toSet.subsetOf(idxs)
+
val hs = mutable.ArrayBuffer.empty[Holder]
for (piece <- getPieces(toks) if
!hs.exists(_.isSuperSet(piece.baseTokens));
@@ -253,20 +255,21 @@ class NCSemanticEntityParserImpl(
elems.foreach(elem =>
add(elem.elementId, elem.value))
case None => // No-op.
// With regex.
- if !found then
- for ((elemId, syns) <-
synsHolder.mixedSynonyms.getOrElse(variant.size, Seq.empty))
- for (s <- syns if !found)
- found = s.chunks.zip(variant).
- sortBy { (chunk, _) => if chunk.isText
then 0 else 1 }.
- forall { (chunk, tok) =>
- if chunk.isText then
- chunk.stem == stems(tok) ||
(stems4Lemms.nonEmpty && chunk.stem == stems4Lemms(tok))
- else
- def match0(txt: String) =
chunk.regex.matcher(txt).matches()
- match0(tok.getText) ||
match0(tok.getText.toLowerCase)
- }
-
- if found then add(elemId,
Option.when(s.value != null)(s.value))
+ for ((elemId, syns) <-
synsHolder.mixedSynonyms.getOrElse(variant.size, Seq.empty))
+ found = false
+
+ for (s <- syns if !found)
+ found = s.chunks.zip(variant).
+ sortBy { (chunk, _) => if chunk.isText
then 0 else 1 }.
+ forall { (chunk, tok) =>
+ if chunk.isText then
+ chunk.stem == stems(tok) ||
(stems4Lemms.nonEmpty && chunk.stem == stems4Lemms(tok))
+ else
+ def match0(txt: String) =
chunk.regex.matcher(txt).matches()
+ match0(tok.getText) ||
match0(tok.getText.toLowerCase)
+ }
+
+ if found then add(elemId, Option.when(s.value
!= null)(s.value))
hs.toSeq.map(h => {
val e = elemsMap(h.elemId)