This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new f7c3611 WIP.
f7c3611 is described below
commit f7c361193e8f599eb550c4842bfa53fe18248b41
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 18 12:16:45 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 31 +++++++++++-----------
1 file changed, 15 insertions(+), 16 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 058d713..6c0f6f2 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -681,23 +681,22 @@ object NCModelEnricher extends NCProbeEnricher {
usrNotes.
filter(n => !links.contains(NoteLink(n.noteType,
n.tokenIndexes.sorted))).
filter(n => !parts.contains(NCTokenPartKey(n, ns))).
- foreach(n => {
- val hasBetter =
- usrNotes.exists(candidate =>
- candidate != n &&
- candidate.noteType == n.noteType &&
- candidate.dataOpt("parts") == n.dataOpt("parts") &&
-
candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
- n.wordIndexes.filter(n =>
!candidate.wordIndexes.contains(n)).
- forall(wordIdx => ns.tokens.exists(t =>
t.wordIndexes.contains(wordIdx) && t.isStopWord)))
-
- if (hasBetter) {
- ns.removeNote(n)
-
- // TODO: trace.
- logger.info(s"Element removed: ${n}")
+ foreach(n =>
+ usrNotes.find(candidate =>
+ candidate != n &&
+ candidate.noteType == n.noteType &&
+ candidate.dataOpt("parts") == n.dataOpt("parts") &&
+ candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet)
&&
+ n.wordIndexes.filter(n =>
!candidate.wordIndexes.contains(n)).
+ forall(wordIdx => ns.tokens.exists(t =>
t.wordIndexes.contains(wordIdx) && t.isStopWord))
+ ) match {
+ case Some(better) =>
+ ns.removeNote(n)
+ // TODO: trace.
+ logger.info(s"Element removed: $n, better: $better")
+ case None => // No-op.
}
- })
+ )
}
// TODO: simplify, add tests, check model properties (sparse etc) for
optimization.