This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 77c1d91e WIP.
77c1d91e is described below
commit 77c1d91e4faf9cfa3340d19a2b29b5632d1bec1a
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 16 16:44:39 2022 +0400
WIP.
---
.../nlp/enrichers/NCQuotesTokenEnricher.scala | 29 +++++++++++-----------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
index 7433c7b6..30cf49af 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
@@ -25,8 +25,6 @@ import scala.collection.*
* Companion helper.
*/
object NCQuotesTokenEnricher:
- private val PROP = "quoted"
-
private case class Range(from: Int, to: Int):
def in(idx: Int): Boolean = idx >= from && idx <= to
@@ -55,32 +53,33 @@ class NCQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
//noinspection DuplicatedCode
/** @inheritdoc */
override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
- def markAllNot(invalidState: Boolean): Unit =
- if invalidState then logger.warn(s"Detected invalid quotes in: ${req.getText}")
- toks.foreach(_.put(PROP, false))
+ def mark(get: NCToken => Boolean): Unit = for (t <- toks) t.put("quoted", get(t))
+ def markFalse(invalid: Boolean): Unit =
+ if invalid then logger.warn(s"Detected invalid quotes in: ${req.getText}")
+ mark(_ => false)
val quotes = toks.filter(isQuote)
if quotes.isEmpty then
- markAllNot(false)
+ markFalse(false)
else if quotes.length % 2 != 0 then
- markAllNot(true)
+ markFalse(true)
else
- val quotedRanges = mutable.HashSet.empty[Range]
+ val ranges = mutable.HashSet.empty[Range]
val stack = mutable.Stack.empty[NCToken]
- for (quote <- quotes)
+ for (q <- quotes)
if stack.nonEmpty then
val top = stack.top
- if top.getText == QUOTES_REVERSED.getOrElse(quote.getText, null) then
- quotedRanges += Range(top.getIndex + 1, quote.getIndex - 1)
+ if top.getText == QUOTES_REVERSED.getOrElse(q.getText, null) then
+ ranges += Range(top.getIndex + 1, q.getIndex - 1)
stack.pop()
else
- stack.push(quote)
+ stack.push(q)
else
- stack.push(quote)
+ stack.push(q)
if stack.isEmpty then
- toks.foreach(t => t.put(PROP, quotedRanges.exists(_.in(t.getIndex))))
+ mark(t => ranges.exists(_.in(t.getIndex)))
else
- markAllNot(true)
\ No newline at end of file
+ markFalse(true)
\ No newline at end of file