This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
     new 03ca3bc  WIP.
03ca3bc is described below

commit 03ca3bce0d8e499745554df074cf1d0489626891
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 22 23:41:18 2021 +0300

    WIP.
---
 .../nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala  | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index e9ccbac..a90cce7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -223,7 +223,6 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             )
         (m(false), m(true))
 
-
     private def read(path: String): Set[String] = NCUtils.readTextGzipResource(path, "UTF-8", logger).toSet
 
     /**
@@ -599,9 +598,13 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         // | Pass #9.                                        |
         // | Deletes stop words if they are marked as quoted.|
         // +-------------------------------------------------+
-        val quotes = toks.filter(isQuote)
+        var quotes = toks.filter(isQuote)
+
+        if (quotes.size % 2 != 0)
+            // Just ignore last odd quote.
+            quotes = quotes.reverse.drop(1).reverse
 
-        if (quotes.nonEmpty && quotes.size % 2 == 0)
+        if (quotes.nonEmpty)
             val m = toks.zipWithIndex.toMap
 
             val pairs =
@@ -615,8 +618,5 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
 
                     from > idx && to < idx
                 })
-        else
-            // TODO: do we need such logs?
-            logger.debug(s"Unexpected quotes count, stop words processing updating skipped for text: ${toks.map(_.getOriginalText).mkString(" ")}")
 
         stops.toSeq.sortBy(_.getStartCharIndex)
\ No newline at end of file

Reply via email to