This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
     new 7d6f4cd  WIP.
7d6f4cd is described below

commit 7d6f4cdf06283e556eec2d43101a3024a8192402
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 24 23:35:17 2021 +0300

    WIP.
---
 .../nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala    | 4 +++-
 .../internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala  | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
index 8935864..c016886 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
@@ -41,8 +41,10 @@ class NCEnQuotesImpl extends NCTokenEnricher with LazyLogging:
         val toksSeq = toks.asScala
         val quotes = toksSeq.filter(isQuote)
 
-        if quotes.size % 2 == 0 then
+        // Start and end quote mustn't be same ("a` processed as valid)
+        if quotes.nonEmpty && quotes.size % 2 == 0 then
             val m = toksSeq.zipWithIndex.toMap
+
             val pairs = quotes.zipWithIndex.drop(1).flatMap {
                 (t, idx) => if idx % 2 != 0 then Some(m(t) -> m(quotes(idx - 1))) else None
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index fc8cf6a..202b6a1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -541,6 +541,7 @@ private[impl] class NCEnStopWordsFinder(addStems: Set[String], exclStems: Set[St
         // Just ignore last odd quote.
         if quotes.size % 2 != 0 then quotes = quotes.reverse.drop(1).reverse
 
+        // Start and end quote mustn't be same ("a` processed as valid)
         if quotes.nonEmpty then
             val m = toks.zipWithIndex.toMap
             val pairs = quotes.zipWithIndex.drop(1).flatMap {

Reply via email to