This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
new 7d6f4cd WIP.
7d6f4cd is described below
commit 7d6f4cdf06283e556eec2d43101a3024a8192402
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 24 23:35:17 2021 +0300
WIP.
---
.../nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala | 4 +++-
.../internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
index 8935864..c016886 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
@@ -41,8 +41,10 @@ class NCEnQuotesImpl extends NCTokenEnricher with
LazyLogging:
val toksSeq = toks.asScala
val quotes = toksSeq.filter(isQuote)
- if quotes.size % 2 == 0 then
+ // Start and end quotes need not be the same character (e.g. "a` is processed as valid)
+ if quotes.nonEmpty && quotes.size % 2 == 0 then
val m = toksSeq.zipWithIndex.toMap
+
val pairs = quotes.zipWithIndex.drop(1).flatMap {
(t, idx) => if idx % 2 != 0 then Some(m(t) -> m(quotes(idx -
1))) else None
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index fc8cf6a..202b6a1 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -541,6 +541,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
// Just ignore last odd quote.
if quotes.size % 2 != 0 then quotes = quotes.reverse.drop(1).reverse
+ // Start and end quotes need not be the same character (e.g. "a` is processed as valid)
if quotes.nonEmpty then
val m = toks.zipWithIndex.toMap
val pairs = quotes.zipWithIndex.drop(1).flatMap {