This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
new cbd72d8 WIP.
cbd72d8 is described below
commit cbd72d86a9202eb36cc45186ddd8a19763f94fea
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Dec 21 18:31:31 2021 +0300
WIP.
---
.../internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java | 1 -
.../nlp/token/parser/opennlp/impl/NCEnStopWordsFinderImpl.scala | 6 +++---
.../internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala | 2 +-
.../src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala | 1 +
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
index 86d25be..7e6a639 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
@@ -26,7 +26,6 @@ import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.NCOpenNlpImpl;
import java.io.File;
import java.util.List;
import java.util.Objects;
-import java.util.Set;
/*
* Models can be downloaded from the following resources:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinderImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinderImpl.scala
index 68f9b5e..b7fb2f3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinderImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinderImpl.scala
@@ -37,7 +37,7 @@ object NCEnStopWordsFinderImpl:
type Word = String
/** All POSes set. http://www.clips.ua.ac.be/pages/mbsp-tags */
- private final val POSES = Set(
+ private val POSES = Set(
"CC",
"CD",
"DT",
@@ -82,7 +82,7 @@ object NCEnStopWordsFinderImpl:
"--" // Synthetic POS.
)
- private final val STOP_BEFORE_STOP: Seq[Word] = Seq("DT", "PRP", "PRP$", "WDT", "WP", "WP$", "WRB")
+ private val STOP_BEFORE_STOP: Seq[Word] = Seq("DT", "PRP", "PRP$", "WDT", "WP", "WP$", "WRB")
private val Q_POS = Set("``", "''")
@@ -451,7 +451,7 @@ class NCEnStopWordsFinderImpl(addStopWords: JSet[String], exclStopWords: JSet[St
* @param toks
*/
override def find(toks: JList[NCToken]): JList[NCToken] =
- // TODO: check ? stop clear?
+ // TODO: check started? clear on stop?
if (percents == null)
throw new IllegalStateException(s"${this.getClass.getName} is not started.")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
index 2eb8124..0f54e7e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
@@ -90,7 +90,7 @@ class NCOpenNlpImpl(tokMdlIn: InputStream, posMdlIn: InputStream, lemmaDicIn: In
* @return
*/
override def parse(req: NCRequest): JList[NCToken] =
- // TODO: check ? stop clear?
+ // TODO: check started? clear on stop?
if (tokenizer == null)
throw new IllegalStateException(s"${this.getClass.getName} is not started.")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index 1784da9..9c13081 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -927,6 +927,7 @@ object NCUtils extends LazyLogging:
catch
case e: IOException => throw new NCException(s"Failed to read stream.", e)
+ // TODO: is it suitable place for methods related to tokens manipulations?
/**
* Gets all sequential permutations of tokens in this NLP sentence.
*