This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new d80ba49 DOES NOT COMPILE
d80ba49 is described below
commit d80ba49172160701b419c71c782a747f756448c5
Author: Aaron Radzinski <[email protected]>
AuthorDate: Mon Jan 10 13:36:38 2022 -0800
DOES NOT COMPILE
---
.../src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java | 3 ---
nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java | 3 ++-
.../scala/org/apache/nlpcraft/nlp/NCModelConfigBuilderImpl.scala | 2 +-
.../nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" | 1 -
.../nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala | 3 +++
.../nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala | 4 ++--
.../nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java | 1 -
.../nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala | 6 +++---
8 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java
index 81bac8d..991bb78 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java
@@ -188,7 +188,6 @@ public class NCModelConfigBuilder {
* @return
*/
public NCModelConfigBuilder withEntityValidator(NCEntityValidator entityValidator) {
- // TODO: error texts.
Objects.requireNonNull(entityValidator, "Validators cannot be null.");
impl.withEntityValidator(entityValidator);
return this;
@@ -200,7 +199,6 @@ public class NCModelConfigBuilder {
* @return
*/
public NCModelConfigBuilder withVariantValidators(List<NCVariantValidator> variantValidators) {
- // TODO: error texts.
Objects.requireNonNull(variantValidators, "Validators cannot be null.");
variantValidators.forEach(p -> Objects.requireNonNull(p, "Validators cannot be null."));
impl.withVariantValidators(variantValidators);
@@ -213,7 +211,6 @@ public class NCModelConfigBuilder {
* @return
*/
public NCModelConfigBuilder withVariantValidator(NCVariantValidator variantValidator) {
- // TODO: error texts.
Objects.requireNonNull(variantValidator, "Validator cannot be null.");
impl.withVariantValidator(variantValidator);
return this;
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
index 82c5a6e..e99bcc3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
@@ -25,8 +25,9 @@ import java.util.List;
public interface NCTokenParser {
/**
*
+ * @param cfg
* @param text
* @return
*/
- List<NCToken> tokenize(String text);
+ List<NCToken> tokenize(NCModelConfig cfg, String text);
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCModelConfigBuilderImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCModelConfigBuilderImpl.scala
index 6794cbd..5176d94 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCModelConfigBuilderImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCModelConfigBuilderImpl.scala
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.*
import java.util.{ArrayList as JAList, List as JList}
/**
- * TODO: move it to right package.
+ * TODO: move back Java...
*
*/
class NCModelConfigBuilderImpl(id: String, name: String, version: String):
diff --git "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java"
index 0c9c6c8..1befe90 100644
--- "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java"
+++ "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java"
@@ -38,7 +38,6 @@ public class NСSwearWordsTokenEnricher implements NCTokenEnricher {
* @param mdlSrc
*/
public NСSwearWordsTokenEnricher(String mdlSrc) {
- // TODO: error texts.
Objects.requireNonNull(mdlSrc, "Swear words model file cannot be null.");
impl = new NCSwearWordsImpl(mdlSrc);
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
index 877c0ff..fcd48e1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
@@ -278,6 +278,9 @@ class NCStopWordsImpl(addStopsSet: util.Set[String], exclStopsSet: util.Set[Stri
(for (n <- toks.length until 0 by -1 if n <= maxLen) yield toks.sliding(n)).flatten
+ /**
+ *
+ */
private def start(): Unit =
addStems = if addStopsSet == null then Set.empty else addStopsSet.asScala.toSet.map(stem)
exclStems = if exclStopsSet == null then Set.empty else exclStopsSet.asScala.toSet.map(stem)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala
index def5ef6..54ac0e6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala
@@ -35,9 +35,9 @@ class NCSwearWordsImpl(res: String) extends NCTokenEnricher with LazyLogging:
private final val stemmer = new PorterStemmer
private var swearWords: Set[String] = _
- start()
+ init()
- private def start(): Unit =
+ private def init(): Unit =
swearWords = NCUtils.readTextStream(NCUtils.getStream(res), "UTF-8").
map(p => stemmer.stem(p.toLowerCase)).toSet
logger.trace(s"Loaded resource: $res")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
index 7777dc2..1750a87 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
@@ -45,7 +45,6 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
* @throws NCException
*/
public NCOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
- // TODO: error texts.
Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be null.");
Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be null.");
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
index 3130cee..5d811d2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
@@ -47,9 +47,9 @@ class NCOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String) ext
private var lemmatizer: DictionaryLemmatizer = _
private var tokenizer: TokenizerME = _
- start()
+ init()
- private def start(): Unit =
+ private def init(): Unit =
NCUtils.execPar(
() =>
tagger = new POSTaggerME(new POSModel(NCUtils.getStream(posMdlSrc)));
@@ -94,7 +94,7 @@ class NCOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String) ext
hs.zip(poses).zip(lemmas).zipWithIndex.map { case (((h, pos), lemma), idx) =>
new NCPropertyMapAdapter with NCToken:
- override val getText: String = h.text
+ override inline def getText: String = h.text
override val getLemma: String = lemma
override val getPos: String = pos
override val getIndex: Int = idx