This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new e1d30d57 WIP.
e1d30d57 is described below
commit e1d30d573e73fe296fad9c413e4159a241f5f767
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 14 14:51:07 2022 +0400
WIP.
---
.../org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala | 6 ++++++
.../apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala | 1 +
.../scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala | 2 ++
.../org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala | 1 +
4 files changed, 10 insertions(+)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index f8451775..4178d2ca 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -32,15 +32,21 @@ import scala.concurrent.ExecutionContext
* instance. Learn more about lemmas
[[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about
part-of-speech
*
[[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
here]].
*
+ * At least one of the models must be defined.
+ *
* This OpenNLP enricher requires PoS and lemma models. Some of OpenNLP
community models can be found
* [[https://opennlp.sourceforge.net/models-1.5/ here]].
*
* @param posMdlRes Relative path, absolute path or URL to
*
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]] model.
+ * Can be `null` if the **part-of-speech** model shouldn't be configured,
in which case the `pos` property will not be passed.
* @param lemmaDicRes Relative path, absolute path or URL to
*
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]] model.
+ * Can be `null` if the **lemmatizer** model shouldn't be configured, in
which case the `lemma` property will not be passed.
*/
class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String =
null) extends NCTokenEnricher with LazyLogging:
+ require(posMdlRes != null || lemmaDicRes != null, "At least one model must
be defined")
+
private var tagger: POSTaggerME = _
private var lemmatizer: DictionaryLemmatizer = _
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
index c9344283..54efbf50 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
@@ -42,6 +42,7 @@ import java.util.Objects
//noinspection ScalaWeakerAccess
class NCSwearWordsTokenEnricher(dictRes: String, stemmer: NCStemmer) extends
NCTokenEnricher with LazyLogging:
require(dictRes != null, "Swear words model file cannot be null.")
+ require(stemmer != null, "Stemmer cannot be null.")
private var swearWords: Set[String] = _
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index cc624432..b21f34ec 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -48,6 +48,8 @@ import org.apache.nlpcraft.nlp.parsers.NCNLPEntityParser.*
* By default all [[NCToken]] instances converted.
*/
class NCNLPEntityParser(predicate: NCToken => Boolean = _ => true) extends
NCEntityParser:
+ require(predicate != null)
+
/** @inheritdoc */
override def parse(req: NCRequest, cfg: NCModelConfig, toks:
List[NCToken]): List[NCEntity] =
toks.filter(predicate).map(t =>
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index a9244535..31c84450 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -68,6 +68,7 @@ class NCOpenNLPEntityParser(findersMdlsRes: List[String])
extends NCEntityParser
private case class Holder(start: Int, end: Int, name: String, probability:
Double)
init()
+
/**
*
*/