This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 4c27312e Update NCOpenNLPTokenEnricher.scala
4c27312e is described below
commit 4c27312eb9e9de237c238e1cfb89c6dd0d0422ea
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Dec 13 16:18:39 2022 -0800
Update NCOpenNLPTokenEnricher.scala
---
.../nlp/enrichers/NCOpenNLPTokenEnricher.scala | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index 270ca60a..f8451775 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -27,19 +27,18 @@ import java.io.*
import scala.concurrent.ExecutionContext
/**
- * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCTokenEnricher token enricher]].
+ * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCTokenEnricher token enricher]]. This
+ * enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap
metadata]] properties to the [[NCToken token]]
+ * instance. Learn more about lemmas
[[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about
part-of-speech
+ *
[[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
here]].
*
- * This enricher adds `lemma` and `pos` (part-of-speech) string
[[NCPropertyMap metadata]] property to the [[NCToken token]]
- * instance.
-
- * Lemma is the canonical form of word, look
[[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] for more details.
- *
- * Part-of-speech tags are described
[[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
here]].
- *
- * Some of OpenNLP prepared models can be found
[[https://opennlp.sourceforge.net/models-1.5/ here]].
+ * This OpenNLP enricher requires PoS and lemma models. Some of the OpenNLP
community models can be found
+ * [[https://opennlp.sourceforge.net/models-1.5/ here]].
*
- * @param posMdlRes Path to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]] model.
- * @param lemmaDicRes Path to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]] model.
+ * @param posMdlRes Relative path, absolute path or URL to
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]] model.
+ * @param lemmaDicRes Relative path, absolute path or URL to
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]] model.
*/
class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String =
null) extends NCTokenEnricher with LazyLogging:
private var tagger: POSTaggerME = _