[incubator-nlpcraft] branch NLPCRAFT-520 updated: WIP.

sergeykamov Fri, 23 Dec 2022 03:23:09 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new 0c695b47 WIP.
0c695b47 is described below

commit 0c695b47c020ffbf5a2acfc0909e9171442e7bd4
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 23 15:23:16 2022 +0400

    WIP.
---
 .../entity/parser/stanford/NCStanfordNLPEntityParser.scala | 14 ++++++++++++--
 .../token/parser/stanford/NCStanfordNLPTokenParser.scala   |  5 ++++-
 .../nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala       |  2 +-
 .../nlpcraft/nlp/parsers/NCSemanticEntityParser.scala      |  2 ++
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git 
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
 
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
index 1677c6dd..e2b9432b 100644
--- 
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
+++ 
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
@@ -25,9 +25,18 @@ import scala.collection.mutable
 import scala.jdk.CollectionConverters.*
 
 /**
+  *  [[https://nlp.stanford.edu/ Stanford NLP]] based language independent 
[[NCEntityParser entity parser]] configured by
+  *  given [[StanfordCoreNLP]] pipeline instance.
   *
-  * @param stanford
-  * @param supported
+  * This parser creates [[NCEntity]] instances for entities detected by the configured 
[[StanfordCoreNLP]] pipeline.
+  * These entities are created with ID `stanford:modelName`, where `modelName` 
is the model configured in the [[StanfordCoreNLP pipeline]].
+  * This parser also copies the optional `nne` string and `confidence` double 
[[NCPropertyMap metadata]] properties to the
+  * created entities extracted from [[StanfordCoreNLP]] annotations.
+  *
+  * **NOTE:** this parser can produce different types of [[NCEntity]] 
instances, and each input [[NCToken]] can be included in several output 
[[NCEntity]] instances.
+  *
+  * @param stanford Configured [[StanfordCoreNLP]] pipeline instance.
+  * @param supported Supported [[StanfordCoreNLP]] model names. Only these 
models are used to generate [[NCEntity]] instances.
   */
 class NCStanfordNLPEntityParser(stanford: StanfordCoreNLP, supported: 
Set[String]) extends NCEntityParser:
     require(stanford != null, "Stanford instance cannot be null.")
@@ -35,6 +44,7 @@ class NCStanfordNLPEntityParser(stanford: StanfordCoreNLP, 
supported: Set[String
 
     private val supportedLc = supported.map(_.toLowerCase)
 
+    /** @inheritdoc */
     override def parse(req: NCRequest, cfg: NCModelConfig, toks: 
List[NCToken]): List[NCEntity] =
         val doc = new CoreDocument(req.getText)
         stanford.annotate(doc)
diff --git 
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
 
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
index 0869ea2a..ce6665ff 100644
--- 
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
+++ 
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
@@ -27,14 +27,17 @@ import java.io.StringReader
 import scala.collection.mutable
 
 /**
+  *  [[https://nlp.stanford.edu/ Stanford NLP]] based language independent 
[[NCTokenParser token parser]] configured
+  *  by given [[StanfordCoreNLP]] pipeline instance.
   *
-  * @param stanford
+  * @param stanford Configured [[StanfordCoreNLP]] pipeline instance.
   */
 class NCStanfordNLPTokenParser(stanford: StanfordCoreNLP) extends 
NCTokenParser:
     require(stanford != null, "Stanford instance cannot be null.")
 
     private def nvl(v: String, dflt : => String): String = if v != null then v 
else dflt
 
+    /** @inheritdoc */
     override def tokenize(text: String): List[NCToken] =
         val doc = new CoreDocument(text)
         stanford.annotate(doc)
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index 8cb7d661..074f095e 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -43,7 +43,7 @@ import scala.util.Using
   *
   * Some of OpenNLP prepared models can be found 
[[https://opennlp.sourceforge.net/models-1.5/ here]].
   *
-  * **NOTE:** that each input [[NCToken]] can be included into several output 
[[NCEntity]] instances.
+  * **NOTE:** this parser can produce different types of [[NCEntity]] 
instances, and each input [[NCToken]] can be included in several output 
[[NCEntity]] instances.
   *
   * @param findersMdlsRes Relative paths, absolute paths, resources or URLs to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
 models]].
   */
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index 8004d3e9..8d3fcd7c 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -131,6 +131,8 @@ import NCSemanticEntityParser.*
   * `stemmer` implementation language should be corresponded to other 
components of [[NCPipeline]], but
   * required `stemmer` implementation is independent from other components' 
stemmers.
   *
+  * **NOTE:** this parser can produce different types of [[NCEntity]] 
instances, and each input [[NCToken]] can be included in several output 
[[NCEntity]] instances.
+  *
   * There are several constructors with different set of parameters.
   * - **stemmer** [[NCStemmer]] implementation which used for matching tokens 
and given [[NCSemanticElement]] synonyms.
   * - **parser** [[NCTokenParser]] implementation which used for given 
[[NCSemanticElement]] synonyms tokenization. It should be same implementation 
as used in [[NCPipeline.getTokenParser]].

[incubator-nlpcraft] branch NLPCRAFT-520 updated: WIP.

Reply via email to