This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 0c695b47 WIP.
0c695b47 is described below
commit 0c695b47c020ffbf5a2acfc0909e9171442e7bd4
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 23 15:23:16 2022 +0400
WIP.
---
.../entity/parser/stanford/NCStanfordNLPEntityParser.scala | 14 ++++++++++++--
.../token/parser/stanford/NCStanfordNLPTokenParser.scala | 5 ++++-
.../nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala | 2 +-
.../nlpcraft/nlp/parsers/NCSemanticEntityParser.scala | 2 ++
4 files changed, 19 insertions(+), 4 deletions(-)
diff --git
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
index 1677c6dd..e2b9432b 100644
---
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
+++
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParser.scala
@@ -25,9 +25,18 @@ import scala.collection.mutable
import scala.jdk.CollectionConverters.*
/**
+ * [[https://nlp.stanford.edu/ Stanford NLP]] based language independent
[[NCEntityParser entity parser]] configured by
+ * given [[StanfordCoreNLP]] pipeline instance.
*
- * @param stanford
- * @param supported
+ * This parser creates [[NCEntity]] instances for entities detected by the given
[[StanfordCoreNLP]] pipeline.
+ * These entities are created with ID `stanford:modelName`, where `modelName`
is the model configured in the [[StanfordCoreNLP pipeline]].
+ * Also this parser copies optional `nne` string and `confidence` double
[[NCPropertyMap metadata]] properties to the
+ * created entities extracted from [[StanfordCoreNLP]] annotations.
+ *
+ * **NOTE:** this parser can produce different types of [[NCEntity]]
instances, and each input [[NCToken]] can be included in several output
[[NCEntity]] instances.
+ *
+ * @param stanford Configured [[StanfordCoreNLP]] pipeline instance.
+ * @param supported Supported [[StanfordCoreNLP]] model names. Only supported
models will be used for [[NCEntity]] instances generation.
*/
class NCStanfordNLPEntityParser(stanford: StanfordCoreNLP, supported:
Set[String]) extends NCEntityParser:
require(stanford != null, "Stanford instance cannot be null.")
@@ -35,6 +44,7 @@ class NCStanfordNLPEntityParser(stanford: StanfordCoreNLP,
supported: Set[String
private val supportedLc = supported.map(_.toLowerCase)
+ /** @inheritdoc */
override def parse(req: NCRequest, cfg: NCModelConfig, toks:
List[NCToken]): List[NCEntity] =
val doc = new CoreDocument(req.getText)
stanford.annotate(doc)
diff --git
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
index 0869ea2a..ce6665ff 100644
---
a/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
+++
b/nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.scala
@@ -27,14 +27,17 @@ import java.io.StringReader
import scala.collection.mutable
/**
+ * [[https://nlp.stanford.edu/ Stanford NLP]] based language independent
[[NCTokenParser token parser]] configured
+ * by given [[StanfordCoreNLP]] pipeline instance.
*
- * @param stanford
+ * @param stanford Configured [[StanfordCoreNLP]] pipeline instance.
*/
class NCStanfordNLPTokenParser(stanford: StanfordCoreNLP) extends
NCTokenParser:
require(stanford != null, "Stanford instance cannot be null.")
private def nvl(v: String, dflt : => String): String = if v != null then v
else dflt
+ /** @inheritdoc */
override def tokenize(text: String): List[NCToken] =
val doc = new CoreDocument(text)
stanford.annotate(doc)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index 8cb7d661..074f095e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -43,7 +43,7 @@ import scala.util.Using
*
* Some of OpenNLP prepared models can be found
[[https://opennlp.sourceforge.net/models-1.5/ here]].
*
- * **NOTE:** that each input [[NCToken]] can be included into several output
[[NCEntity]] instances.
+ * **NOTE:** this parser can produce different types of [[NCEntity]]
instances, and each input [[NCToken]] can be included in several output
[[NCEntity]] instances.
*
* @param findersMdlsRes Relative paths, absolute paths, resources or URLs to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
models]].
*/
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index 8004d3e9..8d3fcd7c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -131,6 +131,8 @@ import NCSemanticEntityParser.*
* `stemmer` implementation language should be corresponded to other
components of [[NCPipeline]], but
* required `stemmer` implementation is independent from other components'
stemmers.
*
+ * **NOTE:** this parser can produce different types of [[NCEntity]]
instances, and each input [[NCToken]] can be included in several output
[[NCEntity]] instances.
+ *
* There are several constructors with different set of parameters.
* - **stemmer** [[NCStemmer]] implementation which used for matching tokens
and given [[NCSemanticElement]] synonyms.
* - **parser** [[NCTokenParser]] implementation which used for given
[[NCSemanticElement]] synonyms tokenization. It should be same implementation
as used in [[NCPipeline.getTokenParser]].