This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new c645daaa WIP.
c645daaa is described below
commit c645daaa4d409f9fd2d70ab07b2a93ba2ee4a949
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Dec 8 16:57:50 2022 +0400
WIP.
---
.../org/apache/nlpcraft/NCPipelineBuilder.scala | 51 ++++++++++++++++++++++
.../nlp/enrichers/NCEnQuotesTokenEnricher.scala | 11 +++--
.../nlp/enrichers/NCEnStopWordsTokenEnricher.scala | 16 ++++---
.../enrichers/NCEnSwearWordsTokenEnricher.scala | 13 +++---
.../nlp/enrichers/NCOpenNLPTokenEnricher.scala | 11 +++--
.../nlpcraft/nlp/parsers/NCNLPEntityParser.scala | 13 +++---
.../nlp/parsers/NCOpenNLPEntityParser.scala | 14 +++---
.../nlp/parsers/NCOpenNLPTokenParser.scala | 4 +-
.../nlpcraft/nlp/parsers/NCSemanticElement.scala | 13 +++---
.../nlp/parsers/NCSemanticEntityParser.scala | 10 +----
.../nlpcraft/nlp/parsers/NCSemanticStemmer.scala | 5 ++-
11 files changed, 110 insertions(+), 51 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
index 0ca12729..5b945e07 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
@@ -227,6 +227,23 @@ class NCPipelineBuilder:
/**
* Shortcut to configure pipeline with [[NCSemanticEntityParser]].
*
+ * For the English language it also initializes the following components:
+ * - [[https://opennlp.apache.org/ OpenNLP]] based
[[NCOpenNLPTokenParser token parser]]
+ * initialized by
[[http://opennlp.sourceforge.net/models-1.5/en-token.bin en-token.bin]] model.
+ * - [[https://opennlp.apache.org/ OpenNLP]] based
[[NCOpenNLPTokenEnricher token enricher]]
+ * initialized by
+ * [[http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
en-pos-maxent.bin]] model for
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]]
+ * and
+ *
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
en-lemmatizer.dict]] model for
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]].
+ * - [[NCEnStopWordsTokenEnricher `Stop-word`]] token enricher.
+ * - [[NCEnSwearWordsTokenEnricher `Swear-word`]] token enricher
initialized by
+ *
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
swear_words.txt]] dictionary.
+ * - [[NCEnQuotesTokenEnricher `Quotes`]] token enricher.
+ * - [[NCEnDictionaryTokenEnricher `Known-word`]] token enricher.
+ * - [[NCEnBracketsTokenEnricher Brackets]] token enricher.
+ *
* @param lang ISO 639-1 language code. Currently, only "en" (English) is
supported.
* @param macros Macros to use with [[NCSemanticEntityParser]].
* @param elms Semantic elements to use with [[NCSemanticEntityParser]].
@@ -248,6 +265,23 @@ class NCPipelineBuilder:
/**
* Shortcut to configure pipeline with [[NCSemanticEntityParser]].
*
+ * For the English language it also initializes the following components:
+ * - [[https://opennlp.apache.org/ OpenNLP]] based
[[NCOpenNLPTokenParser token parser]]
+ * initialized by
[[http://opennlp.sourceforge.net/models-1.5/en-token.bin en-token.bin]] model.
+ * - [[https://opennlp.apache.org/ OpenNLP]] based
[[NCOpenNLPTokenEnricher token enricher]]
+ * initialized by
+ * [[http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
en-pos-maxent.bin]] model for
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]]
+ * and
+ *
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
en-lemmatizer.dict]] model for
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]].
+ * - [[NCEnStopWordsTokenEnricher `Stop-word`]] token enricher.
+ * - [[NCEnSwearWordsTokenEnricher `Swear-word`]] token enricher
initialized by
+ *
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
swear_words.txt]] dictionary.
+ * - [[NCEnQuotesTokenEnricher `Quotes`]] token enricher.
+ * - [[NCEnDictionaryTokenEnricher `Known-word`]] token enricher.
+ * - [[NCEnBracketsTokenEnricher Brackets]] token enricher.
+ *
* @param lang ISO 639-1 language code. Currently, only "en" (English) is
supported.
* @param elms Semantic elements to use with [[NCSemanticEntityParser]].
*/
@@ -256,6 +290,23 @@ class NCPipelineBuilder:
/**
* Shortcut to configure pipeline with [[NCSemanticEntityParser]].
*
+ * For the English language it also initializes the following components:
+ * - [[https://opennlp.apache.org/ OpenNLP]] based
[[NCOpenNLPTokenParser token parser]]
+ * initialized by
[[http://opennlp.sourceforge.net/models-1.5/en-token.bin en-token.bin]] model.
+ * - [[https://opennlp.apache.org/ OpenNLP]] based
[[NCOpenNLPTokenEnricher token enricher]]
+ * initialized by
+ * [[http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
en-pos-maxent.bin]] model for
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]]
+ * and
+ *
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
en-lemmatizer.dict]] model for
+ *
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]].
+ * - [[NCEnStopWordsTokenEnricher `Stop-word`]] token enricher.
+ * - [[NCEnSwearWordsTokenEnricher `Swear-word`]] token enricher
initialized by
+ *
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
swear_words.txt]] dictionary.
+ * - [[NCEnQuotesTokenEnricher `Quotes`]] token enricher.
+ * - [[NCEnDictionaryTokenEnricher `Known-word`]] token enricher.
+ * - [[NCEnBracketsTokenEnricher Brackets]] token enricher.
+ *
* @param lang ISO 639-1 language code. Currently, only "en" (English) is
supported.
* @param mdlSrc Classpath resource, file path or URL for YAML or JSON
semantic model definition file.
*/
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
index ac6428c8..ea9bd28a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
@@ -21,12 +21,15 @@ import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
/**
- * [[NCTokenEnricher]] built-in English language implementation.
+ * Quotes [[NCTokenEnricher enricher]] for English language.
*
- * It adds <code>quoted</code> boolean property to [[NCToken]] instance if
word which it represents is in quotes.
+ * This enricher adds `quoted` boolean [[NCPropertyMap metadata]] property to
the [[NCToken token]]
+ * instance if word it represents is in quotes. The value `true` of the
metadata property indicates that this word is in quotes,
+ * `false` value indicates otherwise.
*
- * Note that this implementation requires <code>pos</code> string property in
[[NCToken]] instance.
- * You can configure [[NCOpenNLPTokenEnricher]] before
[[NCEnQuotesTokenEnricher]] in your [[NCPipeline]].
+ * **NOTE:** this implementation requires `lemma` string [[NCPropertyMap
metadata]] property that contains
+ * token's lemma. You can configure [[NCOpenNLPTokenEnricher]] that provides
this metadata property before
+ * this enricher in your [[NCPipeline pipeline]].
*/
//noinspection ScalaWeakerAccess
class NCEnQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index a6c052c4..f0ffb1a7 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -163,16 +163,20 @@ object NCEnStopWordsTokenEnricher:
import org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher.*
/**
- * [[NCTokenEnricher]] built-in English language implementation.
+ * "Stop-word" [[NCTokenEnricher enricher]] for English language.
+ *
+ * This enricher adds `stopword` boolean [[NCPropertyMap metadata]] property
to the [[NCToken token]]
+ * instance if word it represents is an English stop-word. The value `true`
of the metadata property indicates that this word is detected as stop-word,
+ * `false` value indicates otherwise.
*
- * It adds <code>stopword</code> string property to [[NCToken]] instance if
word which it represents is stop-word.
* Look more about stop-words [[https://en.wikipedia.org/wiki/Stop_word
here]].
*
- * Note that this implementation requires <code>pos</code> and
<code>lemma</code> string properties in [[NCToken]] instance.
- * You can configure [[NCOpenNLPTokenEnricher]] before
[[NCEnQuotesTokenEnricher]] in your [[NCPipeline]].
+ * **NOTE:** this implementation requires `lemma` and `pos` string
[[NCPropertyMap metadata]] properties that contain
+ * token's lemma and part of speech. You can configure
[[NCOpenNLPTokenEnricher]] that provides these metadata properties before
+ * this enricher in your [[NCPipeline pipeline]].
*
- * @param addStopsSet User defined additional stop-words collection.
- * @param exclStopsSet Collection of words which should not be marked as
stop-words during component processing.
+ * @param addStopsSet User defined collection of additional stop-words.
+ * @param exclStopsSet User defined collection of exceptions, that is words
which should not be marked as stop-words during processing.
*/
class NCEnStopWordsTokenEnricher(addStopsSet: Set[String] = Set.empty,
exclStopsSet: Set[String] = Set.empty) extends NCTokenEnricher with LazyLogging:
private final val stemmer = new PorterStemmer
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
index 2072c435..c4fa7d8b 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
@@ -26,13 +26,16 @@ import java.io.*
import java.util.Objects
/**
- * [[NCTokenEnricher]] built-in English language implementation.
+ * "Swear-word" [[NCTokenEnricher enricher]] for English language.
*
- * It adds <code>swear</code> boolean property to [[NCToken]] instance if
word which it represents is swear word.
+ * This enricher adds `swear` boolean [[NCPropertyMap metadata]] property to
the [[NCToken token]]
+ * instance if word it represents is found in the swear word dictionary, i.e. the swear
dictionary contains this word's
+ * stem. The value `true` of the metadata property indicates that this word's
stem is found in the dictionary,
+ * `false` value indicates otherwise.
*
- * @param res Path to swear words list text resource.
- * Note that [[NCPipelineBuilder.withSemantic()]] methods use for English
language
- *
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
NlpCraft Swearword Dictionary]]
+ * Read more about stemming [[https://en.wikipedia.org/wiki/Stemming here]].
+ *
+ * @param res Path to English swear dictionary. English swear dictionary has
simple plain text format with one word on one line.
*/
//noinspection ScalaWeakerAccess
class NCEnSwearWordsTokenEnricher(res: String) extends NCTokenEnricher with
LazyLogging:
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index 3467daea..af8d6f10 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -28,20 +28,19 @@ import java.io.*
import scala.concurrent.ExecutionContext
/**
- * [[NCTokenEnricher]] built-in language independent implementation based on
- * [[https://opennlp.apache.org/ OpenNLP]] <code>lemma</code> and <code>POS
tagger</code> models.
+ * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCTokenEnricher enricher]].
*
- * It adds <code>lemma</code> and <code>pos</code> (part-of-speech) string
properties to [[NCToken]] instance.
+ * This enricher adds `lemma` and `pos` (part-of-speech) string
[[NCPropertyMap metadata]] properties to the [[NCToken token]]
+ * instance.
+
* Lemma is the canonical form of word, look
[[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] for more details.
+ *
* Part-of-speech tags are described
[[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
here]].
*
* Some of OpenNLP prepared models can be found
[[https://opennlp.sourceforge.net/models-1.5/ here]].
*
* @param posMdlSrc Path to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
POSTaggerME]] model.
- * Note that [[NCPipelineBuilder.withSemantic()]] methods use for English
language [[http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
en-pos-maxent.bin]].
- *
* @param lemmaDicSrc Path to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
DictionaryLemmatizer]] model.
- * Note that [[NCPipelineBuilder.withSemantic()]] methods use for English
language
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
en-lemmatizer.dict]].
*/
class NCOpenNLPTokenEnricher(posMdlSrc: String = null, lemmaDicSrc: String =
null) extends NCTokenEnricher with LazyLogging:
private var tagger: POSTaggerME = _
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index e78fb668..d23d42a0 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -31,19 +31,20 @@ object NCNLPEntityParser:
import org.apache.nlpcraft.nlp.parsers.NCNLPEntityParser.*
/**
- * [[NCEntityParser]] built-in NLP implementation.
+ * NLP data [[NCEntityParser parser]].
*
- * It converts list of [[NCToken]] instances to list of [[NCEntity]]
instances with ID <code>nlp:token</code>.
- * Each [[NCEntity]] instance contains following mandatory properties:
+ * This parser converts list of input [[NCToken]] instances to list of
[[NCEntity]] instances with ID `nlp:token`.
+ * All [[NCEntity]] instances contain following mandatory [[NCPropertyMap
metadata]] properties:
* - nlp:token:text
* - nlp:token:index
* - nlp:token:startCharIndex
* - nlp:token:endCharIndex
*
- * and all another properties which were already added by [[NCPipeline]]
into processed [[NCToken]].
+ * Also created [[NCEntity]] instances receive all other [[NCPropertyMap
metadata]] properties
+ * which were added by token [[org.apache.nlpcraft.NCTokenEnricher enrichers]]
configured in the [[NCPipeline pipeline]].
*
- * @param predicate Predicate which allow to restrict list of converted
[[NCToken]] instances.
- * By default all [[NCToken]] instances converted to [[NCEntity]] instances.
+ * @param predicate Predicate which allows filtering the list of converted
[[NCToken]] instances.
+ * By default all [[NCToken]] instances are converted.
*/
class NCNLPEntityParser(predicate: NCToken => Boolean = _ => true) extends
NCEntityParser:
override def parse(req: NCRequest, cfg: NCModelConfig, toks:
List[NCToken]): List[NCEntity] =
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index 2f2d0f48..e40e8ff2 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -47,15 +47,19 @@ object NCOpenNLPEntityParser:
new NCOpenNLPEntityParser(List(src))
/**
- * [[NCEntityParser]] built-in implementation based on
[[https://opennlp.apache.org/ OpenNLP]] <code>name finders</code> models.
+ * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCEntityParser parser]] configured by
+ * paths to [[https://opennlp.apache.org/ OpenNLP]] `name finders` models.
*
- * It prepares [[NCEntity]] instances which are found by configured
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
models]]
- * with entity ID <code>opennlp:modelId</code>, where <code>modelId</code> is
ID of configured models.
+ * This parser prepares [[NCEntity]] instances which are detected by given
models.
+ * These entities are created with ID `opennlp:modelId`, where `modelId` is
[[https://opennlp.apache.org/ OpenNLP]] model ID.
+ * Also this parser adds `opennlp:modelId:probability` double [[NCPropertyMap
metadata]] property to the
+ * created entities extracted from related [[https://opennlp.apache.org/
OpenNLP]] model.
*
- * Note that each [[NCToken]] can be included into several [[NCEntity]]
instances.
+ * Some of OpenNLP prepared models can be found
[[https://opennlp.sourceforge.net/models-1.5/ here]].
+ *
+ * **NOTE:** each input [[NCToken]] can be included into several output
[[NCEntity]] instances.
*
* @param srcs Paths to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
models]].
- * Some of OpenNLP prepared models can be found
[[https://opennlp.sourceforge.net/models-1.5/ here]].
*/
class NCOpenNLPEntityParser(srcs: List[String]) extends NCEntityParser with
LazyLogging:
require(srcs != null, "Models source cannot be null.")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
index 044e331d..dbc6657e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
@@ -27,12 +27,12 @@ import java.util
import java.util.Objects
/**
- * [[NCTokenParser]] built-in language independent implementation based on
[[https://opennlp.apache.org/ OpenNLP]] <code>tokenizers</code> models.
+ * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCTokenParser parser]] configured
+ * by path to [[https://opennlp.apache.org/ OpenNLP]] `tokenizers` model.
*
* Some of OpenNLP prepared models can be found
[[https://opennlp.sourceforge.net/models-1.5/ here]].
*
* @param tokMdl Path to
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/tokenize/TokenizerModel.html
model]].
- * Note that [[NCPipelineBuilder.withSemantic()]] methods use for English
language [[http://opennlp.sourceforge.net/models-1.5/en-token.bin
en-token.bin]].
*/
class NCOpenNLPTokenParser(tokMdl: String) extends NCTokenParser with
LazyLogging:
require(tokMdl != null, "Tokenizer model path cannot be null.")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
index f74f79ca..b9768e59 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
@@ -19,24 +19,24 @@ package org.apache.nlpcraft.nlp.parsers
/**
*
- * Configuration which helps to detect [[org.apache.nlpcraft.NCEntity
NCEntity]] for
- * <code>Semantic</code> implementation of
[[org.apache.nlpcraft.NCEntityParser NCEntityParser]].
+ * Configuration element which helps to detect [[org.apache.nlpcraft.NCEntity
NCEntity]] for
+ * `Semantic` implementation of [[org.apache.nlpcraft.NCEntityParser
NCEntityParser]].
*
- * See detailed description
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
+ * See detailed description on the website
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
*
* @see [[NCSemanticEntityParser]]
* @see [[NCSemanticStemmer]]
*/
trait NCSemanticElement:
/**
- * Gets <code>id</code> for created [[org.apache.nlpcraft.NCEntity
NCEntity]] instance.
+ * Gets `id` for created [[org.apache.nlpcraft.NCEntity NCEntity]]
instance.
* Representation of [[org.apache.nlpcraft.NCEntity.getId
NCEntity.getId()]] method.
* @return Element ID.
*/
def getId: String
/**
- * Gets <code>groups</code> for created [[org.apache.nlpcraft.NCEntity
NCEntity]] instance.
+ * Gets `groups` for created [[org.apache.nlpcraft.NCEntity NCEntity]]
instance.
* Representation of [[org.apache.nlpcraft.NCEntity.getGroups
NCEntity.getGroups()]] method.
* @return Groups.
*/
@@ -57,8 +57,7 @@ trait NCSemanticElement:
def getSynonyms: Set[String] = Set.empty
/**
- * Gets optional <code>properties</code> map for created
[[org.apache.nlpcraft.NCEntity NCEntity]] instance.
- * Representation of [[org.apache.nlpcraft.NCEntity NCEntity]] content.
+ * Gets [[org.apache.nlpcraft.NCPropertyMap metadata]] properties for
created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
* @return Groups.
*/
def getProperties: Map[String, AnyRef] = Map.empty
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index 7d0c5a1d..3942584e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -177,16 +177,10 @@ object NCSemanticEntityParser:
import org.apache.nlpcraft.nlp.parsers.NCSemanticEntityParser.*
/**
- * [[NCEntityParser]] built-in <code>semantic</code>implementation.
+ * `Semantic` [[NCEntityParser parser]] implementation.
*
- * See detailed description
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
+ * See detailed description on the website
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
*
- * Note that [[org.apache.nlpcraft.NCPipelineBuilder.withSemantic()
NCPipelineBuilder.withSemantic()]]
- * methods use for English language
- * [[org.apache.nlpcraft.nlp.parsers.NCOpenNLPTokenParser
NCOpenNLPTokenParser]] implementation of
- * [[NCTokenParser]] with model
- * [[http://opennlp.sourceforge.net/models-1.5/en-token.bin en-token.bin]].
-
*
* @see [[NCSemanticElement]]
* @see [[NCSemanticStemmer]]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
index eae2f643..27490eda 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
@@ -19,9 +19,10 @@ package org.apache.nlpcraft.nlp.parsers
/**
*
- * Stemmer trait. Read more about stemming
[[https://en.wikipedia.org/wiki/Stemming here]].
+ * `Stemmer` trait. Stems are used for finding words by their reduced form.
+ * Read more about stemming [[https://en.wikipedia.org/wiki/Stemming here]].
*
- * See detailed description
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
+ * See detailed description on the website
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
*
* @see [[NCSemanticEntityParser]]
* @see [[NCSemanticElement]]