This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 882220de WIP on Scaladoc.
882220de is described below
commit 882220de45303114f078f0679f738c5472fb2d68
Author: Aaron Radzinski <[email protected]>
AuthorDate: Thu Jan 12 14:47:45 2023 -0800
WIP on Scaladoc.
---
.gitignore | 1 +
.../scala/org/apache/nlpcraft/NCEntityParser.scala | 10 +--
.../main/scala/org/apache/nlpcraft/NCModel.scala | 1 -
.../nlpcraft/nlp/parsers/NCNLPEntityParser.scala | 8 +--
.../nlp/parsers/NCOpenNLPEntityParser.scala | 4 +-
.../nlpcraft/nlp/parsers/NCSemanticElement.scala | 79 ++++++++++------------
6 files changed, 44 insertions(+), 59 deletions(-)
diff --git a/.gitignore b/.gitignore
index cb293c18..645d0e94 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@
.idea
venv/
target
+null
out
zips
model.yaml
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.scala
index fd77ef68..16e2fc6d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.scala
@@ -22,17 +22,9 @@ package org.apache.nlpcraft
*
* Parser instance can produce [[NCEntity]] instances with different types.
* Each [[NCEntity]] instance contains [[NCToken]] instances list and
- * each [[NCToken]] instance can belong to different [[NCEntity]] instances.
+ * each [[NCToken]] instance can belong to one or more different [[NCEntity]]
instances.
* Order of result entities list is not important.
*
- * Example. For [[NCToken tokens]] **San** and **Diego** can be found two
[[NCEntity entities]]:
- * - **City** entity which contains tokens **San** and **Diego**.
- * - **Name** entity which contains token **Diego**.
- *
- * **NOTE** that even if this parser instance produces [[NCEntity]]
instances with only one same type,
- * [[NCPipeline]] can contain multiple [[NCEntityParser]] instances, so
total result set of [[NCEntity]] instances can contain different
- * entities types. Based on this entities total result set the system
prepares [[NCVariant]] instances .
- *
* See [[NCPipeline]] for documentation on the overall processing pipeline.
Note that pipeline
* must have at least one entity parser.
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.scala
index 028555d5..80188ce4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.scala
@@ -32,7 +32,6 @@ package org.apache.nlpcraft
* some aspects of your data models - your entire model and all of its
components are part of your project's
* source code.
*
- *
* @see [[NCModelClient]]
* @see [[NCModelConfig]]
* @see [[NCPipeline]]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index 170af870..102c1837 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -41,11 +41,11 @@ import NCNLPEntityParser.*
* - `nlp:entity:startCharIndex` - token text's first character index in the
input sentence.
* - `nlp:entity:endCharIndex` - token text 's last character index in the
input sentence.
*
- * Note that [[NCEntity]] instances inherit all [[NCToken]] [[NCPropertyMap
metadata]] properties from its
- * corresponding token with new name that is prefixed with
**'nlp:entity:'**. For example, for token property **prop**
- * the corresponding inherited entity property name will be
**nlp:entity:prop**.
+ * Note that [[NCEntity]] instances inherit all [[NCToken]] [[NCPropertyMap
metadata]] properties from its
+ * corresponding token with new name that is prefixed with **'nlp:entity:'**.
For example, for token property **prop**
+ * the corresponding inherited entity property name will be
**nlp:entity:prop**.
*
- * @param predicate Predicate which allows to filter list of converted
[[NCToken]] instances. Only tokens that
+ * @param predicate Predicate which allows to filter list of converted
[[NCToken]] instances. Only tokens that
* satisfy given predicate will convert to entity by this parser. By
default all [[NCToken]] instances are
* converted.
*/
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index 854c941e..4546777c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -33,8 +33,8 @@ import scala.language.postfixOps
import scala.util.Using
/**
- * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCEntityParser entity parser]] configured using
- * [[https://opennlp.apache.org/ OpenNLP]] **name finders** models.
+ * [[https://opennlp.apache.org/ OpenNLP]] based language independent
[[NCEntityParser entity parser]] configured using
+ * [[https://opennlp.apache.org/ OpenNLP]] **name finders** models.
*
* This parser prepares [[NCEntity]] instances which are detected by the
provided models.
* These entities are created with type `opennlp:modelName`, where
`modelName` is the model name.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
index e9e9dd7c..85dece07 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
@@ -20,39 +20,32 @@ package org.apache.nlpcraft.nlp.parsers
import org.apache.nlpcraft.nlp.stemmer.NCStemmer
import org.apache.nlpcraft.*
/**
- * This trait is used in [[NCSemanticEntityParser]] and defines configuration
- * which contains synonyms for [[NCEntity]] detection and properties which
are used for new [[NCEntity]] instances creation.
+ * This trait defines a named [[NCEntity entity]] that is used by
[[NCSemanticEntityParser]].
*
- * In other words it denotes a [[NCEntity named entity]] which can be created
by [[NCSemanticEntityParser]].
+ * The main purpose of this trait is to provide a set of synonyms by which
this named entity can be matched
+ * in the input text. Each synonym consists of one or more individual words.
Synonym matching is performed on the
+ * normalized and stemmatized forms of both a synonym and a user input on
first phase and if the first attempt was not
+ * successful, it tries to match stemmatized forms of synonyms with
lemmatized and the stemmatized forms of user input.
+ * This approach provides more accurate matching and doesn't force users to
provide synonyms in their initial words form.
*
- * The trait contains a set of synonyms to match on named entity.
- * A synonym can have one or more individual words.
* Note that element's type is its implicit synonym so that even if no
additional synonyms are defined at least one synonym
* always exists.
- * Note also that synonym matching is performed on normalized and stemmatized
forms of both a synonym and user input on
- * first phase and if first attempt is not successful, it tries to match
stemmatized forms of synonyms
- * with stemmatized forms of user input which were lemmatized preliminarily.
- * This approach allows to provide more accurate matching and doesn't force
users to prepare synonyms in initial words form.
*
- * Stemmetization.
- * Via one synonyms **argue** all following words *argued*, *argues* and
*arguing* are matched
- * by the same stem **argu**.
- * Note that you can control stemmatization aggression level by choosing
preferable algorithm,
+ * **1st Phase**: on the 1st phase [[NCSemanticEntityParser]] will use
stemmatized forms of both the synonym and the user input.
+ * For example, a single synonym **argue** will match all following words
**argued**, **argues** and **arguing** by utilizing
+ * the same stem **argu**. Note that you can control the stemmatization level by
choosing preferable algorithm,
* look at the following article
[[https://www.baeldung.com/cs/porter-vs-lancaster-stemming-algorithms
Differences Between Porter and Lancaster Stemming Algorithms]].
- * Also note please that stemmatization approach can be less or more useful
for different languages.
+ * Also note that stemmatization approach effectiveness varies depending on
the chosen languages.
*
- * Lemmatization.
- * If an element defined via synonym **go**, all following user input texts
are matched:
- * *go*, *gone*, *goes*, *went*. So, it is enough to define just synonym
initial word's forms.
+ * **2nd Phase**: at the second phase, if the 1st phase didn't produce a
match, [[NCSemanticEntityParser]] will try to use lemmatized and
+ * then stemmatized version of the user input against stemmatized form of the
synonym. For example, if an element is defined via
+ * synonym **go**, all following user input texts will be matched: **go**,
**gone**, **goes**, **went**. Note that it is enough to
+ * define just initial word's form for the synonym.
*
- * Beside described above synonyms, semantic element can also have an
optional set of special synonyms called values or "proper nouns" for this
element.
- * Unlike basic synonyms, each value is a pair of a name and a set of
standard synonyms by which that value,
- * and ultimately its element, can be recognized in the user input.
- * Note that the value name itself acts as an implicit synonym even when no
additional synonyms added for that value.
- *
- * So [[NCEntity named entity]] can be found via
[[NCSemanticElement.getSynonyms element synonyms]] or
- * [[NCSemanticElement.getValues element values]].
- * Other [[NCSemanticElement]] properties are passed into created
corresponded [[NCEntity]] instance.
+ * Besides the synonyms described above, a semantic element can also have an
optional set of special synonyms called values or
+ * "proper nouns" for this element. Unlike basic synonyms, each value is a
pair of a name and a set of standard synonyms
+ * by which that value, and ultimately its element, can be recognized in the
user input. Note that the value name itself
+ * acts as an implicit synonym even when no additional synonyms are added for
that value.
*
* Example 1.
* <pre>
@@ -62,7 +55,7 @@ import org.apache.nlpcraft.*
* - "{menu|carte|card}"
* - "{products|goods|food|item|_} list"
* </pre>
- * Described above element **ord:menu** can be detected via synonyms:
*menu*, *products*, *products list* etc.
+ * This YAML representation describes semantic entity **ord:menu** that can
be detected via synonyms: *menu*, *products*, *products list* etc.
*
* Example 2.
* <pre>
@@ -73,13 +66,13 @@ import org.apache.nlpcraft.*
* "medium": [ "{medium|intermediate|normal|regular} {size|piece|_}" ]
* "large": [ "{big|biggest|large|max|maximum|huge|enormous}
{size|piece|_}" ]
* </pre>
- * Described above element **ord:pizza:size** can be detected via
values synonyms: *small*, *medium size*, *big piece* etc.
- * Note that **value** (*small*, *medium* or *large* in this example) is
passed in created [[NCEntity]] as property with key
- * **element-type:value** (*ord:pizza:size:value* in this
example).
+ * This YAML definition describes semantic entity **ord:pizza:size**
that can be detected via values synonyms: *small*, *medium size*,
+ * *big piece* etc. Note that **value** (*small*, *medium* or *large* in this
example) is passed in created [[NCEntity]] as
+ * a property with a key *element-type:value* (`ord:pizza:size:value` in this
example).
*
- * **NOTE** that given examples show how semantic elements synonyms and
values are represented via YAML format
- * when these elements passed in [[NCSemanticEntityParser]] via semantic
model resource definition,
- * but there aren't differences when semantic elements are defined via JSON
files or prepared programmatically.
+ * **NOTE**: these examples show how semantic elements can be defined via
YAML format
+ * when these elements are passed in [[NCSemanticEntityParser]] via resource
definition,
+ * but there aren't any differences when semantic elements are defined via
JSON/YAML files or prepared programmatically.
*
* See detailed description on the website
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic
Semantic Parser]].
*
@@ -87,42 +80,42 @@ import org.apache.nlpcraft.*
*/
trait NCSemanticElement:
/**
- * Gets **type** for created [[NCEntity]] instance.
- * Representation of [[NCEntity.getType]] method.
+ * Gets type of this element which will become type of the entity if this
element is detected.
*
* @return Element type.
+ * @see [[NCEntity.getType]]
*/
def getType: String
/**
- * Gets **groups** for created [[NCEntity]] instance.
- * Representation of [[NCEntity.getGroups]] method.
+ * Gets groups this element is a member of. By default, this element
belongs to at least
+ * one group with the name of its [[getType type]].
*
* @return Groups.
+ * @see [[NCEntity.getGroups]]
*/
def getGroups: Set[String] = Set(getType)
/**
- * Gets values map. Each element can contain multiple value,
+ * Gets values map. Each element can have zero or more values,
* each value is described as name and list of its synonyms.
- * They allows to find element's value in text.
- * Note that macros can be used for synonyms definition.
+ * Note that macros can be used for synonyms definition, i.e. returned
synonyms can contain references to macros.
*
* @return Values.
*/
def getValues: Map[String, Set[String]] = Map.empty
/**
- * Gets elements synonyms list. They allows to find element in text.
- * Note that macros can be used for synonyms definition, so returned
synonyms can contain references to macroses.
+ * Gets element's synonyms.
+ * Note that macros can be used for synonyms definition, i.e. returned
synonyms can contain references to macros.
*
* @return Synonyms.
*/
def getSynonyms: Set[String] = Set.empty
/**
- * Gets [[NCPropertyMap metadata]] property for created [[NCEntity]]
instance.
+ * Gets [[NCPropertyMap metadata]] that will be passed to [[NCEntity]]
instance if this element is detected.
*
- * @return Groups.
+ * @return Metadata.
*/
def getProperties: Map[String, AnyRef] = Map.empty