This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new 33ad4d9  WIP.
33ad4d9 is described below

commit 33ad4d9c6fff2bd5220358dfede138afe467e67d
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Dec 7 18:22:42 2021 -0800

    WIP.
---
 .../main/scala/org/apache/nlpcraft/NCEntity.java   | 121 +-----------
 .../{NCEntityParser.java => NCEntityEnricher.java} |  25 ++-
 .../scala/org/apache/nlpcraft/NCEntityParser.java  |   3 +-
 .../scala/org/apache/nlpcraft/NCModelConfig.java   | 217 ++-------------------
 .../{NCToken.java => NCParameterized.java}         |  65 +++---
 .../main/scala/org/apache/nlpcraft/NCToken.java    |   8 +-
 .../{NCEntityParser.java => NCTokenEnricher.java}  |  21 +-
 7 files changed, 105 insertions(+), 355 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
index 35aac20..6f2899e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
@@ -22,109 +22,25 @@ import java.util.List;
 /**
  *
  */
-public interface NCEntity {
-    /**
-     * Gets ID of the request this entity is part of.
-     *
-     * @return ID of the request this entity is part of.
-     */
-    String getRequestId();
-
+public interface NCEntity extends NCParameterized {
     /**
      *
      * @return
      */
-    String getId();
-
-    /**
-     * Gets the optional parent ID of the model element this entity 
represents. This only available
-     * for user-defined model elements - built-in entities do not have parents 
and this will return {@code null}.
-     *
-     * @return ID of the entity's element immediate parent or {@code null} if 
not available.
-     * @see NCElement#getParentId()
-     * @see #getAncestors()
-     */
-    String getParentId();
-
-    /**
-     * Gets the list of all parent IDs from this entity up to the root. This 
only available
-     * for user-defined model elements = built-in entities do not have parents 
and will return an empty list.
-     *
-     * @return List, potentially empty but never {@code null}, of all parent 
IDs from this entity up to the root.
-     * @see #getParentId()
-     */
-    List<String> getAncestors();
+    List<NCToken> getTokens();
 
     /**
-     * Tests whether this entity is a child of given entity ID. It is 
equivalent to:
-     * <pre class="brush: java">
-     *     return getAncestors().contains(tokId);
-     * </pre>
-     *
-     * @param tokId Ancestor entity ID.
-     * @return <code>true</code> this entity is a child of given entity ID, 
<code>false</code> otherwise.
-     */
-    default boolean isChildOf(String tokId) {
-        return getAncestors().contains(tokId);
-    }
-
-    /**
-     * Gets the value if this entity was detected via element's value (or its 
synonyms). Otherwise,
-     * returns {@code null}. Only applicable for user-defined model elements - 
built-in entities
-     * do not have values, and it will return {@code null}.
-     *
-     * @return Value for the user-defined model element or {@code null}, if 
not available.
-     * @see NCElement#getValues()
-     */
-    String getValue();
-
-    /**
-     * Gets the list of groups this entity belongs to. Note that, by default, 
if not specified explicitly,
-     * entity always belongs to one group with ID equal to entity ID.
-     *
-     * @return entity groups list. Never {@code null} - but can be empty.
-     * @see NCElement#getGroups()
-     */
-    List<String> getGroups();
-
-    /**
-     * Tests whether this entity belongs to the given group. It is equivalent 
to:
-     * <pre class="brush: java">
-     *      return getGroups().contains(grp);
-     * </pre>
-     *
-     * @param grp Group to test.
-     * @return <code>True</code> if this entity belongs to the group 
<code>grp</code>, {@code false} otherwise.
-     */
-    default boolean isMemberOf(String grp) {
-        return getGroups().contains(grp);
-    }
-
-    /**
-     * Gets start character index of this entity in the original text.
-     *
-     * @return Start character index of this entity.
-     */
-    int getStartCharIndex();
-
-    /**
-     * Gets end character index of this entity in the original text.
+     * Gets ID of the request this entity is part of.
      *
-     * @return End character index of this entity.
+     * @return ID of the request this entity is part of.
      */
-    int getEndCharIndex();
+    String getRequestId();
 
     /**
      *
-     * @return Whether this entity is a stopword.
-     */
-    boolean isStopWord();
-    
-    /**
-     *
-     * @return Original user input text for this entity.
+     * @return
      */
-    String getOriginalText();
+    String getId();
 
     /**
      *
@@ -133,29 +49,6 @@ public interface NCEntity {
     int getIndex();
 
     /**
-     *
-     * @return Normalized user input text for this entity.
-     */
-    String getNormalizedText();
-    /**
-     *
-     * @return Lemma of this entity, i.e. a canonical form of this word.
-     */
-    String getLemma();
-
-    /**
-     *
-     * @return Stem of this entity.
-     */
-    String getStem();
-
-    /**
-     *
-     * @return Penn Treebank POS tag for this entity.
-     */
-    String getPos();
-
-    /**
      * A shortcut method that gets internal globally unique system ID of the 
entity.
      * <p>
      * This method is equivalent to:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java
similarity index 57%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java
index fedf377..ccc49f2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityEnricher.java
@@ -15,21 +15,30 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft;
+package org.apache.nlpcraft;/*
+   _________            ______________
+   __  ____/_______________  __ \__  /_____ _____  __
+   _  /    _  __ \_  ___/_  /_/ /_  /_  __ `/_  / / /
+   / /___  / /_/ /(__  )_  ____/_  / / /_/ /_  /_/ /
+   \____/  \____//____/ /_/     /_/  \__,_/ _\__, /
+                                            /____/
 
-import java.util.List;
+          2D ASCII JVM GAME ENGINE FOR SCALA3
+              (C) 2021 Rowan Games, Inc.
+                All rights reserved.
+*/
+
+import java.util.*;
 
 /**
  *
  */
-public interface NCEntityParser {
+public interface NCEntityEnricher {
     /**
      *
      * @param req
-     * @param cfg
-     * @param toks
-     * @param ents List of already parsed entities prio to this step. Can be 
empty but never {@code null}.
-     * @return
+     * @param cfg 
+     * @param ents
      */
-    List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks, 
List<NCEntity> ents);
+    void enrich(NCRequest req, NCModelConfig cfg, List<NCEntity> ents);
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
index fedf377..d58dbc8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
@@ -28,8 +28,7 @@ public interface NCEntityParser {
      * @param req
      * @param cfg
      * @param toks
-     * @param ents List of already parsed entities prio to this step. Can be 
empty but never {@code null}.
      * @return
      */
-    List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks, 
List<NCEntity> ents);
+    List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks);
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
index fa6f4b9..998cb16 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
@@ -22,17 +22,7 @@ import java.util.*;
 /**
  *
  */
-public interface NCModelConfig {
-    /**
-     * Default value for {@link #getMinWords()} method.
-     */
-    int DFLT_MIN_WORDS = 1;
-
-    /**
-     * Default value for {@link #getMaxWords()} method.
-     */
-    int DFLT_MAX_WORDS = 50;
-
+public interface NCModelConfig extends NCParameterized {
     /**
      * Default value for {@link #getMinTokens()} method.
      */
@@ -54,15 +44,22 @@ public interface NCModelConfig {
     boolean DFLT_IS_NOT_LATIN_CHARSET_ALLOWED = false;
 
     /**
-     * Default value for {@link #isSwearWordsAllowed()} method.
+     *
+     * @return
+     */
+    NCTokenParser getTokenParser();
+
+    /**
+     *
+     * @return
      */
-    boolean DFLT_IS_SWEAR_WORDS_ALLOWED = false;
+    List<NCTokenEnricher> getTokenEnrichers();
 
     /**
      *
      * @return
      */
-    NCTokenParser getTokenParser();
+    List<NCEntityEnricher> getEntityEnrichers();
 
     /**
      *
@@ -72,52 +69,20 @@ public interface NCModelConfig {
 
     /**
      * Gets unique, <i>immutable</i> ID of this model.
-     * <p>
-     * Note that <b>model IDs are immutable</b> while name and version
-     * can be changed freely. Changing model ID is equal to creating a 
completely new model.
-     * Model IDs (unlike name and version) are not exposed to the end user and 
only serve a
-     * technical purpose. ID's max length is 32 characters.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by <code>id</code> 
property:
-     * <pre class="brush: js">
-     * {
-     *      "id": "my.model.id"
-     * }
-     * </pre>
      *
      * @return Unique, <i>immutable</i> ID of this model.
      */
     String getId();
 
     /**
-     * Gets descriptive name of this model. Name's max length is 64 characters.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by <code>name</code> 
property:
-     * <pre class="brush: js">
-     * {
-     *      "name": "My Model"
-     * }
-     * </pre>
+     * Gets descriptive name of this model.
      *
      * @return Descriptive name for this model.
      */
     String getName();
 
     /**
-     * Gets the version of this model using semantic versioning. Version's max 
length is 16 characters.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>version</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "version": "1.0.0"
-     * }
-     * </pre>
+     * Gets the version of this model using semantic versioning.
      *
      * @return A version compatible with (<a 
href="http://www.semver.org">www.semver.org</a>) specification.
      */
@@ -126,15 +91,6 @@ public interface NCModelConfig {
     /**
      * Gets optional short model description. This can be displayed by the 
management tools.
      * Default implementation returns <code>null</code>.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>description</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "description": "Model description..."
-     * }
-     * </pre>
      *
      * @return Optional short model description. Can return <code>null</code>.
      */
@@ -153,169 +109,36 @@ public interface NCModelConfig {
     }
 
     /**
-     * Gets minimum word count (<i>including</i> stopwords) below which user 
input will be automatically
-     * rejected as too short. In almost all cases this value should be greater 
than or equal to one.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link #DFLT_MIN_WORDS} 
will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>minWords</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "minWords": 2
-     * }
-     * </pre>
      *
-     * @return Minimum word count (<i>including</i> stopwords) below which 
user input will be automatically
-     * rejected as too short.
-     */
-    default int getMinWords() {
-        return DFLT_MIN_WORDS;
-    }
-
-    /**
-     * Gets maximum word count (<i>including</i> stopwords) above which user 
input will be automatically
-     * rejected as too long. In almost all cases this value should be greater 
than or equal to one.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link #DFLT_MAX_WORDS} 
will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>maxWords</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "maxWords": 50
-     * }
-     * </pre>
-     *
-     * @return Maximum word count (<i>including</i> stopwords) above which 
user input will be automatically
-     * rejected as too long.
-     */
-    default int getMaxWords() {
-        return DFLT_MAX_WORDS;
-    }
-
-    /**
-     * Gets minimum number of all tokens (system and user defined) below which 
user input will be
-     * automatically rejected as too short. In almost all cases this value 
should be greater than or equal to one.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link #DFLT_MIN_TOKENS} 
will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>minTokens</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "minTokens": 1
-     * }
-     * </pre>
-     *
-     * @return Minimum number of all tokens.
+     * @return
      */
     default int getMinTokens() {
         return DFLT_MIN_TOKENS;
     }
 
     /**
-     * Gets maximum number of all tokens (system and user defined) above which 
user input will be
-     * automatically rejected as too long. Note that sentences with large 
number of token can result
-     * in significant processing delay and substantial memory consumption.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link #DFLT_MAX_TOKENS} 
will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>maxTokens</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "maxTokens": 100
-     * }
-     * </pre>
      *
-     * @return Maximum number of all tokens.
+     * @return
      */
     default int getMaxTokens() {
         return DFLT_MAX_TOKENS;
     }
 
+    int getMaxStopWords();
+
     /**
-     * Gets minimum word count (<i>excluding</i> stopwords) below which user 
input will be automatically rejected
-     * as ambiguous sentence.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link 
#DFLT_MIN_NON_STOPWORDS} will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>minNonStopwords</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "minNonStopwords": 2
-     * }
-     * </pre>
      *
-     * @return Minimum word count (<i>excluding</i> stopwords) below which 
user input will be automatically
-     * rejected as too short.
+     * @return
      */
-    default int getMinNonStopwords() {
+    default int getMinNonStopWords() {
         return DFLT_MIN_NON_STOPWORDS;
     }
 
     /**
-     * Whether to allow non-Latin charset in user input. Currently, only
-     * Latin charset is supported. However, model can choose whether to 
automatically reject user
-     * input with characters outside of Latin charset. If {@code false} such 
user input will be automatically
-     * rejected.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link 
#DFLT_IS_NOT_LATIN_CHARSET_ALLOWED} will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>nonLatinCharsetAllowed</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "nonLatinCharsetAllowed": false
-     * }
-     * </pre>
      *
-     * @return Whether to allow non-Latin charset in user input.
+     * @return
      */
     default boolean isNotLatinCharsetAllowed() {
         return DFLT_IS_NOT_LATIN_CHARSET_ALLOWED;
     }
-
-    /**
-     * Whether to allow known swear words in user input. If {@code false} - 
user input with
-     * detected known swear words will be automatically rejected.
-     * <p>
-     * <b>Default</b>
-     * <br>
-     * If not provided by the model the default value {@link 
#DFLT_IS_SWEAR_WORDS_ALLOWED} will be used.
-     * <p>
-     * <b>JSON</b>
-     * <br>
-     * If using JSON/YAML model presentation this is set by 
<code>swearWordsAllowed</code> property:
-     * <pre class="brush: js">
-     * {
-     *      "swearWordsAllowed": false
-     * }
-     * </pre>
-     *
-     * @return Whether to allow known swear words in user input.
-     */
-    default boolean isSwearWordsAllowed() {
-        return DFLT_IS_SWEAR_WORDS_ALLOWED;
-    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterized.java
similarity index 50%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterized.java
index 14927df..854fbba 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterized.java
@@ -15,57 +15,68 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft;
+package org.apache.nlpcraft;/*
+   _________            ______________
+   __  ____/_______________  __ \__  /_____ _____  __
+   _  /    _  __ \_  ___/_  /_/ /_  /_  __ `/_  / / /
+   / /___  / /_/ /(__  )_  ____/_  / / /_/ /_  /_/ /
+   \____/  \____//____/ /_/     /_/  \__,_/ _\__, /
+                                            /____/
+
+          2D ASCII JVM GAME ENGINE FOR SCALA3
+              (C) 2021 Rowan Games, Inc.
+                All rights reserved.
+*/
+
+import java.util.Optional;
 
 /**
  *
  */
-public interface NCToken {
-    /**
-     *
-     * @return
-     */
-    String getOriginalText();
-
+public interface NCParameterized {
     /**
-     *
+     * 
+     * @param key
+     * @param <T>
      * @return
      */
-    String getNormalizedText();
+    <T> T get(String key);
 
     /**
-     *
+     * 
+     * @param key
+     * @param <T>
      * @return
      */
-    String getLemma();
+    <T> Optional<T> getOpt(String key);
 
     /**
-     *
-     * @return
-     */
-    String getStem();
-
-    /**
-     *
-     * @return
+     * 
+     * @param key
+     * @param obj
      */
-    String getPos();
+    void put(String key, Object obj);
 
     /**
-     *
+     * 
+     * @param key
+     * @param obj
+     * @param <T>
      * @return
      */
-    int getStartCharIndex();
+    <T> T putIfAbsent(String key, T obj);
 
     /**
-     *
+     * 
+     * @param key
      * @return
      */
-    int getEndCharIndex();
+    boolean contains(String key);
 
     /**
-     *
+     * 
+     * @param key
      * @return
      */
-    int getLength();
+    boolean remove(String key);
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
index 14927df..bcb0aa5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
@@ -20,7 +20,7 @@ package org.apache.nlpcraft;
 /**
  *
  */
-public interface NCToken {
+public interface NCToken extends NCParameterized {
     /**
      *
      * @return
@@ -55,6 +55,12 @@ public interface NCToken {
      *
      * @return
      */
+    boolean isStopWord();
+
+    /**
+     *
+     * @return
+     */
     int getStartCharIndex();
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java
similarity index 60%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java
index fedf377..a26abe7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenEnricher.java
@@ -15,21 +15,30 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft;
+package org.apache.nlpcraft;/*
+   _________            ______________
+   __  ____/_______________  __ \__  /_____ _____  __
+   _  /    _  __ \_  ___/_  /_/ /_  /_  __ `/_  / / /
+   / /___  / /_/ /(__  )_  ____/_  / / /_/ /_  /_/ /
+   \____/  \____//____/ /_/     /_/  \__,_/ _\__, /
+                                            /____/
 
-import java.util.List;
+          2D ASCII JVM GAME ENGINE FOR SCALA3
+              (C) 2021 Rowan Games, Inc.
+                All rights reserved.
+*/
+
+import java.util.*;
 
 /**
  *
  */
-public interface NCEntityParser {
+public interface NCTokenEnricher {
     /**
      *
      * @param req
      * @param cfg
      * @param toks
-     * @param ents List of already parsed entities prio to this step. Can be 
empty but never {@code null}.
-     * @return
      */
-    List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks, 
List<NCEntity> ents);
+    void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks);
 }

Reply via email to