This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-468
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-468 by this push:
     new b4339af  WIP.
b4339af is described below

commit b4339aff0fe09e24cf24f35ff25980ac7e06159c
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Oct 13 12:17:39 2021 +0300

    WIP.
---
 .../components/ner/opennlp/NCOpenNlpNerParser.java |  7 +++++
 .../scala/org/apache/nlpcraft/model/interfaces.txt | 30 ++++++++++++----------
 .../src/test/java/org/apache/nlpcraft/NCSpec.java  |  8 +++++-
 3 files changed, 31 insertions(+), 14 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
index fa11313..42fbfc6 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
@@ -24,11 +24,18 @@ import org.apache.nlpcraft.model.nlp.NCNlpToken;
 import org.apache.nlpcraft.model.nlp.NCNlpWord;
 
 import java.util.List;
+import java.util.Set;
 
 /**
  * NERs implementation based on OpenNlp NERs.
  */
 public class NCOpenNlpNerParser implements NCNlpNerParser {
+    private final Set<String> supportedNerNames;
+
+    public NCOpenNlpNerParser(Set<String> supportedNerNames) {
+        this.supportedNerNames = supportedNerNames;
+    }
+
     @Override
     public List<NCNlpToken> parse(NCRequest req, NCModelConfig cfg, 
List<NCNlpWord> words, List<NCNlpToken> elements) {
         return null;
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
index 1f8fc28..f260949 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
@@ -15,14 +15,15 @@
 # limitations under the License.
 #
 
-Interfaces (pluggable components). All of them already have built-in 
implementations
+Interfaces (pluggable components). All of them have built-in implementations.
 
 1. Text-to-words tokenizer - org.apache.nlpcraft.model.nlp.NCNlpTokenizer.
 Delivered:
   - org.apache.nlpcraft.model.components.tokenizer.NCOpenNlpTokenizer (not 
configured)
   - Stanford impl (not configured)
+Mandatory.
 Default in config - NCOpenNlpTokenizer.
-When user need to implement own:
+When user needs to implement his own:
  - own logic required (for example `opennlp` implementation is not satisfied, 
and `stanford` license is not suitable)
  - new languages support
 
@@ -30,27 +31,30 @@ When user need to implement own:
 Delivered:
   - org.apache.nlpcraft.model.components.ner.opennlp.NCOpenNlpNerParser 
(configured from predefined set of supported NERS: DATE, PERSON etc)
   - stanford impl (configured from predefined set of supported NERS: DATE, 
PERSON etc)
-  - org.apache.nlpcraft.model.components.ner.synonyms.NCSynonymsNerParser 
(configured by synonyms, macros ect)
+  - org.apache.nlpcraft.model.components.ner.synonyms.NCSynonymsNerParser 
(configured by synonyms, macros etc)
+Optional (if null, the system provides only NLP information, and NCToken is 
the same as NCWord).
 Default in config - empty list.
-When user need to implement own:
-  - custom elements' detection logic support, which cannot be defined by 
NCSynonymsNerParser
+When user needs to implement his own:
+  - custom elements' detection logic support, which cannot be defined by 
provided NCSynonymsNerParser.
   - wrappers under existing NERS like Spacy
 
 3. Special words finder - org.apache.nlpcraft.model.nlp.NCNlpWordsDetector.
 Delivered:
   - org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector 
(EN, configured with additional and excluded words set)
   - org.apache.nlpcraft.model.components.detectors.NCDefaultSwearWordsDetector 
(EN, not configured)
-  - org.apache.nlpcraft.model.components.detectors.NCConfiguredWordsDetector 
(configured simple way with words set)
-Default in config - NCDefaultStopWordsDetector, NCDefaultSwearWordsDetector
-(`suspicious` detector is not set by default. Can be configured by 
NCConfiguredWordsDetector)
-When user need to implement own:
-  - own sophisticated logic implementation, which cannot be configured by 
NCConfiguredWordsDetector
+  - org.apache.nlpcraft.model.components.detectors.NCConfiguredWordsDetector 
(configured in a simple way via a set of words)
+Optional (if null, stop, swear and suspicious words are not detected, and these 
properties are set to `false`).
+Default in config - NCDefaultStopWordsDetector, NCDefaultSwearWordsDetector.
+(`suspicious` detector is not set by default. Can be configured if necessary 
by NCConfiguredWordsDetector)
+When user needs to implement his own:
+  - own sophisticated logic implementation, which cannot be configured by 
NCConfiguredWordsDetector.
   - new languages support
 
 4. org.apache.nlpcraft.model.NCModelBehaviour
 Delivered:
    - No special implementation, this interface has all empty default methods.
+Optional.
 Default in config - empty implementation.
-When user need to implement own:
-  - when system used without intents
-  - some tricks, even using intents
\ No newline at end of file
+When user needs to implement his own:
+  - when the system is used without intents
+  - if user needs some tricks, even using intents
\ No newline at end of file
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java 
b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
index 511813c..b1acb3f 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
@@ -101,7 +101,13 @@ public class NCSpec {
                 // Nlp tokenizer.
                 withTokenizer(new NCOpenNlpTokenizer()).
                 // NERs.
-                withNerParsers(Arrays.asList(new NCOpenNlpNerParser(), ner1, 
ner2)).
+                withNerParsers(
+                    Arrays.asList(
+                        new NCOpenNlpNerParser(new HashSet<>() {{ add("DATE"); 
add("PERSON") ;}}),
+                        ner1,
+                        ner2
+                    )
+                ).
                 getConfig();
 
         NCModel mdl =

Reply via email to