This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-468
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-468 by this push:
new b4339af WIP.
b4339af is described below
commit b4339aff0fe09e24cf24f35ff25980ac7e06159c
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Oct 13 12:17:39 2021 +0300
WIP.
---
.../components/ner/opennlp/NCOpenNlpNerParser.java | 7 +++++
.../scala/org/apache/nlpcraft/model/interfaces.txt | 30 ++++++++++++----------
.../src/test/java/org/apache/nlpcraft/NCSpec.java | 8 +++++-
3 files changed, 31 insertions(+), 14 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
index fa11313..42fbfc6 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
@@ -24,11 +24,18 @@ import org.apache.nlpcraft.model.nlp.NCNlpToken;
import org.apache.nlpcraft.model.nlp.NCNlpWord;
import java.util.List;
+import java.util.Set;
/**
* NERs implementation based on OpenNlp NERs.
*/
public class NCOpenNlpNerParser implements NCNlpNerParser {
+ private final Set<String> supportedNerNames;
+
+ public NCOpenNlpNerParser(Set<String> supportedNerNames) {
+ this.supportedNerNames = supportedNerNames;
+ }
+
@Override
public List<NCNlpToken> parse(NCRequest req, NCModelConfig cfg,
List<NCNlpWord> words, List<NCNlpToken> elements) {
return null;
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
index 1f8fc28..f260949 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/interfaces.txt
@@ -15,14 +15,15 @@
# limitations under the License.
#
-Interfaces (pluggable components). All of them already have built-in
implementations
+Interfaces (pluggable components). All of them have built-in implementations.
1. Text-to-words tokenizer - org.apache.nlpcraft.model.nlp.NCNlpTokenizer.
Delivered:
- org.apache.nlpcraft.model.components.tokenizer.NCOpenNlpTokenizer (not
configured)
- Stanford impl (not configured)
+Mandatory.
Default in config - NCOpenNlpTokenizer.
-When user need to implement own:
+When user needs to implement his own:
- own logic required (for example `opennlp` implementation is not satisfied,
and `stanford` license is not suitable)
- new languages support
@@ -30,27 +31,30 @@ When user need to implement own:
Delivered:
- org.apache.nlpcraft.model.components.ner.opennlp.NCOpenNlpNerParser
(configured from predefined set of supported NERS: DATE, PERSON etc)
- stanford impl (configured from predefined set of supported NERS: DATE,
PERSON etc)
- - org.apache.nlpcraft.model.components.ner.synonyms.NCSynonymsNerParser
(configured by synonyms, macros ect)
+ - org.apache.nlpcraft.model.components.ner.synonyms.NCSynonymsNerParser
(configured by synonyms, macros etc)
+Optional (if null, the system provides only NLP information, and NCToken is
the same as NCWord).
Default in config - empty list.
-When user need to implement own:
- - custom elements' detection logic support, which cannot be defined by
NCSynonymsNerParser
+When user needs to implement his own:
+ - custom elements' detection logic support, which cannot be defined by
the provided NCSynonymsNerParser.
- wrappers under existing NERS like Spacy
3. Special words finder - org.apache.nlpcraft.model.nlp.NCNlpWordsDetector.
Delivered:
- org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector
(EN, configured with additional and excluded words set)
- org.apache.nlpcraft.model.components.detectors.NCDefaultSwearWordsDetector
(EN, not configured)
- - org.apache.nlpcraft.model.components.detectors.NCConfiguredWordsDetector
(configured simple way with words set)
-Default in config - NCDefaultStopWordsDetector, NCDefaultSwearWordsDetector
-(`suspicious` detector is not set by default. Can be configured by
NCConfiguredWordsDetector)
-When user need to implement own:
- - own sophisticated logic implementation, which cannot be configured by
NCConfiguredWordsDetector
+ - org.apache.nlpcraft.model.components.detectors.NCConfiguredWordsDetector
(configured in a simple way via a word set)
+Optional (if null, stop, swear and suspicious words are not detected; these
properties are set to `false`).
+Default in config - NCDefaultStopWordsDetector, NCDefaultSwearWordsDetector.
+(The `suspicious` detector is not set by default. It can be configured, if necessary,
via NCConfiguredWordsDetector.)
+When user needs to implement his own:
+ - own sophisticated logic implementation, which cannot be configured by
NCConfiguredWordsDetector.
- new languages support
4. org.apache.nlpcraft.model.NCModelBehaviour
Delivered:
- No special implementation, this interface has all empty default methods.
+Optional.
Default in config - empty implementation.
-When user need to implement own:
- - when system used without intents
- - some tricks, even using intents
\ No newline at end of file
+When user needs to implement his own:
+ - when the system is used without intents
+ - if user needs some tricks, even using intents
\ No newline at end of file
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
index 511813c..b1acb3f 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
@@ -101,7 +101,13 @@ public class NCSpec {
// Nlp tokenizer.
withTokenizer(new NCOpenNlpTokenizer()).
// NERs.
- withNerParsers(Arrays.asList(new NCOpenNlpNerParser(), ner1,
ner2)).
+ withNerParsers(
+ Arrays.asList(
+ new NCOpenNlpNerParser(new HashSet<>() {{ add("DATE");
add("PERSON") ;}}),
+ ner1,
+ ner2
+ )
+ ).
getConfig();
NCModel mdl =