This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-468
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-468 by this push:
new 68a1c67 WIP.
68a1c67 is described below
commit 68a1c67fdf37b7afeafdd7119c5e398f307940d2
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Oct 13 13:51:59 2021 +0300
WIP.
---
.../model/components/detectors/NCConfiguredWordsDetector.java | 3 ++-
.../model/components/detectors/NCDefaultStopWordsDetector.java | 5 +++++
.../model/components/detectors/NCDefaultSwearWordsDetector.java | 2 ++
.../nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java | 2 ++
.../nlpcraft/model/components/ner/synonyms/NCSynonymsNerParser.java | 2 ++
.../nlpcraft/model/components/tokenizer/NCOpenNlpTokenizer.java | 2 ++
6 files changed, 15 insertions(+), 1 deletion(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCConfiguredWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCConfiguredWordsDetector.java
index 100aa3a..5cd7c13 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCConfiguredWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCConfiguredWordsDetector.java
@@ -28,7 +28,8 @@ import java.util.Set;
import java.util.stream.Collectors;
/**
- *
+ * Simple implementation that allows configuring the set of words for detection.
+ * Language - independent.
*/
public class NCConfiguredWordsDetector implements NCNlpWordsDetector {
private final Set<String> words;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
index 7b6c0a1..7ba8a24 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
@@ -28,6 +28,11 @@ import java.util.Set;
/**
* Stopwords detector default implementation.
+ * Can be additionally configured with:
+ * - a set of additional words and
+ * - a set of excluded words.
+ *
+ * Language - EN.
*/
public class NCDefaultStopWordsDetector implements NCNlpWordsDetector {
private Set<String> additional;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
index ecb5af8..a564203 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
@@ -27,6 +27,8 @@ import java.util.List;
/**
* Swearwords detector default implementation.
+ *
+ * Language - EN.
*/
public class NCDefaultSwearWordsDetector implements NCNlpWordsDetector {
@Override
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
index 42fbfc6..04c959b 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/opennlp/NCOpenNlpNerParser.java
@@ -28,6 +28,8 @@ import java.util.Set;
/**
* NERs implementation based on OpenNlp NERs.
+ *
+ * Language - EN.
*/
public class NCOpenNlpNerParser implements NCNlpNerParser {
private final Set<String> supportedNerNames;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/synonyms/NCSynonymsNerParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/synonyms/NCSynonymsNerParser.java
index 9c05476..ecc53db 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/synonyms/NCSynonymsNerParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/ner/synonyms/NCSynonymsNerParser.java
@@ -26,6 +26,8 @@ import java.util.Set;
/**
* Nlpcraft synonyms based NER provider.
+ *
+ * Language - independent.
*/
public interface NCSynonymsNerParser extends NCNlpNerParser {
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/tokenizer/NCOpenNlpTokenizer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/tokenizer/NCOpenNlpTokenizer.java
index 22902ba..3c19e0e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/tokenizer/NCOpenNlpTokenizer.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/tokenizer/NCOpenNlpTokenizer.java
@@ -26,6 +26,8 @@ import java.util.List;
/**
* Default tokenizer based on OpenNlp.
* Can be renamed to DefaultTokenizer.
+ *
+ * Language - EN.
*/
public class NCOpenNlpTokenizer implements NCNlpTokenizer {
@Override