This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-468
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-468 by this push:
new 24cf418 WIP.
24cf418 is described below
commit 24cf418106ffba1aa972d7d08abfe76b95346ae6
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Oct 13 09:18:06 2021 +0300
WIP.
---
.../org/apache/nlpcraft/model/NCModelConfig.java | 6 ++--
.../model/builders/NCModelConfigBuilder.java | 1 +
.../detectors/NCDefaultStopWordsDetector.java | 15 ++++++++-
.../detectors/NCDefaultSwearWordsDetector.java | 4 ++-
.../detectors/NCSimpleWordsDetector.java | 4 ++-
.../nlpcraft/model/nlp/NCNlpWordsDetector.java | 8 +++--
.../src/test/java/org/apache/nlpcraft/NCSpec.java | 36 +++++-----------------
7 files changed, 36 insertions(+), 38 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
index 25f437c..094a7f0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
@@ -691,15 +691,13 @@ public interface NCModelConfig {
default int getConversationDepth() { return DFLT_CONV_DEPTH; }
/**
- * // TODO: add javadoc
+ * // TODO: add javadoc or drop it.
* @return TBD
*/
default boolean isStopWordsAllowed() {
return DFLT_IS_STOPWORDS_ALLOWED;
}
- // TODO: dropped. getSuspiciousWords,
- // TODO: dropped - getAdditionalStopWords, getExcludedStopWords - just
override if necessary getStopWordsDetector.
default NCNlpWordsDetector getStopWordsDetector() {
return new NCDefaultStopWordsDetector();
}
@@ -710,6 +708,8 @@ public interface NCModelConfig {
return null;
}
+ // TODO: dropped - getSuspiciousWords (use your own or
org.apache.nlpcraft.model.components.detectors.NCSimpleWordsDetector)
+ // TODO: dropped - getAdditionalStopWords, getExcludedStopWords (look at
the NCDefaultStopWordsDetector API)
// /**
// * Gets an optional list of suspicious words. A suspicious word is a
word that generally should not appear in user
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
index 6d083a4..e27b486 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
@@ -79,6 +79,7 @@ public class NCModelConfigBuilder {
// 2. Specail words detectors. Free implementation supported.
// Default used -
org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector.
+ // (it supports sets of additional and excluded words)
public NCModelConfigBuilder withStopWordsDetector(NCNlpWordsDetector
stopWordsDetector) {
return this;
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
index b81fd1f..d410d65 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
@@ -17,6 +17,8 @@
package org.apache.nlpcraft.model.components.detectors;
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
import org.apache.nlpcraft.model.nlp.NCNlpWord;
import org.apache.nlpcraft.model.nlp.NCNlpWordsDetector;
@@ -27,8 +29,19 @@ import java.util.List;
* Stopwords detector default implementation.
*/
public class NCDefaultStopWordsDetector implements NCNlpWordsDetector {
+ private Set<String> additional;
+ private Set<String> excluded;
+
+ public NCDefaultStopWordsDetector() {
+ }
+
+ public NCDefaultStopWordsDetector(Set<String> additional, Set<String>
excluded) {
+ this.additional = additional;
+ this.excluded = excluded;
+ }
+
@Override
- public List<NCNlpWord> detect(List<NCNlpWord> sen) {
+ public List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg,
List<NCNlpWord> sen) {
return Collections.emptyList();
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
index a51c8f1..ecb5af8 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
@@ -17,6 +17,8 @@
package org.apache.nlpcraft.model.components.detectors;
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
import org.apache.nlpcraft.model.nlp.NCNlpWord;
import org.apache.nlpcraft.model.nlp.NCNlpWordsDetector;
@@ -28,7 +30,7 @@ import java.util.List;
*/
public class NCDefaultSwearWordsDetector implements NCNlpWordsDetector {
@Override
- public List<NCNlpWord> detect(List<NCNlpWord> sen) {
+ public List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg,
List<NCNlpWord> sen) {
return Collections.emptyList();
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
index 712bdb6..427a10b 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
@@ -17,6 +17,8 @@
package org.apache.nlpcraft.model.components.detectors;
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
import org.apache.nlpcraft.model.nlp.NCNlpWord;
import org.apache.nlpcraft.model.nlp.NCNlpWordsDetector;
@@ -36,7 +38,7 @@ public class NCSimpleWordsDetector implements
NCNlpWordsDetector {
}
@Override
- public List<NCNlpWord> detect(List<NCNlpWord> sen) {
+ public List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg,
List<NCNlpWord> sen) {
// TODO: stems, normal form.
return sen.stream().filter(p ->
words.contains(p.getWord())).collect(Collectors.toList());
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
index bd79078..106b956 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
@@ -17,6 +17,8 @@
package org.apache.nlpcraft.model.nlp;
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
import org.apache.nlpcraft.model.nlp.NCNlpWord;
import java.util.List;
@@ -28,8 +30,6 @@ import java.util.List;
* -
org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector,
* -
org.apache.nlpcraft.model.components.detectors.NCDefaultSwearWordsDetector
*
- * If you want to extend logic by additionally and excluded stop words - it
can be impemented by overriding default solution.
- *
* Custom and language related solutions should be implemented and set in
model configuration.
*
*/
@@ -37,8 +37,10 @@ public interface NCNlpWordsDetector {
/**
* Finds words by some criteria in given words list.
*
+ * @param req
+ * @param cfg
* @param sen
* @return
*/
- List<NCNlpWord> detect(List<NCNlpWord> sen);
+ List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg, List<NCNlpWord>
sen);
}
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
index 880e466..e14a149 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
@@ -22,6 +22,7 @@ import org.apache.nlpcraft.model.NCModel;
import org.apache.nlpcraft.model.NCModelBehaviour;
import org.apache.nlpcraft.model.NCRejection;
import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
import org.apache.nlpcraft.model.NCResult;
import org.apache.nlpcraft.model.builders.NCModelConfigBuilder;
import
org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector;
@@ -61,34 +62,6 @@ public class NCSpec {
}
}
- private static class CustomStopWordsDetector implements NCNlpWordsDetector
{
- private final static Set<String> ADDITIONAL = new HashSet<>() {
- {
- add("stop1");
- add("stop2");
- }
- };
-
- private final static NCNlpWordsDetector STANDARD = new
NCDefaultStopWordsDetector();
-
- @Override
- public List<NCNlpWord> detect(List<NCNlpWord> sen) {
- return
- Stream.concat(
- STANDARD.detect(sen).stream(),
- sen.stream().filter(p ->
ADDITIONAL.contains(p.getWord()))
- ).
- distinct().
- sorted(
- new Comparator<NCNlpWord>() {
- @Override
- public int compare(NCNlpWord w1, NCNlpWord w2) {
- return Integer.compare(w1.getStart(),
w2.getStart());
- }
- }
- ).collect(Collectors.toList());
- }
- }
@Test
public void test() throws Exception {
@@ -126,7 +99,12 @@ public class NCSpec {
withId("modleId").
withName("name").
withSuspiciousWordsDetector(new
NCSimpleWordsDetector(Collections.singleton("bad"))).
- withStopWordsDetector(new CustomStopWordsDetector()).
+ withStopWordsDetector(
+ new NCDefaultStopWordsDetector(
+ new HashSet<>() {{ add("stop1"); add("stop2"); }}
+ null
+ )
+ ).
// Nlp tokenizer.
withTokenizer(new NCOpenNlpTokenizer()).
// NERs.