This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-468
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-468 by this push:
     new 24cf418  WIP.
24cf418 is described below

commit 24cf418106ffba1aa972d7d08abfe76b95346ae6
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Oct 13 09:18:06 2021 +0300

    WIP.
---
 .../org/apache/nlpcraft/model/NCModelConfig.java   |  6 ++--
 .../model/builders/NCModelConfigBuilder.java       |  1 +
 .../detectors/NCDefaultStopWordsDetector.java      | 15 ++++++++-
 .../detectors/NCDefaultSwearWordsDetector.java     |  4 ++-
 .../detectors/NCSimpleWordsDetector.java           |  4 ++-
 .../nlpcraft/model/nlp/NCNlpWordsDetector.java     |  8 +++--
 .../src/test/java/org/apache/nlpcraft/NCSpec.java  | 36 +++++-----------------
 7 files changed, 36 insertions(+), 38 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
index 25f437c..094a7f0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java
@@ -691,15 +691,13 @@ public interface NCModelConfig {
     default int getConversationDepth() { return DFLT_CONV_DEPTH; }
 
     /**
-     * // TODO: add javadoc
+     * // TODO: add javadoc or drop it.
      * @return TBD
      */
     default boolean isStopWordsAllowed() {
         return DFLT_IS_STOPWORDS_ALLOWED;
     }
 
-    // TODO: dropped. getSuspiciousWords,
-    // TODO: dropped - getAdditionalStopWords, getExcludedStopWords - just 
override if necessary getStopWordsDetector.
     default NCNlpWordsDetector getStopWordsDetector() {
         return new NCDefaultStopWordsDetector();
     }
@@ -710,6 +708,8 @@ public interface NCModelConfig {
         return null;
     }
 
+    // TODO: dropped. getSuspiciousWords (use your own or 
org.apache.nlpcraft.model.components.detectors.NCSimpleWordsDetector)
+    // TODO: dropped - getAdditionalStopWords, getExcludedStopWords (look at 
API NCDefaultStopWordsDetector)
 
 //    /**
 //     * Gets an optional list of suspicious words. A suspicious word is a 
word that generally should not appear in user
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
index 6d083a4..e27b486 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelConfigBuilder.java
@@ -79,6 +79,7 @@ public class NCModelConfigBuilder {
 
     // 2. Special words detectors. Free implementation supported.
     // Default used - 
org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector.
+    // (it supports extra and excluded words set)
     public NCModelConfigBuilder withStopWordsDetector(NCNlpWordsDetector 
stopWordsDetector) {
         return this;
     }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
index b81fd1f..d410d65 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultStopWordsDetector.java
@@ -17,6 +17,8 @@
 
 package org.apache.nlpcraft.model.components.detectors;
 
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
 import org.apache.nlpcraft.model.nlp.NCNlpWord;
 import org.apache.nlpcraft.model.nlp.NCNlpWordsDetector;
 
@@ -27,8 +29,19 @@ import java.util.List;
  * Stopwords detector default implementation.
  */
 public class NCDefaultStopWordsDetector implements NCNlpWordsDetector {
+    private Set<String> additional;
+    private Set<String> excluded;
+
+    public NCDefaultStopWordsDetector() {
+    }
+
+    public NCDefaultStopWordsDetector(Set<String> additional, Set<String> 
excluded) {
+        this.additional = additional;
+        this.excluded = excluded;
+    }
+
     @Override
-    public List<NCNlpWord> detect(List<NCNlpWord> sen) {
+    public List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg, 
List<NCNlpWord> sen) {
         return Collections.emptyList();
     }
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
index a51c8f1..ecb5af8 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCDefaultSwearWordsDetector.java
@@ -17,6 +17,8 @@
 
 package org.apache.nlpcraft.model.components.detectors;
 
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
 import org.apache.nlpcraft.model.nlp.NCNlpWord;
 import org.apache.nlpcraft.model.nlp.NCNlpWordsDetector;
 
@@ -28,7 +30,7 @@ import java.util.List;
  */
 public class NCDefaultSwearWordsDetector implements NCNlpWordsDetector {
     @Override
-    public List<NCNlpWord> detect(List<NCNlpWord> sen) {
+    public List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg, 
List<NCNlpWord> sen) {
         return Collections.emptyList();
     }
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
index 712bdb6..427a10b 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/components/detectors/NCSimpleWordsDetector.java
@@ -17,6 +17,8 @@
 
 package org.apache.nlpcraft.model.components.detectors;
 
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
 import org.apache.nlpcraft.model.nlp.NCNlpWord;
 import org.apache.nlpcraft.model.nlp.NCNlpWordsDetector;
 
@@ -36,7 +38,7 @@ public class NCSimpleWordsDetector implements 
NCNlpWordsDetector {
     }
 
     @Override
-    public List<NCNlpWord> detect(List<NCNlpWord> sen) {
+    public List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg, 
List<NCNlpWord> sen) {
         // TODO: stems, normal form.
         return sen.stream().filter(p -> 
words.contains(p.getWord())).collect(Collectors.toList());
     }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
index bd79078..106b956 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWordsDetector.java
@@ -17,6 +17,8 @@
 
 package org.apache.nlpcraft.model.nlp;
 
+import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
 import org.apache.nlpcraft.model.nlp.NCNlpWord;
 
 import java.util.List;
@@ -28,8 +30,6 @@ import java.util.List;
  *  - 
org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector,
  *  - 
org.apache.nlpcraft.model.components.detectors.NCDefaultSwearWordsDetector
  *
- * If you want to extend logic by additionally and excluded stop words - it 
can be impemented by overriding default solution.
- *
  * Custom and language related solutions should be implemented and set in 
model configuration.
  *
  */
@@ -37,8 +37,10 @@ public interface NCNlpWordsDetector {
     /**
      * Finds words by some criteria in given words list.
      *
+     * @param req
+     * @param cfg
      * @param sen
      * @return
      */
-    List<NCNlpWord> detect(List<NCNlpWord> sen);
+    List<NCNlpWord> detect(NCRequest req, NCModelConfig cfg, List<NCNlpWord> 
sen);
 }
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java 
b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
index 880e466..e14a149 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java
@@ -22,6 +22,7 @@ import org.apache.nlpcraft.model.NCModel;
 import org.apache.nlpcraft.model.NCModelBehaviour;
 import org.apache.nlpcraft.model.NCRejection;
 import org.apache.nlpcraft.model.NCModelConfig;
+import org.apache.nlpcraft.model.NCRequest;
 import org.apache.nlpcraft.model.NCResult;
 import org.apache.nlpcraft.model.builders.NCModelConfigBuilder;
 import 
org.apache.nlpcraft.model.components.detectors.NCDefaultStopWordsDetector;
@@ -61,34 +62,6 @@ public class NCSpec {
         }
     }
 
-    private static class CustomStopWordsDetector implements NCNlpWordsDetector 
{
-        private final static Set<String> ADDITIONAL = new HashSet<>() {
-            {
-                add("stop1");
-                add("stop2");
-            }
-        };
-
-        private final static NCNlpWordsDetector STANDARD = new 
NCDefaultStopWordsDetector();
-
-        @Override
-        public List<NCNlpWord> detect(List<NCNlpWord> sen) {
-            return
-                Stream.concat(
-                        STANDARD.detect(sen).stream(),
-                        sen.stream().filter(p -> 
ADDITIONAL.contains(p.getWord()))
-                    ).
-                    distinct().
-                    sorted(
-                        new Comparator<NCNlpWord>() {
-                            @Override
-                            public int compare(NCNlpWord w1, NCNlpWord w2) {
-                                return Integer.compare(w1.getStart(), 
w2.getStart());
-                            }
-                        }
-                    ).collect(Collectors.toList());
-        }
-    }
 
     @Test
     public void test() throws Exception {
@@ -126,7 +99,12 @@ public class NCSpec {
                 withId("modleId").
                 withName("name").
                 withSuspiciousWordsDetector(new 
NCSimpleWordsDetector(Collections.singleton("bad"))).
-                withStopWordsDetector(new CustomStopWordsDetector()).
+                withStopWordsDetector(
+                    new NCDefaultStopWordsDetector(
+                        new HashSet<>() {{ add("stop1"); add("stop2"); }}
+                        null
+                    )
+                ).
                 // Nlp tokenizer.
                 withTokenizer(new NCOpenNlpTokenizer()).
                 // NERs.

Reply via email to