This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 1579b65 WIP.
1579b65 is described below
commit 1579b6540d28b2dac68c2f9327c52df22048cd20
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Jan 2 16:36:25 2022 +0300
WIP.
---
.../scala/org/apache/nlpcraft/NCModelClient.java | 7 +-
.../scala/org/apache/nlpcraft/NCModelConfig.java | 10 +-
.../org/apache/nlpcraft/NCModelConfigAdapter.java | 38 +---
.../main/scala/org/apache/nlpcraft/NCRequest.java | 6 -
.../main/scala/org/apache/nlpcraft/NCToken.java | 16 +-
.../scala/org/apache/nlpcraft/NCTokenParser.java | 29 ++-
.../scala/org/apache/nlpcraft/NCTokenizer.java | 33 ----
...ariantsFilter.java => NCVariantsValidator.java} | 2 +-
.../src/main/scala/org/apache/nlpcraft/NCWord.java | 53 ------
.../apache/nlpcraft/internal/util/NCUtils.scala | 68 +------
.../parser/opennlp/NCOpenNlpEntityParser.java | 6 +-
.../parser/semantic/NCSemanticEntityParser.java | 20 +-
.../parser/semantic/NCSemanticTextStemmer.java | 30 ---
.../semantic/en/NCEnSemanticEntityParser.java | 38 ----
.../semantic/impl/NCSemanticEntityParserImpl.scala | 31 ++--
.../impl/NCSemanticSynonymsProcessor.scala | 14 +-
.../semantic/impl/en/NCEnSemanticTextStemmer.java | 30 ---
.../enricher/en/NCEnBracketsTokenEnricher.java | 5 +-
.../enricher/en/NCEnDictionaryTokenEnricher.java | 5 +-
.../enricher/en/NCEnLanguageTokenEnricher.java | 5 +-
.../token/enricher/en/NCEnQuotesTokenEnricher.java | 5 +-
...richer.java => NCEnStopWordsTokenEnricher.java} | 23 ++-
.../enricher/en/NCEnSwearWordsTokenEnricher.java | 5 +-
.../token/enricher/en/impl/NCEnBracketsImpl.scala | 3 +-
.../en/impl/NCEnStopWordGenerator.scala | 3 +-
.../en/impl/NCEnStopWordsImpl.scala} | 202 ++++++++++++++-------
.../enricher/en/impl/NCEnSwearWordsImpl.scala | 4 +-
.../parser/opennlp/en/NCEnOpenNlpTokenParser.java | 52 ++----
.../parser/opennlp/en/impl/NCEnOpenNlpImpl.scala | 142 ++++++---------
.../nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java | 64 -------
.../opennlp/impl/NCOpenNlpTokenizerImpl.scala | 45 -----
.../nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java | 4 +-
.../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 3 +-
.../parser/opennlp/NCOpenNlpEntityParserSpec.scala | 5 +-
.../semantic/NCSemanticEntityParserJsonSpec.scala | 12 +-
.../semantic/NCSemanticEntityParserSpec.scala | 14 +-
.../semantic/NCSemanticEntityParserYamlSpec.scala | 12 +-
.../en/NCEnBracketsTokenEnricherSpec.scala | 5 +-
.../en/NCEnDictionaryTokenEnricherSpec.scala | 3 +-
.../enricher/en/NCEnQuotesTokenEnricherSpec.scala | 5 +-
.../opennlp/en/NCEnOpenNlpTokenParserSpec.scala | 51 ++++--
.../apache/nlpcraft/nlp/util/NCTestConfig.scala | 54 +++---
.../apache/nlpcraft/nlp/util/NCTestRequest.scala | 1 -
.../org/apache/nlpcraft/nlp/util/NCTestToken.scala | 9 +-
.../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 32 +++-
45 files changed, 447 insertions(+), 757 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
index a5482ff..3fe9fdb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
@@ -74,12 +74,13 @@ public class NCModelClient implements NCLifecycle {
public void start(NCModelConfig cfg) {
verify();
- cfg.getTokenizer().start(cfg);
+ cfg.getTokenParser().start(cfg);
ExecutorService s = getExecutorService();
+ // TODO: start and stop validators.
+
try {
- start(s, cfg.getTokenParsers(), cfg);
start(s, cfg.getEntityParsers(), cfg);
start(s, cfg.getEntityEnrichers(), cfg);
start(s, cfg.getTokenEnrichers(), cfg);
@@ -104,7 +105,7 @@ public class NCModelClient implements NCLifecycle {
stopExecutorService(s);
}
- cfg.getTokenizer().stop();
+ cfg.getTokenParser().stop();
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
index 78aa42e..97653a2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
@@ -27,13 +27,7 @@ public interface NCModelConfig extends NCPropertyMap {
*
* @return
*/
- NCTokenizer getTokenizer();
-
- /**
- *
- * @return
- */
- List<NCTokenParser> getTokenParsers();
+ NCTokenParser getTokenParser();
/**
*
@@ -69,7 +63,7 @@ public interface NCModelConfig extends NCPropertyMap {
*
* @return
*/
- List<NCVariantsFilter> getVariantsFilters();
+ List<NCVariantsValidator> getVariantValidators();
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
index 1ffb978..5db1716 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
@@ -23,19 +23,18 @@ import java.util.*;
*
*/
// TODO: validation for constructor and all setters.
+ // TODO: do builder instaed of it.
public class NCModelConfigAdapter extends NCPropertyMapAdapter implements
NCModelConfig {
private final String id;
private final String name;
private final String version;
- private final NCTokenizer tokenizer;
- private final List<NCTokenParser> tokParsers = new ArrayList<>();
+ private final NCTokenParser tokParser;
private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>();
private final List<NCEntityEnricher> entEnrichers = new ArrayList<>();
private final List<NCEntityParser> entParsers = new ArrayList<>();
private final List<NCTokenValidator> tokenValidators = new ArrayList<>();
private final List<NCEntityValidator> entityValidators = new ArrayList<>();
- private final List<NCVariantsFilter> variantsFilters = new ArrayList<>();
-
+ private final List<NCVariantsValidator> variantsFilters = new
ArrayList<>();
/**
*
@@ -44,35 +43,23 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
* @param version
* @param tokParser
*/
- public NCModelConfigAdapter(String id, String name, String version,
NCTokenizer tokenizer, NCTokenParser tokParser, NCEntityParser entParser) {
+ public NCModelConfigAdapter(String id, String name, String version,
NCTokenParser tokParser, NCEntityParser entParser) {
Objects.requireNonNull(id, "ID cannot be null.");
Objects.requireNonNull(name, "Name cannot be null.");
Objects.requireNonNull(version, "Version cannot be null.");
- Objects.requireNonNull(tokenizer, "Tokenizer cannot be null.");
Objects.requireNonNull(tokParser, "Token parser cannot be null.");
Objects.requireNonNull(entParser, "Entity parser cannot be null.");
this.id = id;
this.name = name;
this.version = version;
- this.tokenizer = tokenizer;
-
- tokParsers.add(tokParser);
+ this.tokParser = tokParser;
+
entParsers.add(entParser);
}
/**
*
- * @param tokParser
- */
- public void addTokenParser(NCTokenParser tokParser) {
- Objects.requireNonNull(tokParser, "Token parser cannot be null.");
-
- tokParsers.add(tokParser);
- }
-
- /**
- *
* @param entParser
*/
public void addEntityParser(NCEntityParser entParser) {
@@ -125,7 +112,7 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
*
* @param variantFilter
*/
- public void addVariantFilter(NCVariantsFilter variantFilter) {
+ public void addVariantFilter(NCVariantsValidator variantFilter) {
Objects.requireNonNull(variantFilter, "Variant filter cannot be
null.");
variantsFilters.add(variantFilter);
@@ -157,8 +144,8 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
}
@Override
- public List<NCTokenParser> getTokenParsers() {
- return tokParsers;
+ public NCTokenParser getTokenParser() {
+ return tokParser;
}
@Override
@@ -167,11 +154,6 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
}
@Override
- public NCTokenizer getTokenizer() {
- return tokenizer;
- }
-
- @Override
public List<NCTokenValidator> getTokenValidators() {
return tokenValidators;
}
@@ -182,7 +164,7 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
}
@Override
- public List<NCVariantsFilter> getVariantsFilters() {
+ public List<NCVariantsValidator> getVariantValidators() {
return variantsFilters;
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
index 72faad8..468f8ac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
@@ -52,12 +52,6 @@ public interface NCRequest {
String getText();
/**
- *
- * @return
- */
- List<NCWord> getWords();
-
- /**
* Gets UTC/GMT timestamp in millis when user input was received.
*
* @return UTC/GMT timestamp in ms when user input was received.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
index 4eeacc5..23a6205 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
@@ -20,28 +20,34 @@ package org.apache.nlpcraft;
/**
*
*/
-public interface NCToken extends NCWord, NCPropertyMap {
+public interface NCToken extends NCPropertyMap {
/**
*
* @return
*/
- String getLemma();
+ String getText();
/**
*
* @return
*/
- String getStem();
+ int getIndex();
/**
*
* @return
*/
- String getPos();
+ String getLemma();
/**
*
* @return
*/
- boolean isStopWord();
+ String getStem();
+
+ /**
+ *
+ * @return
+ */
+ String getPos();
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
index 669df6e..91aa783 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenParser.java
@@ -25,11 +25,30 @@ import java.util.List;
public interface NCTokenParser extends NCLifecycle {
/**
*
- * @param req
- * @param cfg
+ * @param text
* @return
- * @throws
- * @throws NCException
*/
- List<NCToken> parse(NCRequest req, NCModelConfig cfg);
+ List<String> tokenize(String text);
+
+ /**
+ *
+ * @param s
+ * @return
+ */
+ String getStem(String s);
+
+ /**
+ *
+ * @param toks
+ * @return
+ */
+ List<String> getPoses(List<String> toks);
+
+ /**
+ *
+ * @param toks
+ * @param poses
+ * @return
+ */
+ List<String> getLemmas(List<String> toks, List<String> poses);
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenizer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenizer.java
deleted file mode 100644
index 1ee784d..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenizer.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft;
-
-import java.util.List;
-
-/**
- *
- */
-public interface NCTokenizer extends NCLifecycle {
- /**
- *
- * @param cfg
- * @param txt
- * @return
- */
- List<NCWord> tokenize(NCModelConfig cfg, String txt);
-}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsFilter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsValidator.java
similarity index 94%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsFilter.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsValidator.java
index 09ec82c..8829e2f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsFilter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsValidator.java
@@ -22,7 +22,7 @@ import java.util.List;
/**
*
*/
-public interface NCVariantsFilter extends NCLifecycle {
+public interface NCVariantsValidator extends NCLifecycle {
/**
* Filters all found entities variants.
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCWord.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCWord.java
deleted file mode 100644
index a13840a..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCWord.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft;
-
-/**
- *
- */
-public interface NCWord {
- /**
- *
- * @return
- */
- String getText();
-
- /**
- *
- * @return
- */
- int getStartCharIndex();
-
- /**
- *
- * @return
- */
- int getEndCharIndex();
-
- /**
- *
- * @return
- */
- int getLength();
-
- /**
- *
- * @return
- */
- int getIndex();
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index dcf24cb..ffaaa77 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -19,23 +19,21 @@ package org.apache.nlpcraft.internal.util
import com.google.gson.GsonBuilder
import com.typesafe.scalalogging.*
-import org.apache.nlpcraft.NCToken
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ansi.NCAnsi.*
import java.io.*
import java.net.*
-import java.util.{Random, UUID}
import java.util.regex.Pattern
import java.util.zip.*
+import java.util.{Random, UUID}
import scala.annotation.tailrec
import scala.collection.{IndexedSeq, Seq}
-import scala.concurrent.duration.Duration
import scala.concurrent.*
-import scala.io.Source
+import scala.concurrent.duration.Duration
+import scala.io.*
import scala.sys.SystemProperties
import scala.util.Using
-import scala.io.BufferedSource
/**
*
@@ -921,63 +919,3 @@ object NCUtils extends LazyLogging:
*/
def genUUID(): UUID = UUID.randomUUID()
- /**
- * Gets all sequential permutations of tokens in this NLP sentence.
- *
- * For example, if NLP sentence contains "a, b, c, d" tokens, then
- * this function will return the sequence of following token sequences in
this order:
- * "a b c d"
- * "a b c"
- * "b c d"
- * "a b"
- * "b c"
- * "c d"
- * "a"
- * "b"
- * "c"
- * "d"
- *
- * NOTE: this method will not return any permutations with a quoted token.
- *
- * @param tokens Tokens.
- * @param stopWords Whether or not include tokens marked as stop words.
- * @param maxLen Maximum number of tokens in the sequence.
- */
- def tokenMix(tokens: Seq[NCToken], stopWords: Boolean = false, maxLen: Int
= Integer.MAX_VALUE): Seq[Seq[NCToken]] =
- val toks = tokens.filter(t => stopWords || (!stopWords &&
!t.isStopWord))
-
- (for (n <- toks.length until 0 by -1 if n <= maxLen) yield
toks.sliding(n)).flatten
-
- /**
- * Gets all sequential permutations of tokens in this NLP sentence.
- * This method is like a 'tokenMix', but with all combinations of
stop-words (with and without)
- *
- * @param tokens Tokens.
- * @param maxLen Maximum number of tokens in the sequence.
- */
- def tokenMixWithStopWords(tokens: Seq[NCToken], maxLen: Int =
Integer.MAX_VALUE): Seq[Seq[NCToken]] =
- /**
- * Gets all combinations for sequence of mandatory tokens with
stop-words and without.
- *
- * Example:
- * 'A (stop), B, C(stop) -> [A, B, C]; [A, B]; [B, C], [B]
- * 'A, B(stop), C(stop) -> [A, B, C]; [A, B]; [A, C], [A].
- *
- * @param toks Tokens.
- */
- def permutations(toks: Seq[NCToken]): Seq[Seq[NCToken]] =
- def multiple(seq: Seq[Seq[Option[NCToken]]], t: NCToken):
Seq[Seq[Option[NCToken]]] =
- if seq.isEmpty then
- if t.isStopWord then IndexedSeq(IndexedSeq(Some(t)),
IndexedSeq(None)) else IndexedSeq(IndexedSeq(Some(t)))
- else
- (for (subSeq <- seq) yield subSeq :+ Some(t)) ++ (if
t.isStopWord then for (subSeq <- seq) yield subSeq :+ None else Seq.empty)
-
- var res: Seq[Seq[Option[NCToken]]] = Seq.empty
- for (t <- toks) res = multiple(res, t)
- res.map(_.flatten).filter(_.nonEmpty)
-
- tokenMix(tokens, stopWords = true, maxLen).
- flatMap(permutations).
- filter(_.nonEmpty).
- distinct.
- sortBy(seq => (-seq.length, seq.head.getStartCharIndex))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
index 1ea0930..f92f8ba 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
@@ -17,7 +17,11 @@
package org.apache.nlpcraft.nlp.entity.parser.opennlp;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCEntity;
+import org.apache.nlpcraft.NCEntityParser;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
import
org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNlpEntityParserImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
index 9c09111..c28f03e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
@@ -24,7 +24,10 @@ import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticEntityParserImpl;
-import java.util.*;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
/**
*
@@ -37,11 +40,10 @@ public class NCSemanticEntityParser implements
NCEntityParser {
* @param stemmer
* @param elems
*/
- public NCSemanticEntityParser(NCSemanticTextStemmer stemmer,
List<NCSemanticElement> elems) {
- Objects.requireNonNull(stemmer, "Stemmer cannot be null");
+ public NCSemanticEntityParser(List<NCSemanticElement> elems) {
Objects.requireNonNull(elems, "Elements cannot be null");
- impl = NCSemanticEntityParserImpl.apply(stemmer,
Collections.emptyMap(), elems);
+ impl = NCSemanticEntityParserImpl.apply(Collections.emptyMap(), elems);
}
/**
@@ -50,11 +52,10 @@ public class NCSemanticEntityParser implements
NCEntityParser {
* @param macros
* @param elems
*/
- public NCSemanticEntityParser(NCSemanticTextStemmer stemmer, Map<String,
String> macros, List<NCSemanticElement> elems) {
- Objects.requireNonNull(stemmer, "Stemmer cannot be null");
+ public NCSemanticEntityParser(Map<String, String> macros,
List<NCSemanticElement> elems) {
Objects.requireNonNull(elems, "Elements cannot be null");
- impl = NCSemanticEntityParserImpl.apply(stemmer, macros, elems);
+ impl = NCSemanticEntityParserImpl.apply(macros, elems);
}
/**
@@ -62,11 +63,10 @@ public class NCSemanticEntityParser implements
NCEntityParser {
* @param stemmer
* @param mdlSrc
*/
- public NCSemanticEntityParser(NCSemanticTextStemmer stemmer, String
mdlSrc) {
- Objects.requireNonNull(stemmer, "Stemmer cannot be null");
+ public NCSemanticEntityParser(String mdlSrc) {
Objects.requireNonNull(mdlSrc, "Source cannot be null");
- impl = NCSemanticEntityParserImpl.apply(stemmer, mdlSrc);
+ impl = NCSemanticEntityParserImpl.apply(mdlSrc);
}
@Override
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTextStemmer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTextStemmer.java
deleted file mode 100644
index 5ef08d3..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticTextStemmer.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.nlp.entity.parser.semantic;
-
-/**
- *
- */
-public interface NCSemanticTextStemmer {
- /**
- *
- * @param text
- * @return
- */
- String stem(String text);
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java
deleted file mode 100644
index ae21ccb..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package org.apache.nlpcraft.nlp.entity.parser.semantic.en;
-
-import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticElement;
-import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser;
-import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticTextStemmer;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * TODO: Do we need it?
- */
-public class NCEnSemanticEntityParser extends NCSemanticEntityParser {
- /**
- *
- * @param elems
- */
- public NCEnSemanticEntityParser(List<NCSemanticElement> elems) {
- super(new NCEnSemanticTextStemmer(), elems);
- }
-
- /**
- *
- * @param macros
- * @param elems
- */
- public NCEnSemanticEntityParser(Map<String, String> macros,
List<NCSemanticElement> elems) {
- super(new NCEnSemanticTextStemmer(), macros, elems);
- }
-
- /**
- *
- * @param mdlSrc
- */
- public NCEnSemanticEntityParser(String mdlSrc) {
- super(new NCEnSemanticTextStemmer(), mdlSrc);
- }
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 1a3dfbe..8680193 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -22,8 +22,8 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.makro.NCMacroParser
import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.semantic.*
-import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticSourceType.*
import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.*
+import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticSourceType.*
import java.io.*
import java.util.regex.*
@@ -32,19 +32,17 @@ import scala.collection.mutable
import scala.jdk.CollectionConverters.*
object NCSemanticEntityParserImpl:
- def apply(stemmer: NCSemanticTextStemmer, macros: Jmap[String, String],
elems: JList[NCSemanticElement]): NCSemanticEntityParserImpl =
- require(stemmer != null)
+ def apply(macros: Jmap[String, String], elems: JList[NCSemanticElement]):
NCSemanticEntityParserImpl =
require(elems != null)
new NCSemanticEntityParserImpl(
- stemmer, macros = if macros == null then null else
macros.asScala.toMap, elements = elems.asScala.toSeq
+ macros = if macros == null then null else macros.asScala.toMap,
elements = elems.asScala.toSeq
)
- def apply(stemmer: NCSemanticTextStemmer, mdlSrc: String):
NCSemanticEntityParserImpl =
- require(stemmer != null)
+ def apply(mdlSrc: String): NCSemanticEntityParserImpl =
require(mdlSrc != null)
- new NCSemanticEntityParserImpl(stemmer, mdlSrc = mdlSrc, typ =
NCSemanticSourceType(mdlSrc))
+ new NCSemanticEntityParserImpl(mdlSrc = mdlSrc, scrType =
NCSemanticSourceType(mdlSrc))
/**
* @param baseTokens Tokens.
@@ -52,6 +50,9 @@ object NCSemanticEntityParserImpl:
*/
private case class Piece(baseTokens: Seq[NCToken], variants:
Seq[Seq[NCToken]])
+ // TODO: error?
+ private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
+
/**
*
* 1. Prepares combination of tokens (sliding).
@@ -68,7 +69,7 @@ object NCSemanticEntityParserImpl:
*/
private def getPieces(toks: Seq[NCToken]): Seq[Piece] =
(for (n <- toks.size until 0 by -1) yield
toks.sliding(n)).flatten.map(p => p).map(combo => {
- val stops = combo.filter(s => s.isStopWord && s != combo.head && s
!= combo.last)
+ val stops = combo.filter(s => isStopWord(s) && s != combo.head &&
s != combo.last)
val slides =
mutable.ArrayBuffer.empty[mutable.ArrayBuffer[NCToken]]
for (stop <- stops)
@@ -111,31 +112,31 @@ import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticEntityParse
/**
*
- * @param stemmer
* @param macros
* @param elements
*/
class NCSemanticEntityParserImpl(
- stemmer: NCSemanticTextStemmer,
macros: Map[String, String] = null,
elements: Seq[NCSemanticElement] = null,
mdlSrc: String = null,
- typ: NCSemanticSourceType = null
+ scrType: NCSemanticSourceType = null
) extends NCEntityParser with LazyLogging:
- require(stemmer != null)
- require(macros != null && elements != null || mdlSrc != null && typ !=
null)
+ require(macros != null && elements != null || mdlSrc != null && scrType !=
null)
@volatile private var h: NCSemanticSynonymsHolder = _
override def start(cfg: NCModelConfig): Unit =
val (macros, elements) =
if mdlSrc != null then
- val src = NCSemanticDataReader.read(new
BufferedInputStream(NCUtils.getStream(mdlSrc)), typ)
+ val src = NCSemanticDataReader.read(new
BufferedInputStream(NCUtils.getStream(mdlSrc)), scrType)
+
+ logger.trace(s"Loaded resource: $mdlSrc")
+
(src.macros, src.elements)
else
(this.macros, this.elements)
- h = NCSemanticSynonymsProcessor.prepare(cfg, stemmer, macros, elements)
+ h = NCSemanticSynonymsProcessor.prepare(cfg, macros, elements)
override def stop(): Unit = h = null
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 4a49dae..16e3a46 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -19,11 +19,11 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic.impl
import com.fasterxml.jackson.databind.*
import com.fasterxml.jackson.dataformat.yaml.*
import com.fasterxml.jackson.module.scala.DefaultScalaModule
+import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.makro.NCMacroParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.*
import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.*
-import com.typesafe.scalalogging.LazyLogging
import java.io.InputStream
import java.util
@@ -137,7 +137,7 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
checkSynonyms(v.getSynonyms, elemId, Some(name))
private def startsAndEnds(fix: String, s: String): Boolean =
s.startsWith(fix) && s.endsWith(fix)
- private def mkChunk(stemmer: NCSemanticTextStemmer, chunk: String):
NCSemanticSynonymChunk =
+ private def mkChunk(p: NCTokenParser, chunk: String):
NCSemanticSynonymChunk =
def stripSuffix(fix: String, s: String): String = s.slice(fix.length,
s.length - fix.length)
// Regex synonym.
@@ -152,23 +152,21 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
else
throw new NCException(s"Empty regex synonym detected
[chunk=$chunk]")
else
- NCSemanticSynonymChunk(kind = TEXT, text = chunk, stem =
stemmer.stem(chunk))
+ NCSemanticSynonymChunk(kind = TEXT, text = chunk, stem =
p.getStem(chunk))
/**
*
* @param cfg
- * @param stemmer
* @param macros
* @param elements
* @throws NCException
*/
def prepare(
cfg: NCModelConfig,
- stemmer: NCSemanticTextStemmer,
macros: Map[String, String],
elements: Seq[NCSemanticElement]
): NCSemanticSynonymsHolder =
- require(cfg != null && stemmer != null)
+ require(cfg != null)
checkElements(elements)
checkMacros(macros, elements)
@@ -188,8 +186,8 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
def add(syns: Seq[NCSemanticSynonym]): Unit = buf ++=
syns.map(Holder(_, elemId))
def convert(syns: JList[String]): Seq[Seq[NCSemanticSynonymChunk]]
=
syns.asScala.flatMap(p.expand).
- map(t => cfg.getTokenizer.tokenize(cfg, t).asScala.map(w
=> mkChunk(stemmer, w.getText)).toSeq).toSeq
- def mkSpecChunk(id: String): NCSemanticSynonymChunk =
NCSemanticSynonymChunk(TEXT, id, stemmer.stem(id))
+ map(t => cfg.getTokenParser.tokenize(t).asScala.map(w =>
mkChunk(cfg.getTokenParser, w)).toSeq).toSeq
+ def mkSpecChunk(id: String): NCSemanticSynonymChunk =
NCSemanticSynonymChunk(TEXT, id, cfg.getTokenParser.getStem(id))
// TODO:
add(Seq(NCSemanticSynonym(Seq(mkSpecChunk(elemId)), isElementId =
true)))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/en/NCEnSemanticTextStemmer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/en/NCEnSemanticTextStemmer.java
deleted file mode 100644
index 4571053..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/en/NCEnSemanticTextStemmer.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en;
-
-import opennlp.tools.stemmer.PorterStemmer;
-import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticTextStemmer;
-
-public class NCEnSemanticTextStemmer implements NCSemanticTextStemmer {
- private final PorterStemmer s = new PorterStemmer();
-
- @Override
- public synchronized String stem(String text) {
- return s.stem(text);
- }
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
index 219018f..14ee3a2 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
@@ -17,7 +17,10 @@
package org.apache.nlpcraft.nlp.token.enricher.en;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnBracketsImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
index f54d4e1..8c3275f 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
@@ -17,7 +17,10 @@
package org.apache.nlpcraft.nlp.token.enricher.en;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnDictionaryImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
index b52350c..9ecbd90 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
@@ -17,7 +17,10 @@
package org.apache.nlpcraft.nlp.token.enricher.en;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnLanguageWordsImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
index fe8516f..c38f29e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
@@ -17,7 +17,10 @@
package org.apache.nlpcraft.nlp.token.enricher.en;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnQuotesImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnStopWordsTokenEnricher.java
similarity index 71%
copy from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
copy to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnStopWordsTokenEnricher.java
index aca1786..e431a6a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnStopWordsTokenEnricher.java
@@ -17,27 +17,26 @@
package org.apache.nlpcraft.nlp.token.enricher.en;
-import org.apache.nlpcraft.*;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnSwearWordsImpl;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnStopWordsImpl;
import java.util.List;
-import java.util.Objects;
+import java.util.Set;
/**
* TODO: enriches with <code>dict:en</code> property.
*/
-public class NCEnSwearWordsTokenEnricher implements NCTokenEnricher {
- private final NCEnSwearWordsImpl impl;
+public class NCEnStopWordsTokenEnricher implements NCTokenEnricher {
+ private final NCEnStopWordsImpl impl;
/**
- * TODO: swear_words.txt
- *
- * @param mdlSrc
+ *
*/
- public NCEnSwearWordsTokenEnricher(String mdlSrc) {
- Objects.requireNonNull(mdlSrc, "Swear words model file cannot be
null.");
-
- impl = new NCEnSwearWordsImpl(mdlSrc);
+ public NCEnStopWordsTokenEnricher(Set<String> addStems, Set<String>
exclStems) {
+ impl = new NCEnStopWordsImpl(addStems, exclStems);
}
@Override
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
index aca1786..2de4d0b 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
@@ -17,7 +17,10 @@
package org.apache.nlpcraft.nlp.token.enricher.en;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnSwearWordsImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
index 6b34033..80f6e62 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
@@ -46,5 +46,4 @@ class NCEnBracketsImpl extends NCTokenEnricher with
LazyLogging:
case _ => mark(t)
if ok && stack.isEmpty then map.foreach { (tok, b) =>
tok.put("brackets:en", b) }
- else
- logger.trace(s"Invalid brackets: ${req.getText}")
\ No newline at end of file
+ else logger.trace(s"Invalid brackets: ${req.getText}")
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnStopWordGenerator.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordGenerator.scala
similarity index 99%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnStopWordGenerator.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordGenerator.scala
index 71cdd28..4b3ac8a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnStopWordGenerator.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordGenerator.scala
@@ -1,4 +1,4 @@
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl
+package org.apache.nlpcraft.nlp.token.enricher.en.impl
import opennlp.tools.stemmer.PorterStemmer
import org.apache.nlpcraft.internal.util.NCUtils
@@ -9,6 +9,7 @@ import scala.collection.mutable
* Generates first word sequences.
*/
object NCEnStopWordGenerator:
+ // TODO: ?
private final lazy val stemmer = new PorterStemmer
// Output files.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnStopWordsFinder.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordsImpl.scala
similarity index 77%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnStopWordsFinder.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordsImpl.scala
index e7806c2..248fac8 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnStopWordsFinder.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordsImpl.scala
@@ -15,24 +15,21 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl
+package org.apache.nlpcraft.nlp.token.enricher.en.impl
import com.typesafe.scalalogging.LazyLogging
-import opennlp.tools.stemmer.PorterStemmer
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
+import java.io.*
import java.util
import java.util.{List as JList, Set as JSet}
import scala.annotation.tailrec
-import scala.collection.{Seq, mutable}
+import scala.collection.{IndexedSeq, Seq, mutable}
import scala.concurrent.ExecutionContext
-import scala.jdk.CollectionConverters.SetHasAsScala
+import scala.jdk.CollectionConverters.*
-/**
- *
- */
-private[impl] object NCEnStopWordsFinder:
+object NCEnStopWordsImpl:
// Condition types.
type Wildcard = (String, String)
type Word = String
@@ -102,7 +99,7 @@ private[impl] object NCEnStopWordsFinder:
posOpt match
case Some(pos) =>
!excludes.getOrElse(pos, Set.empty).contains(s) &&
- (any.contains(s) || includes.getOrElse(pos,
Set.empty).contains(s))
+ (any.contains(s) || includes.getOrElse(pos,
Set.empty).contains(s))
case _ => any.contains(s)
/**
@@ -168,11 +165,73 @@ private[impl] object NCEnStopWordsFinder:
// Hash access.
stems.matches(toStemKey(toks), posOpt) ||
- lemmas.matches(toLemmaKey(toks), posOpt) ||
- origins.matches(toOriginalKey(toks), posOpt) ||
- // Scan access.
- wildcardsLemmas.matches(toLemmaKey(toks), posOpt) ||
- wildcardsOrigins.matches(toOriginalKey(toks), posOpt)
+ lemmas.matches(toLemmaKey(toks), posOpt) ||
+ origins.matches(toOriginalKey(toks), posOpt) ||
+ // Scan access.
+ wildcardsLemmas.matches(toLemmaKey(toks), posOpt) ||
+ wildcardsOrigins.matches(toOriginalKey(toks), posOpt)
+
+ /**
+ * Gets all sequential permutations of tokens in this NLP sentence.
+ * This method is like a 'tokenMix', but with all combinations of
stop-words (with and without)
+ *
+ * @param tokens Tokens.
+ * @param maxLen Maximum number of tokens in the sequence.
+ */
+ private def tokenMixWithStopWords(tokens: Seq[NCToken], maxLen: Int =
Integer.MAX_VALUE): Seq[Seq[NCToken]] =
+ /**
+ * Gets all combinations for sequence of mandatory tokens with
stop-words and without.
+ *
+ * Example:
+ * A (stop), B, C(stop) -> [A, B, C]; [A, B]; [B, C]; [B]
+ * A, B(stop), C(stop) -> [A, B, C]; [A, B]; [A, C]; [A].
+ *
+ * @param toks Tokens.
+ */
+ def permutations(toks: Seq[NCToken]): Seq[Seq[NCToken]] =
+ def multiple(seq: Seq[Seq[Option[NCToken]]], t: NCToken):
Seq[Seq[Option[NCToken]]] =
+ if seq.isEmpty then
+ if isStopWord(t) then IndexedSeq(IndexedSeq(Some(t)),
IndexedSeq(None)) else IndexedSeq(IndexedSeq(Some(t)))
+ else
+ (for (subSeq <- seq) yield subSeq :+ Some(t)) ++ (if
isStopWord(t) then for (subSeq <- seq) yield subSeq :+ None else Seq.empty)
+
+ var res: Seq[Seq[Option[NCToken]]] = Seq.empty
+ for (t <- toks) res = multiple(res, t)
+ res.map(_.flatten).filter(_.nonEmpty)
+
+ tokenMix(tokens, stopWords = true, maxLen).
+ flatMap(permutations).
+ filter(_.nonEmpty).
+ distinct.
+ sortBy(seq => (-seq.length, seq.head.getIndex))
+
+ /**
+ * Gets all sequential permutations of tokens in this NLP sentence.
+ *
+ * For example, if NLP sentence contains "a, b, c, d" tokens, then
+ * this function will return the sequence of following token sequences in
this order:
+ * "a b c d"
+ * "a b c"
+ * "b c d"
+ * "a b"
+ * "b c"
+ * "c d"
+ * "a"
+ * "b"
+ * "c"
+ * "d"
+ *
+ * NOTE: this method will not return any permutations with a quoted token.
+ *
+ * @param tokens Tokens.
+ * @param stopWords Whether or not to include tokens marked as stop words.
+ * @param maxLen Maximum number of tokens in the sequence.
+ */
+ private def tokenMix(tokens: Seq[NCToken], stopWords: Boolean = false,
maxLen: Int = Integer.MAX_VALUE): Seq[Seq[NCToken]] =
+ val toks = tokens.filter(t => stopWords || (!stopWords &&
!isStopWord(t)))
+
+ (for (n <- toks.length until 0 by -1 if n <= maxLen) yield
toks.sliding(n)).flatten
+
private def isQuote(t: NCToken): Boolean = Q_POS.contains(t.getPos)
private def toStemKey(toks: Seq[NCToken]): String =
toks.map(_.getStem).mkString(" ")
@@ -180,62 +239,72 @@ private[impl] object NCEnStopWordsFinder:
private def toValueKey(toks: Seq[NCToken]): String =
toks.map(_.getText.toLowerCase).mkString(" ")
private def toOriginalKey(toks: Seq[NCToken]): String =
toks.map(_.getText).mkString(" ")
-/**
- *
- * @param addStems
- * @param exclStems
- */
-private[impl] class NCEnStopWordsFinder(addStems: Set[String], exclStems:
Set[String]) extends LazyLogging:
- import NCEnStopWordsFinder.*
-
- require(addStems != null)
- require(exclStems != null)
-
- private val stemmer = new PorterStemmer
-
- private val percents = Set(
- "%",
- "pct",
- "pc",
- "percentage",
- "proportion",
- "interest",
- "rate",
- "percent"
- ).map(stemmer.stem)
+ // TODO: error?
+ private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnStopWordsImpl.*
+
+class NCEnStopWordsImpl(addStemsSet: util.Set[String], exclStemsSet:
util.Set[String]) extends NCTokenEnricher with LazyLogging:
+ private val addStems: Set[String] = if addStemsSet == null then Set.empty
else addStemsSet.asScala.toSet
+ private val exclStems: Set[String] = if exclStemsSet == null then
Set.empty else exclStemsSet.asScala.toSet
+
+ @volatile private var percents: Set[String] = _
@volatile private var firstWords: Set[String] = _
@volatile private var nounWords: Set[String] = _
+ @volatile private var stopWords: StopWordHolder = _
+ @volatile private var exceptions: StopWordHolder = _
- // Stemmatization is done already by generator.
- NCUtils.execPar(
- () => firstWords = read("stopwords/first_words.txt.gz"),
- () => nounWords = read("stopwords/noun_words.txt.gz")
- )(ExecutionContext.Implicits.global)
+ private def read(path: String): Set[String] =
NCUtils.readTextGzipResource(path, "UTF-8", logger).toSet
- // Case sensitive.
- private val (stopWords, exceptions) =
+ override def start(cfg: NCModelConfig): Unit =
+ percents = Set(
+ "%",
+ "pct",
+ "pc",
+ "percentage",
+ "proportion",
+ "interest",
+ "rate",
+ "percent"
+ ).map(cfg.getTokenParser.getStem)
+
+ // Stemming is done already by the generator. TODO:
+ NCUtils.execPar(
+ () => firstWords = read("stopwords/first_words.txt.gz"),
+ () => nounWords = read("stopwords/noun_words.txt.gz")
+ )(ExecutionContext.Implicits.global)
+
+ // Case sensitive.
val m =
readStopWords(
+ cfg.getTokenParser,
NCUtils.readResource("stopwords/stop_words.txt", "UTF-8",
logger).
map(_.strip).filter(s => s.nonEmpty && !s.startsWith("#"))
)
- (m(false), m(true))
- private def read(path: String): Set[String] =
NCUtils.readTextGzipResource(path, "UTF-8", logger).toSet
+ stopWords = m(false)
+ exceptions = m(true)
+
+ override def stop(): Unit =
+ percents = null
+ firstWords = null
+ nounWords = null
+ stopWords = null
+ exceptions = null
/**
* Parses configuration template.
*
+ * @param p Token parser.
* @param lines Configuration file content.
* @return Holder and `is-exception` flag.
*/
- private def readStopWords(lines: Seq[String]): Map[Boolean,
StopWordHolder] =
+ private def readStopWords(p: NCTokenParser, lines: Seq[String]):
Map[Boolean, StopWordHolder] =
// 1. Prepares accumulation data structure.
enum WordForm:
case STEM, LEM, ORIG
- import WordForm.*
+ import WordForm._
class Condition[T]:
val any = mutable.HashSet.empty[T]
@@ -252,8 +321,8 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
case Some(set) => set.add(cond)
case _ =>
val set = mutable.HashSet.empty[T]
- set += cond
- m += pos -> set
+ set += cond
+ m += pos -> set
)
add(includes, incl = true)
@@ -267,7 +336,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
m += tuple._1 -> tuple._2
WordForm.values.foreach(f =>
add(f, mkT, isExc = true)
- add(f, mkT, isExc = false)
+ add(f, mkT, isExc = false)
)
m.toMap
@@ -327,7 +396,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
val (word, form) =
if isCase then (s, ORIG)
else
- if !hasPoses then (stemmer.stem(s), STEM) else
(stemmer.stem(s), LEM)
+ if !hasPoses then (p.getStem(s), STEM) else
(p.getStem(s), LEM)
mHash((isExc, form)).addCondition(word, poses)
else
val b = s.take(idxWild)
@@ -349,13 +418,13 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
val incl = toImmutable(m((isExc, form)).includes)
val excl = toImmutable(m((isExc, form)).excludes)
- mkInstance(any ++ excl.values.flatten, incl, excl)
+ mkInstance(any ++ excl.values.flatten, incl, excl)
end mkHolder
def mkHash(form: WordForm): HashHolder = mkHolder(mHash, form,
HashHolder.apply)
def mkScan(form: WordForm):
ScanHolder = mkHolder(mScan, form, ScanHolder.apply)
- isExc -> StopWordHolder(mkHash(STEM), mkHash(LEM), mkHash(ORIG),
mkScan(LEM), mkScan(ORIG))
+ isExc -> StopWordHolder(mkHash(STEM), mkHash(LEM),
mkHash(ORIG), mkScan(LEM), mkScan(ORIG))
).toMap
private def isVerb(pos: String): Boolean = pos.head == 'V'
@@ -379,8 +448,8 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
): Boolean =
var stop = true
- for ((tok, idx) <- ns.zipWithIndex if idx != lastIdx &&
!tok.isStopWord && !isException(Seq(tok)) &&
- stopPoses.contains(tok.getPos) && ns(idx + 1).isStopWord)
+ for ((tok, idx) <- ns.zipWithIndex if idx != lastIdx &&
!isStopWord(tok) && !isException(Seq(tok)) &&
+ stopPoses.contains(tok.getPos) && isStopWord(ns(idx + 1)))
stops += tok
stop = false
@@ -413,8 +482,8 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
val max = ns.size - 1
var stop = true
- for ((tok, idx) <- ns.zipWithIndex if idx != max &&
!tok.isStopWord && !exclStems.contains(tok.getStem) &&
- POSES.contains(tok.getPos) && ns(idx + 1).isStopWord)
+ for ((tok, idx) <- ns.zipWithIndex if idx != max &&
!isStopWord(tok) && !exclStems.contains(tok.getStem) &&
+ POSES.contains(tok.getPos) && isStopWord(ns(idx + 1)))
stops += tok
stop = false
@@ -422,11 +491,9 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
processCommonStops0(ns)
- /**
- *
- * @param toks
- */
- def find(toks: Seq[NCToken]): Seq[NCToken] =
+ override def enrich(req: NCRequest, cfg: NCModelConfig, toksList:
JList[NCToken]): Unit =
+ val toks = toksList.asScala
+
// Stop words and exceptions caches for this sentence.
val cacheSw = mutable.HashMap.empty[Seq[NCToken], Boolean]
val cacheEx = mutable.HashMap.empty[Seq[NCToken], Boolean]
@@ -462,7 +529,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
!isFirst && prev().getPos == "CD" &&
// 3. It's last word or any words after except numbers.
(isLast || next().getPos != "CD")
- ) ||
+ ) ||
// be, was, is etc. or has been etc.
isCommonVerbs("have", "be") ||
// be, was, is etc. or have done etc.
@@ -474,7 +541,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
// | Find all words from predefined list. |
// +--------------------------------------+
val buf = mutable.Buffer.empty[Seq[NCToken]]
- val mix = NCUtils.tokenMixWithStopWords(toks)
+ val mix = tokenMixWithStopWords(toks)
for (toks <- mix if !buf.exists(_.containsSlice(toks)) && isStop(toks)
&& !isException(toks))
toks.foreach(tok => stops += tok)
@@ -492,7 +559,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
val foundKeys = new mutable.HashSet[String]()
// All sentence first stop words + first non stop word.
- val startToks = toks.takeWhile(_.isStopWord) ++
toks.find(!_.isStopWord).map(p => p)
+ val startToks = toks.takeWhile(isStopWord) ++ toks.find(p =>
!isStopWord(p)).map(p => p)
for (startTok <- startToks; tup <- origToks.filter(_._1.head ==
startTok); key = tup._2 if firstWords.contains(key) && !isException(tup._1))
tup._1.foreach(tok => stops += tok)
foundKeys += key
@@ -574,4 +641,5 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
if ok && stack.isEmpty then
stops --= stops.intersect(set)
- stops.toSeq.sortBy(_.getStartCharIndex)
\ No newline at end of file
+ // TODO: name is important and language independent.
+ toks.foreach(t => t.put("stopword", stops.contains(t)))
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
index ea11dc0..c7e2534 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
@@ -18,7 +18,6 @@
package org.apache.nlpcraft.nlp.token.enricher.en.impl
import com.typesafe.scalalogging.LazyLogging
-import opennlp.tools.stemmer.PorterStemmer
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
@@ -28,8 +27,7 @@ class NCEnSwearWordsImpl(res: String) extends NCTokenEnricher
with LazyLogging:
@volatile private var swearWords: Set[String] = _
override def start(cfg: NCModelConfig): Unit =
- val stemmer = new PorterStemmer
- swearWords = NCUtils.readTextStream(NCUtils.getStream(res),
"UTF-8").map(stemmer.stem).toSet
+ swearWords = NCUtils.readTextStream(NCUtils.getStream(res),
"UTF-8").map(cfg.getTokenParser.getStem).toSet
logger.trace(s"Loaded resource: $res")
override def stop(): Unit = swearWords = null
override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
index cb0a8b4..2c59def 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
@@ -17,11 +17,13 @@
package org.apache.nlpcraft.nlp.token.parser.opennlp.en;
-import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.NCException;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCTokenParser;
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.NCEnOpenNlpImpl;
-import java.io.*;
-import java.util.*;
+import java.util.List;
+import java.util.Objects;
/*
* Models can be downloaded from the following resources:
@@ -53,52 +55,36 @@ public class NCEnOpenNlpTokenParser implements
NCTokenParser {
* @param lemmaDicSrc Local filesystem path, resources file path or URL
for OpenNLP lemmatizer dictionary.
* @throws NCException
*/
- public NCEnOpenNlpTokenParser(String posMdlSrc, String lemmaDicSrc) {
+ public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String
lemmaDicSrc) {
+ Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be
null.");
Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be
null.");
try {
- impl = new NCEnOpenNlpImpl(posMdlSrc, lemmaDicSrc);
+ impl = new NCEnOpenNlpImpl(tokMdlSrc, posMdlSrc, lemmaDicSrc);
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
}
}
- /**
- *
- * @return
- */
- public Set<String> getAdditionalStopWords() {
- return impl.getAdditionalStopWords();
- }
-
- /**
- *
- * @param addStopWords
- */
- public void setAdditionalStopWords(Set<String> addStopWords) {
- impl.setAdditionalStopWords(addStopWords);
+ @Override
+ public List<String> tokenize(String text) {
+ return impl.tokenize(text);
}
- /**
- *
- * @return
- */
- public Set<String> getExcludedStopWords() {
- return impl.getExcludedStopWords();
+ @Override
+ public String getStem(String s) {
+ return impl.getStem(s);
}
- /**
- *
- * @param exclStopWords
- */
- public void setExcludedStopWords(Set<String> exclStopWords) {
- impl.setExcludedStopWords(exclStopWords);
+ @Override
+ public List<String> getPoses(List<String> toks) {
+ return impl.getPoses(toks);
}
@Override
- public List<NCToken> parse(NCRequest req, NCModelConfig cfg) {
- return impl.parse(req, cfg);
+ public List<String> getLemmas(List<String> toks, List<String> poses) {
+ return impl.getLemmas(toks, poses);
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
index 1496de2..f7714a3 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl
+import com.typesafe.scalalogging.LazyLogging
import opennlp.tools.lemmatizer.*
import opennlp.tools.postag.*
import opennlp.tools.stemmer.*
@@ -37,115 +38,76 @@ import scala.jdk.CollectionConverters.*
* @param posMdlSrc
* @param lemmaDicSrc
*/
-class NCEnOpenNlpImpl(posMdlSrc: String, lemmaDicSrc: String) extends
NCTokenParser :
- private val stemmer = new PorterStemmer
-
+class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String)
extends NCTokenParser with LazyLogging:
+ @volatile private var stemmer: PorterStemmer = _
@volatile var tagger: POSTaggerME = _
@volatile var lemmatizer: DictionaryLemmatizer = _
- @volatile var swFinder: NCEnStopWordsFinder = _
-
- private var addStopWords: JSet[String] = _
- private var exclStopWords: JSet[String] = _
+ @volatile var tokenizer: TokenizerME = _
override def start(cfg: NCModelConfig): Unit =
NCUtils.execPar(
- () => tagger = new POSTaggerME(new
POSModel(NCUtils.getStream(posMdlSrc))),
- () => lemmatizer = new
DictionaryLemmatizer(NCUtils.getStream(lemmaDicSrc)),
- () => swFinder = new NCEnStopWordsFinder(stem(addStopWords),
stem(exclStopWords))
+ () => stemmer = new PorterStemmer,
+ () =>
+ tagger = new POSTaggerME(new
POSModel(NCUtils.getStream(posMdlSrc)))
+ logger.trace(s"Loaded resource: $posMdlSrc")
+ ,
+ () =>
+ lemmatizer = new
DictionaryLemmatizer(NCUtils.getStream(lemmaDicSrc))
+ logger.trace(s"Loaded resource: $lemmaDicSrc")
+ ,
+ () =>
+ tokenizer = new TokenizerME(new
TokenizerModel(NCUtils.getStream(tokMdl)))
+ logger.trace(s"Loaded resource: $tokMdl")
+
)(ExecutionContext.Implicits.global)
override def stop(): Unit =
- swFinder = null
lemmatizer = null
tagger = null
+ tokenizer = null
+ stemmer = null
/**
*
- * @param addStopWords
+ * @param set
*/
- def setAdditionalStopWords(addStopWords: JSet[String]): Unit =
this.addStopWords = addStopWords
+ private def stem(set: JSet[String]): Set[String] =
+ if set == null then Set.empty else set.asScala.toSet.map(stemmer.stem)
- /**
- *
- * @return
- */
- def getAdditionalStopWords: JSet[String] = addStopWords
+ override def tokenize(text: String): JList[String] =
+ this.synchronized { tokenizer.tokenizePos(text)
}.map(_.getCoveredText(text).toString).toSeq.asJava
- /**
- *
- * @param exclStopWords
- */
- def setExcludedStopWords(exclStopWords: JSet[String]): Unit =
this.exclStopWords = exclStopWords
+ override def getStem(s: String): String = this.synchronized {
stemmer.stem(s) }
- /**
- *
- * @return
- */
- def getExcludedStopWords: JSet[String] = exclStopWords
+ override def getPoses(toksList: JList[String]): JList[String] =
+ val toks = toksList.asScala.toArray
- /**
- *
- * @param set
- */
- private def stem(set: JSet[String]): Set[String] =
- if set == null then Set.empty else set.asScala.toSet.map(stemmer.stem)
+ this.synchronized { tagger.tag(toks) }.toSeq.asJava
- override def parse(req: NCRequest, cfg: NCModelConfig): JList[NCToken] =
- // OpenNLP classes are not thread-safe.
- this.synchronized {
- val words = req.getWords.asScala
- val wordsTxts = words.map(_.getText).toArray
- val posTags = tagger.tag(wordsTxts)
- var lemmas = lemmatizer.lemmatize(wordsTxts, posTags).toSeq
-
- require(words.length == posTags.length)
-
- // For some reasons lemmatizer (en-lemmatizer.dict) marks some
words with non-existent POS 'NNN'
- // Valid POS list:
https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
- val suspIdxs = lemmas.zip(posTags).zipWithIndex.flatMap {
- // "0" is flag that lemma cannot be obtained for some reasons.
- case ((lemma, pos), i) => Option.when(lemma == "O" && pos ==
"NN")(i)
- }
+ override def getLemmas(toksList: JList[String], posesList: JList[String]):
JList[String] =
+ require(toksList.size() == posesList.size())
+
+ val toks = toksList.asScala
+ val poses = posesList.asScala
+
+ var lemmas = this.synchronized { lemmatizer.lemmatize(toks.toArray,
poses.toArray).toSeq }
+
+ // For some reasons lemmatizer (en-lemmatizer.dict) marks some words
with non-existent POS 'NNN'
+ // Valid POS list:
https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
+ val suspIdxs = lemmas.zip(poses).zipWithIndex.flatMap {
+ // "0" is flag that lemma cannot be obtained for some reasons.
+ case ((lemma, pos), i) => Option.when(lemma == "O" && pos ==
"NN")(i)
+ }
- if suspIdxs.nonEmpty then
- val fixes: Map[Int, String] = lemmatizer.
- lemmatize(suspIdxs.map(i => wordsTxts(i)).toArray,
suspIdxs.map(_ => "NNN").toArray).
- zipWithIndex.
- flatMap {
- (lemma, i) => Option.when(lemma != "0")(suspIdxs(i) ->
lemma)
- }.toMap
- lemmas = lemmas.zipWithIndex.map {
- (lemma, idx) => fixes.getOrElse(idx, lemma)
- }
-
- val res: Seq[NCToken] =
words.zip(posTags).zip(lemmas).toIndexedSeq.zipWithIndex.map { case (((w, pos),
lemma), idx) =>
- new NCPropertyMapAdapter with NCToken:
- override def getText: String = w.getText
- override def getLemma: String = lemma
- override def getStem: String =
stemmer.stem(w.getText.toLowerCase)
- override def getPos: String = pos
- override def isStopWord: Boolean = false
- override def getStartCharIndex: Int = w.getStartCharIndex
- override def getEndCharIndex: Int = w.getEndCharIndex
- override def getLength: Int = w.getLength
- override def getIndex: Int = w.getIndex
+ if suspIdxs.nonEmpty then
+ val fixes: Map[Int, String] = lemmatizer.
+ lemmatize(suspIdxs.map(i => toks(i)).toArray, suspIdxs.map(_
=> "NNN").toArray).
+ zipWithIndex.
+ flatMap {
+ (lemma, i) => Option.when(lemma != "0")(suspIdxs(i) ->
lemma)
+ }.toMap
+ lemmas = lemmas.zipWithIndex.map {
+ (lemma, idx) => fixes.getOrElse(idx, lemma)
}
- val stops = swFinder.find(res)
-
- res.map(tok =>
- if stops.contains(tok) then
- new NCPropertyMapAdapter with NCToken:
- override def getText: String = tok.getText
- override def getLemma: String = tok.getLemma
- override def getStem: String = tok.getStem
- override def getPos: String = tok.getPos
- override def isStopWord: Boolean = true
- override def getStartCharIndex: Int =
tok.getStartCharIndex
- override def getEndCharIndex: Int = tok.getEndCharIndex
- override def getLength: Int = tok.getLength
- override def getIndex: Int = tok.getIndex
- else
- tok
- ).asJava
- }
\ No newline at end of file
+ lemmas.asJava
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java
deleted file mode 100644
index bf63f0d..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.nlp.tokenizer.opennlp;
-
-import org.apache.nlpcraft.NCException;
-import org.apache.nlpcraft.NCModelConfig;
-import org.apache.nlpcraft.NCTokenizer;
-import org.apache.nlpcraft.NCWord;
-import org.apache.nlpcraft.nlp.tokenizer.opennlp.impl.NCOpenNlpTokenizerImpl;
-
-import java.util.List;
-import java.util.Objects;
-
-/**
- *
- */
-public class NCOpenNlpTokenizer implements NCTokenizer {
- private final NCOpenNlpTokenizerImpl impl;
-
- /**
- *
- * @param tokMdl
- */
- public NCOpenNlpTokenizer(String tokMdl) {
- Objects.requireNonNull(tokMdl, "Tokenizer model source cannot be
null.");
-
- try {
- impl = new NCOpenNlpTokenizerImpl(tokMdl);
- }
- catch (Exception e) {
- throw new NCException("Failed to create OpenNLP tokenizer from: "
+ tokMdl, e);
- }
- }
-
- @Override
- public List<NCWord> tokenize(NCModelConfig cfg, String txt) {
- return impl.tokenize(cfg, txt);
- }
-
- @Override
- public void start(NCModelConfig cfg) {
- impl.start(cfg);
- }
-
- @Override
- public void stop() {
- impl.stop();
- }
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala
deleted file mode 100644
index 49ac329..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.nlp.tokenizer.opennlp.impl
-
-import opennlp.tools.tokenize.*
-import org.apache.nlpcraft.*
-import org.apache.nlpcraft.internal.util.NCUtils
-
-import java.io.*
-import java.util
-import scala.jdk.CollectionConverters.*
-
-/**
- *
- * @param src
- */
-class NCOpenNlpTokenizerImpl(src: String) extends NCTokenizer:
- @volatile var tokenizer: TokenizerME = _
-
- override def start(cfg: NCModelConfig): Unit = tokenizer = new
TokenizerME(new TokenizerModel(NCUtils.getStream(src)))
- override def stop(): Unit = tokenizer = null
- override def tokenize(cfg: NCModelConfig, txt: String): util.List[NCWord] =
- this.synchronized { tokenizer.tokenizePos(txt) }.zipWithIndex.map {
(span, idx) =>
- new NCWord:
- override def getText: String =
span.getCoveredText(txt).toString
- override def getStartCharIndex: Int = span.getStart
- override def getEndCharIndex: Int = span.getEnd
- override def getLength: Int = span.length()
- override def getIndex: Int = idx
- }.toSeq.asJava
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
index f5096e5..c4d3ea5 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
@@ -36,7 +36,6 @@ import java.util.concurrent.TimeUnit;
@Fork(value = 1, jvmArgs = {"-Xms2G", "-Xmx2G"})
@Warmup(iterations = 5, time = 10)
@Measurement(iterations = 5, time = 5)
-@Disabled
public class NCBenchmarkAdapter {
@State(Scope.Thread)
public static class NCBenchmarkAdapterState {
@@ -50,7 +49,8 @@ public class NCBenchmarkAdapter {
* @param args
* @throws RunnerException
*/
- @Test
+ // @Test
+ // TODO:
public void benchmark() throws RunnerException {
new Runner(new
OptionsBuilder().include(this.getClass().getSimpleName()).build()).run();
}
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
index f345dde..c30f536 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
@@ -44,7 +44,7 @@ public class NCEnOpenNlpTokenParserBenchmark extends
NCBenchmarkAdapter {
@Benchmark
public void testParse(Blackhole bh, NCBenchmarkAdapterState state) {
- bh.consume(parser.parse(state.request, null));
+ bh.consume(parser.tokenize(state.request.getText()));
}
/**
@@ -52,6 +52,7 @@ public class NCEnOpenNlpTokenParserBenchmark extends
NCBenchmarkAdapter {
*/
private static NCEnOpenNlpTokenParser prepareParser() {
NCEnOpenNlpTokenParser p = new NCEnOpenNlpTokenParser(
+ "opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"
);
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
index a9cfb14..842ba4f 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala
@@ -22,7 +22,6 @@ import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNlpEntityParser
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -53,8 +52,8 @@ class NCOpenNlpEntityParserSpec:
private def checkSingleEntity(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val toks = EN_PARSER.parse(req, EN_MDL_CFG)
- val resSeq = parsers.map(_.parse(req, EN_MDL_CFG,
toks).asScala.toSeq).filter(_.size == 1)
+ val toks = NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser,
req.txt)
+ val resSeq = parsers.map(_.parse(req, NCTestConfig.EN,
toks).asScala.toSeq).filter(_.size == 1)
require(resSeq.size == 1)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
index 77a002b..e4c6323 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
@@ -20,10 +20,8 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNlpEntityParser
-import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.*
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -40,14 +38,14 @@ class NCSemanticEntityParserJsonSpec:
@BeforeEach
def start(): Unit =
- parser =
- NCTestUtils.makeAndStart(
- new NCSemanticEntityParser(new NCEnSemanticTextStemmer,
"models/alarm_model.json")
- )
+ parser = NCTestUtils.makeAndStart(new
NCSemanticEntityParser("models/alarm_model.json"))
private def checkSingleEntity(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val res = parser.parse(req, EN_MDL_CFG, EN_PARSER.parse(req,
EN_MDL_CFG)).asScala.toSeq
+ val res = parser.parse(
+ req,
+ NCTestConfig.EN,
NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser, req.txt)
+ ).asScala.toSeq
NCTestUtils.printEntities(txt, res)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index 09fc3b4..2bdad65 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -20,10 +20,8 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNlpEntityParser
-import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.*
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -56,16 +54,14 @@ class NCSemanticEntityParserSpec:
@BeforeEach
def start(): Unit =
parser =
- NCTestUtils.makeAndStart(
- new NCSemanticEntityParser(
- new NCEnSemanticTextStemmer,
- Seq(Element("testId", synonyms = Seq("test"))).asJava
- )
- )
+ NCTestUtils.makeAndStart(new
NCSemanticEntityParser(Seq(Element("testId", synonyms = Seq("test"))).asJava))
private def checkSingleEntity(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val res = parser.parse(req, EN_MDL_CFG, EN_PARSER.parse(req,
EN_MDL_CFG)).asScala.toSeq
+ val res = parser.parse(
+ req,
+ NCTestConfig.EN,
NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser, req.txt)
+ ).asScala.toSeq
require(res.size == 1)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
index 273f7d1..36d4960 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
@@ -20,10 +20,8 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNlpEntityParser
-import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.*
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -39,14 +37,14 @@ class NCSemanticEntityParserYamlSpec:
@BeforeEach
def start(): Unit =
- parser =
- NCTestUtils.makeAndStart(
- new NCSemanticEntityParser(new NCEnSemanticTextStemmer,
"models/lightswitch_model.yaml")
- )
+ parser = NCTestUtils.makeAndStart(new
NCSemanticEntityParser("models/lightswitch_model.yaml"))
private def checkSingleEntity(txt: String, expected: String): Unit =
val req = NCTestRequest(txt)
- val res = parser.parse(req, EN_MDL_CFG, EN_PARSER.parse(req,
EN_MDL_CFG)).asScala.toSeq
+ val res = parser.parse(
+ req,
+ NCTestConfig.EN,
NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser, req.txt)
+ ).asScala.toSeq
NCTestUtils.printEntities(txt, res)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
index a4d07b1..dc63f06 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
@@ -21,7 +21,6 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.nlp.token.enricher.en.NCEnBracketsTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import scala.jdk.CollectionConverters.*
@@ -42,8 +41,8 @@ class NCEnBracketsTokenEnricherSpec:
* @param brackets
*/
private def check(txt: String, brackets: Set[Integer]): Unit =
- val toks = EN_PARSER.parse(NCTestRequest(txt), EN_MDL_CFG)
- enricher.enrich(NCTestRequest(txt), EN_MDL_CFG, toks)
+ val toks = NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser, txt)
+ enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toks)
val seq = toks.asScala.toSeq
NCTestUtils.printTokens(seq)
seq.foreach (tok =>
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
index bd03637..2b8a20a 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
@@ -20,7 +20,6 @@ package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.nlp.token.enricher.en.NCEnDictionaryTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import scala.jdk.CollectionConverters.SeqHasAsJava
@@ -44,7 +43,7 @@ class NCEnDictionaryTokenEnricherSpec:
require(toks.head.getOpt[Boolean]("dict:en").isEmpty)
require(toks.last.getOpt[Boolean]("dict:en").isEmpty)
- enricher.enrich(null, EN_MDL_CFG, toks.asJava)
+ enricher.enrich(null, NCTestConfig.EN, toks.asJava)
NCTestUtils.printTokens(toks)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
index c8ca5cb..c694f9a 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
@@ -20,7 +20,6 @@ package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.NCToken
import org.apache.nlpcraft.nlp.token.enricher.en.NCEnQuotesTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -42,9 +41,9 @@ class NCEnQuotesTokenEnricherSpec:
* @param quotes
*/
private def check(txt: String, quotes: Set[Integer]): Unit =
- val toks = EN_PARSER.parse(NCTestRequest(txt), EN_MDL_CFG)
+ val toks = NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser, txt)
val toksSeq = toks.asScala.toSeq
- enricher.enrich(NCTestRequest(txt), EN_MDL_CFG, toks)
+ enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toks)
NCTestUtils.printTokens(toksSeq)
toksSeq.foreach (tok =>
require(!(tok.get[Boolean]("quoted:en") ^
quotes.contains(tok.getIndex)))
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
index 2311889..d4dd0cc 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
@@ -19,9 +19,9 @@ package org.apache.nlpcraft.nlp.token.parser.opennlp.en
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
+import org.apache.nlpcraft.nlp.token.enricher.en.{NCEnBracketsTokenEnricher,
NCEnStopWordsTokenEnricher}
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.nlp.util.*
-import org.apache.nlpcraft.nlp.util.NCTestConfig.*
import org.junit.jupiter.api.*
import java.util
@@ -31,57 +31,70 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCEnOpenNlpTokenParserSpec:
+ private var enricher: NCEnStopWordsTokenEnricher = _
+
+ private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
+
private def test(txt: String, validate: Seq[NCToken] => _): Unit =
- val toks = EN_PARSER.parse(nlp.util.NCTestRequest(txt),
EN_MDL_CFG).asScala.toSeq
+ val toksList = NCTestUtils.mkTokens(NCTestConfig.EN.getTokenParser,
txt)
+ enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toksList)
+
+ val toks = toksList.asScala.toSeq
+
assert(toks.nonEmpty)
NCTestUtils.printTokens(toks)
validate(toks)
+ @BeforeEach
+ def start(): Unit = enricher =
+ NCTestUtils.makeAndStart(new NCEnStopWordsTokenEnricher(null, null))
+
+
@Test
def test(): Unit =
test(
"Test requests!",
toks =>
require(toks.length == 3);
- require(!toks.head.isStopWord);
- require(toks.last.isStopWord)
+ require(!isStopWord(toks.head));
+ require(isStopWord(toks.last))
)
test(
"Test requests !",
toks =>
require(toks.length == 3);
- require(!toks.head.isStopWord);
- require(toks.last.isStopWord)
+ require(!isStopWord(toks.head));
+ require(isStopWord(toks.last))
)
test(
// First and last are stop words,
// Third and fourth are not because quoted.
- // Note that "A ` A A` A" parsed as 5 tokens ("A", "`", ""A, "A`",
"A") because OpenNLP tokenizer logic,
+ // Note that "a ` a a` a" parsed as 5 tokens ("a", "`", ""a, "a`",
"a") because OpenNLP tokenizer logic,
// So we use spaces around quotes to simplify test.
- "A ` A A ` A",
+ "a ` a a ` a",
toks =>
require(toks.length == 6);
- require(toks.head.isStopWord);
- require(toks.last.isStopWord);
- require(toks.drop(1).reverse.drop(1).forall(!_.isStopWord))
+ require(isStopWord(toks.head));
+ require(isStopWord(toks.last));
+ require(toks.drop(1).reverse.drop(1).forall(p =>
!isStopWord(p)))
)
test(
// First and last are stop words,
// Third and fourth are not because brackets.
- "A ( A A ) A",
+ "a ( a a ) a",
toks =>
require(toks.length == 6);
- require(toks.head.isStopWord);
- require(toks.last.isStopWord);
- require(toks.drop(1).reverse.drop(1).forall(!_.isStopWord))
+ require(isStopWord(toks.head));
+ require(isStopWord(toks.last));
+ require(toks.drop(1).reverse.drop(1).forall(p =>
!isStopWord(p)))
)
test(
// Invalid brackets.
- "A ( A A A",
- toks => toks.filter(_.getText != "(").forall(_.isStopWord)
+ "a ( a a a",
+ toks => toks.filter(_.getText != "(").forall(isStopWord)
)
test(
// Nested brackets.
- "< < [ A ] > >",
- toks => require(!toks.find(_.getText == "A").get.isStopWord)
+ "< < [ a ] > >",
+ toks => require(!isStopWord(toks.find(_.getText == "a").get))
)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
index bd1d1b1..b5ff768 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
@@ -17,35 +17,43 @@
package org.apache.nlpcraft.nlp.util
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.tokenizer.opennlp.NCOpenNlpTokenizer
+import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
-import java.util
-import java.util.Optional
+import java.util.{Optional, ArrayList as JAList, List as JList}
+/***
+ *
+ */
object NCTestConfig:
- final val EN_TOKENIZER = new NCOpenNlpTokenizer(
- "opennlp/en-token.bin"
- )
- final val EN_PARSER: NCTokenParser = new NCEnOpenNlpTokenParser(
- "opennlp/en-pos-maxent.bin",
- "opennlp/en-lemmatizer.dict"
- )
-
- final val EN_MDL_CFG: NCModelConfig = new NCPropertyMapAdapter() with
NCModelConfig:
- override def getTokenizer: NCTokenizer = EN_TOKENIZER
- override def getTokenParsers: util.List[NCTokenParser] =
util.Collections.singletonList(EN_PARSER);
- override def getTokenEnrichers: util.List[NCTokenEnricher] = new
util.ArrayList[NCTokenEnricher]()
- override def getEntityEnrichers: util.List[NCEntityEnricher] = new
util.ArrayList[NCEntityEnricher]()
- override def getEntityParsers: util.List[NCEntityParser] = new
util.ArrayList[NCEntityParser]()
- override def getTokenValidators: util.List[NCTokenValidator] = new
util.ArrayList[NCTokenValidator]()
- override def getEntityValidators: util.List[NCEntityValidator] = new
util.ArrayList[NCEntityValidator]()
- override def getVariantsFilters: util.List[NCVariantsFilter] = new
util.ArrayList[NCVariantsFilter]()
+ final val EN: NCModelConfig = new NCPropertyMapAdapter() with
NCModelConfig:
+ private val p =
+ new NCEnOpenNlpTokenParser(
+ "opennlp/en-token.bin",
+ "opennlp/en-pos-maxent.bin",
+ "opennlp/en-lemmatizer.dict"
+ )
+
+ override def getTokenParser: NCTokenParser = p
+ override def getTokenEnrichers: JList[NCTokenEnricher] = new
JAList[NCTokenEnricher]()
+ override def getEntityEnrichers: JList[NCEntityEnricher] = new
JAList[NCEntityEnricher]()
+ override def getEntityParsers: JList[NCEntityParser] = new
JAList[NCEntityParser]()
+ override def getTokenValidators: JList[NCTokenValidator] = new
JAList[NCTokenValidator]()
+ override def getEntityValidators: JList[NCEntityValidator] = new
JAList[NCEntityValidator]()
+ override def getVariantValidators: JList[NCVariantsValidator] = new
JAList[NCVariantsValidator]()
override def getId: String = "test"
override def getName: String = "test"
override def getVersion: String = "1.0"
- EN_TOKENIZER.start(EN_MDL_CFG)
- EN_PARSER.start(EN_MDL_CFG)
+ // TODO: references?
+ EN.getTokenParser.start(EN)
+
+ start(EN.getTokenEnrichers)
+ start(EN.getEntityEnrichers)
+ start(EN.getEntityParsers)
+ start(EN.getTokenValidators)
+ start(EN.getEntityValidators)
+ start(EN.getVariantValidators)
+
+ private def start[T <: NCLifecycle](l: JList[T]): Unit = if l != null then
l.forEach(_.start(EN))
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
index 7af1a71..0cbdc70 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
@@ -46,7 +46,6 @@ case class NCTestRequest(
override def getReceiveTimestamp: Long = ts
override def getUserAgent: String = userAgent
override def getRequestData: util.Map[String, AnyRef] = data
- override def getWords: util.List[NCWord] =
EN_TOKENIZER.tokenize(EN_MDL_CFG, txt)
/**
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestToken.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestToken.scala
index d027e9c..a8e5584 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestToken.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestToken.scala
@@ -35,17 +35,10 @@ case class NCTestToken(
idx: Int,
lemma: String = null,
stem: String = null,
- pos: String = null,
- isStop: Boolean = false,
- start: Int = -1,
- end: Int = -1
+ pos: String = null
) extends NCPropertyMapAdapter with NCToken:
override def getText: String = txt
override def getLemma: String = lemma
override def getStem: String = stem
override def getPos: String = pos
- override def isStopWord: Boolean = isStop
- override def getStartCharIndex: Int = start
- override def getEndCharIndex: Int = end
- override def getLength: Int = end - start + 1
override def getIndex: Int = idx
\ No newline at end of file
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index a69039d..ac01f0d 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -20,10 +20,10 @@ package org.apache.nlpcraft.nlp.util
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser
-import org.apache.nlpcraft.nlp.tokenizer.opennlp.NCOpenNlpTokenizer
+import java.util
import scala.jdk.CollectionConverters.*
-
+import scala.jdk.OptionConverters.RichOptional
/**
*
*/
@@ -32,7 +32,7 @@ object NCTestUtils:
* @param toks
*/
def printTokens(toks: Seq[NCToken]): Unit =
- val tbl = NCAsciiTable("Text", "Index", "POS", "Stem", "Lemma",
"Start", "End", "Length", "Stopword", "Properties")
+ val tbl = NCAsciiTable("Text", "Index", "POS", "Stem", "Lemma",
"Stopword", "Properties")
for (t <- toks)
tbl += (
@@ -41,10 +41,10 @@ object NCTestUtils:
t.getPos,
t.getStem,
t.getLemma,
- t.getStartCharIndex,
- t.getEndCharIndex,
- t.getLength,
- t.isStopWord,
+ t.getOpt[Boolean]("stopword").toScala match
+ case Some(b) => b.toString
+ case None => "undef."
+ ,
t.keysSet().asScala.map(p =>
s"$p=${t.get[Any](p)}").mkString("[", ", ", "]")
)
@@ -67,6 +67,20 @@ object NCTestUtils:
tbl.print(s"Request: $req")
+ def mkTokens(p: NCTokenParser, txt: String): util.List[NCToken] =
+ val toks = p.tokenize(txt)
+ val poses = p.getPoses(toks)
+ val lemmas = p.getLemmas(toks, poses)
+
+ toks.asScala.zip(poses.asScala).zip(lemmas.asScala).zipWithIndex.map {
case (((t, pos), lemma), idx) =>
+ new NCPropertyMapAdapter with NCToken:
+ override def getText: String = t
+ override def getLemma: String = lemma
+ override def getStem: String = p.getStem(t)
+ override def getPos: String = pos
+ override def getIndex: Int = idx
+ }.asJava
+
/**
*
* @param make
@@ -79,7 +93,9 @@ object NCTestUtils:
val start = now()
val t = make
val started = now()
+
+ t.start(NCTestConfig.EN)
- t.start(NCTestConfig.EN_MDL_CFG)
println(s"'${t.getClass.getSimpleName}' created in ${started -
start}ms and started in ${now() - started}ms.")
+
t
\ No newline at end of file