[incubator-nlpcraft] 01/01: WIP.

sergeykamov Fri, 09 Dec 2022 00:12:53 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


commit 6d63a451ed2628aa2f3f5718606ed814ac6a52a6
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Dec 9 12:12:49 2022 +0400

    WIP.
---
 .../nlpcraft/examples/time/CalculatorModel.scala   |  4 +-
 .../entity/parser/NCFrSemanticEntityParser.scala   |  3 +-
 .../entity/parser/NCRuSemanticEntityParser.scala   |  3 +-
 .../components/PizzeriaModelPipeline.scala         |  5 +-
 .../org/apache/nlpcraft/NCPipelineBuilder.scala    | 54 ++++++++++++++--------
 .../NCStemmer.scala}                               | 11 ++---
 ...nricher.scala => NCBracketsTokenEnricher.scala} |  4 +-
 ...icher.scala => NCDictionaryTokenEnricher.scala} | 14 +++---
 .../nlp/enrichers/NCEnStopWordsTokenEnricher.scala |  2 +-
 .../nlp/enrichers/NCOpenNLPTokenEnricher.scala     | 18 ++++----
 ...nEnricher.scala => NCQuotesTokenEnricher.scala} |  6 +--
 ...icher.scala => NCSwearWordsTokenEnricher.scala} | 17 +++----
 .../nlpcraft/nlp/parsers/NCNLPEntityParser.scala   | 13 +++---
 .../nlp/parsers/NCOpenNLPEntityParser.scala        | 16 +++----
 .../nlp/parsers/NCOpenNLPTokenParser.scala         | 10 ++--
 .../nlpcraft/nlp/parsers/NCSemanticElement.scala   |  3 +-
 .../nlp/parsers/NCSemanticEntityParser.scala       | 32 ++++++-------
 .../parsers/impl/NCSemanticSynonymsProcessor.scala |  5 +-
 .../apache/nlpcraft/nlp/NCTokenEnricherSpec.scala  |  2 +-
 .../enrichers/NCBracketsTokenEnricherSpec.scala    |  4 +-
 .../enrichers/NCDictionaryTokenEnricherSpec.scala  |  6 +--
 .../nlp/enrichers/NCQuotesTokenEnricherSpec.scala  |  2 +-
 .../enrichers/NCSwearWordsTokenEnricherSpec.scala  | 15 ++++--
 .../parsers/NCSemanticEntityParserLemmaSpec.scala  |  3 +-
 .../org/apache/nlpcraft/nlp/util/NCTestUtils.scala |  7 +--
 25 files changed, 144 insertions(+), 115 deletions(-)

diff --git 
a/nlpcraft-examples/calculator/src/main/scala/org/apache/nlpcraft/examples/time/CalculatorModel.scala
 
b/nlpcraft-examples/calculator/src/main/scala/org/apache/nlpcraft/examples/time/CalculatorModel.scala
index 0aecbc86..e1eb0a9c 100644
--- 
a/nlpcraft-examples/calculator/src/main/scala/org/apache/nlpcraft/examples/time/CalculatorModel.scala
+++ 
b/nlpcraft-examples/calculator/src/main/scala/org/apache/nlpcraft/examples/time/CalculatorModel.scala
@@ -65,7 +65,7 @@ class CalculatorModel extends 
NCModel(NCModelConfig("nlpcraft.calculator.ex", "C
     @NCIntent(
         "intent=calc options={ 'ordered': true }" +
         "   term(x)={# == 'stanford:number'}" +
-        "   term(op)={has(list('+', '-', '*', '/'), 
meta_ent('nlp:token:text')) == true}" +
+        "   term(op)={has(list('+', '-', '*', '/'), 
meta_ent('nlp:entity:text')) == true}" +
         "   term(y)={# == 'stanford:number'}"
     )
     @unused def onMatch(
@@ -78,7 +78,7 @@ class CalculatorModel extends 
NCModel(NCModelConfig("nlpcraft.calculator.ex", "C
 
     @NCIntent(
         "intent=calcMem options={ 'ordered': true }" +
-        "   term(op)={has(list('+', '-', '*', '/'), 
meta_ent('nlp:token:text')) == true}" +
+        "   term(op)={has(list('+', '-', '*', '/'), 
meta_ent('nlp:entity:text')) == true}" +
         "   term(y)={# == 'stanford:number'}"
     )
     @unused def onMatchMem(
diff --git 
a/nlpcraft-examples/lightswitch-fr/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCFrSemanticEntityParser.scala
 
b/nlpcraft-examples/lightswitch-fr/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCFrSemanticEntityParser.scala
index 55350bf1..c13251f3 100644
--- 
a/nlpcraft-examples/lightswitch-fr/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCFrSemanticEntityParser.scala
+++ 
b/nlpcraft-examples/lightswitch-fr/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCFrSemanticEntityParser.scala
@@ -20,6 +20,7 @@ package 
org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser
 import opennlp.tools.stemmer.snowball.SnowballStemmer
 import 
org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCFrTokenParser
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.apache.nlpcraft.nlp.parsers.*
 
 /**
@@ -27,7 +28,7 @@ import org.apache.nlpcraft.nlp.parsers.*
   * @param src
   */
 class NCFrSemanticEntityParser(src: String) extends NCSemanticEntityParser(
-    new NCSemanticStemmer:
+    new NCStemmer:
         private val stemmer = new 
SnowballStemmer(SnowballStemmer.ALGORITHM.FRENCH)
         override def stem(txt: String): String = stemmer.synchronized { 
stemmer.stem(txt.toLowerCase).toString }
     ,
diff --git 
a/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala
 
b/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala
index 695a118d..e4c48b94 100644
--- 
a/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala
+++ 
b/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala
@@ -21,13 +21,14 @@ import opennlp.tools.stemmer.snowball.SnowballStemmer
 import 
org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.nlp.common.NCStemmer
 
 /**
   *
   * @param src
   */
 class NCRuSemanticEntityParser(src: String) extends NCSemanticEntityParser(
-    new NCSemanticStemmer:
+    new NCStemmer:
         private val stemmer = new 
SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN)
         override def stem(txt: String): String = stemmer.synchronized { 
stemmer.stem(txt.toLowerCase).toString }
     ,
diff --git 
a/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
 
b/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
index 046cf159..c9e86301 100644
--- 
a/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
+++ 
b/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
@@ -6,8 +6,9 @@ import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.entity.parser.stanford.NCStanfordNLPEntityParser
 import org.apache.nlpcraft.nlp.token.parser.stanford.NCStanfordNLPTokenParser
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher
-import org.apache.nlpcraft.nlp.parsers.{NCSemanticEntityParser, 
NCSemanticStemmer}
+import org.apache.nlpcraft.nlp.parsers.NCSemanticEntityParser
 
 import java.util.Properties
 
@@ -20,7 +21,7 @@ object PizzeriaModelPipeline:
             props.setProperty("annotators", "tokenize, ssplit, pos, lemma, 
ner")
             new StanfordCoreNLP(props)
         val tokParser = new NCStanfordNLPTokenParser(stanford)
-        val stemmer = new NCSemanticStemmer():
+        val stemmer = new NCStemmer():
             private val ps = new PorterStemmer
             override def stem(txt: String): String = ps.synchronized { 
ps.stem(txt) }
 
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
index fb21dcce..371b96e5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
@@ -19,6 +19,7 @@ package org.apache.nlpcraft
 
 import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.enrichers.*
 
@@ -39,8 +40,8 @@ class NCPipelineBuilder:
     private val entMappers: Buf[NCEntityMapper] = Buf.empty
     private val varFilters: Buf[NCVariantFilter] = Buf.empty
 
-    private def mkEnStemmer: NCSemanticStemmer =
-        new NCSemanticStemmer:
+    private def mkEnStemmer: NCStemmer =
+        new NCStemmer:
             final private val ps: PorterStemmer = new PorterStemmer
             override def stem(txt: String): String = ps.stem(txt)
 
@@ -219,10 +220,13 @@ class NCPipelineBuilder:
         tokParser = mkEnOpenNLPTokenParser.?
         tokEnrichers += new 
NCOpenNLPTokenEnricher(NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), 
NCResourceReader.getPath("opennlp/en-lemmatizer.dict"))
         tokEnrichers += new NCEnStopWordsTokenEnricher
-        tokEnrichers += new 
NCEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))
-        tokEnrichers += new NCEnQuotesTokenEnricher
-        tokEnrichers += new NCEnDictionaryTokenEnricher
-        tokEnrichers += new NCEnBracketsTokenEnricher
+        tokEnrichers += new NCSwearWordsTokenEnricher(
+            NCResourceReader.getPath("badfilter/swear_words.txt"),
+            mkEnStemmer
+        )
+        tokEnrichers += new NCQuotesTokenEnricher
+        tokEnrichers += new NCDictionaryTokenEnricher("moby/354984si.ngl")
+        tokEnrichers += new NCBracketsTokenEnricher
 
     /**
       * Shortcut to configure pipeline with [[NCSemanticEntityParser]].
@@ -238,11 +242,15 @@ class NCPipelineBuilder:
       *     
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
 en-lemmatizer.dict]] model for
       *     
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
 DictionaryLemmatizer]].
       *  - [[NCEnStopWordsTokenEnricher Stop-word]] token enricher.
-      *  - [[NCEnSwearWordsTokenEnricher Swear-word]] token enricher 
initialized by
+      *  - [[NCSwearWordsTokenEnricher Swear-word]] token enricher initialized 
by
       *    
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
 swear_words.txt]] dictionary.
-      *  - [[NCEnQuotesTokenEnricher Quotes]] token enricher.
-      *  - [[NCEnDictionaryTokenEnricher Known-word]] token enricher.
-      *  - [[NCEnBracketsTokenEnricher Brackets]] token enricher.
+      *  - [[NCQuotesTokenEnricher Quotes]] token enricher.
+      *  - [[NCDictionaryTokenEnricher Known-word]] token enricher initialized 
by the "moby/354984si.ngl" dictionary;
+      *      read more about the [[https://en.wikipedia.org/wiki/Moby_Project Moby 
Project]].
+      *  - [[NCBracketsTokenEnricher Brackets]] token enricher.
+      *
+      *  The pipeline also uses the [[https://en.wikipedia.org/wiki/Stemming Porter 
stemmer]] implementation of [[NCStemmer]],
+      *  which is based on [[https://opennlp.apache.org/ OpenNLP]].
       *
       * @param lang ISO 639-1 language code. Currently, only "en" (English) is 
supported.
       * @param macros Macros to use with [[NCSemanticEntityParser]].
@@ -276,11 +284,15 @@ class NCPipelineBuilder:
       *     
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
 en-lemmatizer.dict]] model for
       *     
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
 DictionaryLemmatizer]].
       *  - [[NCEnStopWordsTokenEnricher Stop-word]] token enricher.
-      *  - [[NCEnSwearWordsTokenEnricher Swear-word]] token enricher 
initialized by
+      *  - [[NCSwearWordsTokenEnricher Swear-word]] token enricher initialized 
by
       *    
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
 swear_words.txt]] dictionary.
-      *  - [[NCEnQuotesTokenEnricher Quotes]] token enricher.
-      *  - [[NCEnDictionaryTokenEnricher Known-word]] token enricher.
-      *  - [[NCEnBracketsTokenEnricher Brackets]] token enricher.
+      *  - [[NCQuotesTokenEnricher Quotes]] token enricher.
+      *  - [[NCDictionaryTokenEnricher Known-word]] token enricher initialized 
by the "moby/354984si.ngl" dictionary;
+      *    read more about the [[https://en.wikipedia.org/wiki/Moby_Project Moby 
Project]].
+      *  - [[NCBracketsTokenEnricher Brackets]] token enricher.
+      *
+      * The pipeline also uses the [[https://en.wikipedia.org/wiki/Stemming Porter 
stemmer]] implementation of [[NCStemmer]],
+      * which is based on [[https://opennlp.apache.org/ OpenNLP]].
       *
       * @param lang ISO 639-1 language code. Currently, only "en" (English) is 
supported.
       * @param elms Semantic elements to use with [[NCSemanticEntityParser]].
@@ -301,13 +313,17 @@ class NCPipelineBuilder:
       *     
[[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
 en-lemmatizer.dict]] model for
       *     
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
 DictionaryLemmatizer]].
       *  - [[NCEnStopWordsTokenEnricher Stop-word]] token enricher.
-      *  - [[NCEnSwearWordsTokenEnricher Swear-word]] token enricher 
initialized by
+      *  - [[NCSwearWordsTokenEnricher Swear-word]] token enricher initialized 
by
       *    
[[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt
 swear_words.txt]] dictionary.
-      *  - [[NCEnQuotesTokenEnricher Quotes]] token enricher.
-      *  - [[NCEnDictionaryTokenEnricher Known-word]] token enricher.
-      *  - [[NCEnBracketsTokenEnricher Brackets]] token enricher.
+      *  - [[NCQuotesTokenEnricher Quotes]] token enricher.
+      *  - [[NCDictionaryTokenEnricher Known-word]] token enricher initialized 
by the "moby/354984si.ngl" dictionary;
+      *    read more about the [[https://en.wikipedia.org/wiki/Moby_Project Moby 
Project]].
+      *  - [[NCBracketsTokenEnricher Brackets]] token enricher.
       *
-      * @param lang ISO 639-1 language code. Currently, only "en" (English) is 
supported.
+      * The pipeline also uses the [[https://en.wikipedia.org/wiki/Stemming Porter 
stemmer]] implementation of [[NCStemmer]],
+      * which is based on [[https://opennlp.apache.org/ OpenNLP]].
+      *
+      * @param lang   ISO 639-1 language code. Currently, only "en" (English) 
is supported.
       * @param mdlSrc Classpath resource, file path or URL for YAML or JSON 
semantic model definition file.
       */
     def withSemantic(lang: String, mdlSrc: String): NCPipelineBuilder =
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
 b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/common/NCStemmer.scala
similarity index 79%
rename from 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/common/NCStemmer.scala
index 27490eda..b68d1986 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/common/NCStemmer.scala
@@ -15,19 +15,18 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.nlp.parsers
+package org.apache.nlpcraft.nlp.common
+
+import org.apache.nlpcraft.nlp.parsers.*
 
 /**
   *
   * `Stemmer` trait. Stems are used for finding words by their reduced form.
+  * `Stemmer` trait implementation depends on language.
   * Read more about stemming [[https://en.wikipedia.org/wiki/Stemming here]].
   *
-  * See detailed description on the website 
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic 
Semantic Parser]].
-  *
-  * @see [[NCSemanticEntityParser]]
-  * @see [[NCSemanticElement]]
   */
-trait NCSemanticStemmer:
+trait NCStemmer:
     /**
       * Gets text's stem.
       *
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
similarity index 94%
rename from 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
rename to 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
index 29e562e7..cf3563c5 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
@@ -24,7 +24,7 @@ import java.io.*
 import scala.collection.mutable
 
 /**
-  * Brackets [[NCTokenEnricher enricher]] for English language.
+  * Brackets [[NCTokenEnricher enricher]].
   *
   * This enricher adds `brackets` boolean [[NCPropertyMap metadata]] property 
to the [[NCToken token]]
   * instance if the word it represents is enclosed in brackets. Supported 
brackets are: `()`, `{}`,
@@ -33,7 +33,7 @@ import scala.collection.mutable
   * **NOTE:** invalid enclosed brackets are ignored.
   */
 //noinspection DuplicatedCode,ScalaWeakerAccess
-class NCEnBracketsTokenEnricher extends NCTokenEnricher with LazyLogging:
+class NCBracketsTokenEnricher extends NCTokenEnricher with LazyLogging:
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: 
List[NCToken]): Unit =
         val stack = new java.util.Stack[String]()
         val map = mutable.HashMap.empty[NCToken, Boolean]
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
similarity index 77%
rename from 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala
rename to 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
index 67615aa1..241adf0c 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
@@ -21,26 +21,26 @@ import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.util.NCUtils
 
 /**
-  * "Known-word" [[NCTokenEnricher enricher]] for English language.
+  * "Known-word" [[NCTokenEnricher enricher]].
   *
   * This enricher adds `dict` boolean [[NCPropertyMap metadata]] property to 
the [[NCToken token]]
-  * instance if word it represents is a known English word, i.e. the English 
dictionary contains this word's
+  * instance if word it represents is a known dictionary word, i.e. the 
configured dictionary contains this word's
   * lemma. The value `true` of the metadata property indicates that this 
word's lemma is found in the dictionary,
   * `false` value indicates otherwise.
   *
-  * Implementation uses the [[https://en.wikipedia.org/wiki/Moby_Project Moby 
Project]] English dictionary.
-  *
   * **NOTE:** this implementation requires `lemma` string [[NCPropertyMap 
metadata]] property that contains
-  * token's lemma. You can configure [[NCOpenNLPTokenEnricher]] that provides 
this metadata property before
+  * token's lemma. You can configure [[NCOpenNLPTokenEnricher]] for the required 
language that provides this metadata property before
   * this enricher in your [[NCPipeline pipeline]].
+  *
+  * @param dictRes Path to the dictionary. This dictionary should have a simple 
plain text format with one dictionary word per line.
   */
 //noinspection DuplicatedCode,ScalaWeakerAccess
-class NCEnDictionaryTokenEnricher extends NCTokenEnricher:
+class NCDictionaryTokenEnricher(dictRes: String) extends NCTokenEnricher:
     private var dict: Set[String] = _
 
     init()
 
-    private def init(): Unit = dict = 
NCUtils.readResource("moby/354984si.ngl", "iso-8859-1").toSet
+    private def init(): Unit = dict = NCUtils.readResource(dictRes, 
"UTF-8").toSet
     private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw 
new NCException("Lemma not found in token."))
 
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: 
List[NCToken]): Unit =
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index f0ffb1a7..b5b0c762 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -172,7 +172,7 @@ import 
org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher.*
   * Look more about stop-words [[https://en.wikipedia.org/wiki/Stop_word 
here]].
   *
   * **NOTE:** this implementation requires `lemma` and `pos` string 
[[NCPropertyMap metadata]] properties that contains
-  * token's lemma and part of speech. You can configure 
[[NCOpenNLPTokenEnricher]] that provides this metadata property before
+  * token's lemma and part of speech. You can configure 
[[NCOpenNLPTokenEnricher]] for English language that provides this metadata 
property before
   * this enricher in your [[NCPipeline pipeline]].
   *
   * @param addStopsSet User defined collection of additional stop-words.
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index af8d6f10..7ba30164 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -39,10 +39,10 @@ import scala.concurrent.ExecutionContext
   *
   * Some of OpenNLP prepared models can be found 
[[https://opennlp.sourceforge.net/models-1.5/ here]].
   *
-  * @param posMdlSrc Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
 POSTaggerME]] model.
-  * @param lemmaDicSrc Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
 DictionaryLemmatizer]] model.
+  * @param posMdlRes Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html
 POSTaggerME]] model.
+  * @param lemmaDicRes Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html
 DictionaryLemmatizer]] model.
   */
-class NCOpenNLPTokenEnricher(posMdlSrc: String = null, lemmaDicSrc: String = 
null) extends NCTokenEnricher with LazyLogging:
+class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String = 
null) extends NCTokenEnricher with LazyLogging:
     private var tagger: POSTaggerME = _
     private var lemmatizer: DictionaryLemmatizer = _
 
@@ -52,15 +52,15 @@ class NCOpenNLPTokenEnricher(posMdlSrc: String = null, 
lemmaDicSrc: String = nul
         NCUtils.execPar(
             Seq(
                 () => {
-                    if posMdlSrc != null then
-                        tagger = new POSTaggerME(new 
POSModel(NCUtils.getStream(posMdlSrc)))
-                        logger.trace(s"Loaded resource: $posMdlSrc")
+                    if posMdlRes != null then
+                        tagger = new POSTaggerME(new 
POSModel(NCUtils.getStream(posMdlRes)))
+                        logger.trace(s"Loaded resource: $posMdlRes")
                     else logger.warn("POS tagger is not configured.")
                 },
                 () => {
-                    if lemmaDicSrc != null then
-                        lemmatizer = new 
DictionaryLemmatizer(NCUtils.getStream(lemmaDicSrc))
-                        logger.trace(s"Loaded resource: $lemmaDicSrc")
+                    if lemmaDicRes != null then
+                        lemmatizer = new 
DictionaryLemmatizer(NCUtils.getStream(lemmaDicRes))
+                        logger.trace(s"Loaded resource: $lemmaDicRes")
                     else logger.warn("Lemmatizer is not configured.")
                 }
             )
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
similarity index 92%
rename from 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
rename to 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
index ea9bd28a..6f82ca76 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
@@ -21,18 +21,18 @@ import com.typesafe.scalalogging.LazyLogging
 import org.apache.nlpcraft.*
 
 /**
-  * Quotes [[NCTokenEnricher enricher]] for English language.
+  * Quotes [[NCTokenEnricher enricher]].
   *
   * This enricher adds `quoted` boolean [[NCPropertyMap metadata]] property to 
the [[NCToken token]]
   * instance if word it represents is in quotes. The value `true` of the 
metadata property indicates that this word is in quotes,
   * `false` value indicates otherwise.
   *
   * **NOTE:** this implementation requires `lemma` string [[NCPropertyMap 
metadata]] property that contains
-  * token's lemma. You can configure [[NCOpenNLPTokenEnricher]] that provides 
this metadata property before
+  * token's lemma. You can configure [[NCOpenNLPTokenEnricher]] for the required 
language that provides this metadata property before
   * this enricher in your [[NCPipeline pipeline]].
   */
 //noinspection ScalaWeakerAccess
-class NCEnQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
+class NCQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
     private final val Q_POS: Set[String] = Set("``", "''")
     private def getPos(t: NCToken): String = t.get("pos").getOrElse(throw new 
NCException("POS not found in token."))
     private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
similarity index 71%
rename from 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
rename to 
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
index c4fa7d8b..f0d282c7 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
@@ -18,15 +18,15 @@
 package org.apache.nlpcraft.nlp.enrichers
 
 import com.typesafe.scalalogging.LazyLogging
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.nlp.common.NCStemmer
 
 import java.io.*
 import java.util.Objects
 
 /**
-  * "Swear-word" [[NCTokenEnricher enricher]] for English language.
+  * "Swear-word" [[NCTokenEnricher enricher]].
   *
   * This enricher adds `swear` boolean [[NCPropertyMap metadata]] property to 
the [[NCToken token]]
   * instance if word it represents is a swear word dictionary, i.e. the swear 
dictionary contains this word's
@@ -34,22 +34,23 @@ import java.util.Objects
   * `false` value indicates otherwise.
   *
   * Read more about stemming [[https://en.wikipedia.org/wiki/Stemming here]].
+  * Stemming is used here because relying on the more 
accurate `lemma` approach is too difficult for swear words.
   *
-  * @param res Path to English swear dictionary. English swear dictionary has 
simple plain text format with one word on one line.
+  * @param dictRes Path to the swear dictionary. This swear dictionary should 
have a simple plain text format with one dictionary word per line.
+  * @param stemmer Stemmer implementation for the dictionary language.
   */
 //noinspection ScalaWeakerAccess
-class NCEnSwearWordsTokenEnricher(res: String) extends NCTokenEnricher with 
LazyLogging:
-    require(res != null, "Swear words model file cannot be null.")
+class NCSwearWordsTokenEnricher(dictRes: String, stemmer: NCStemmer) extends 
NCTokenEnricher with LazyLogging:
+    require(dictRes != null, "Swear words model file cannot be null.")
 
-    private final val stemmer = new PorterStemmer
     private var swearWords: Set[String] = _
 
     init()
 
     private def init(): Unit =
-        swearWords = NCUtils.readTextStream(NCUtils.getStream(res), "UTF-8").
+        swearWords = NCUtils.readTextStream(NCUtils.getStream(dictRes), 
"UTF-8").
             map(p => stemmer.stem(p.toLowerCase)).toSet
-        logger.trace(s"Loaded resource: $res")
+        logger.trace(s"Loaded resource: $dictRes")
 
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: 
List[NCToken]): Unit =
         toks.foreach(t => t.put("swear", 
swearWords.contains(stemmer.stem(t.getText.toLowerCase))))
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index d23d42a0..b84d3c18 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -26,22 +26,23 @@ import java.util.stream.Collectors
   * [[NCNLPEntityParser]] helper.
   */
 object NCNLPEntityParser:
-    private val id: String = "nlp:token"
+    private val id: String = "nlp:entity"
 
 import org.apache.nlpcraft.nlp.parsers.NCNLPEntityParser.*
 
 /**
   *  NLP data [[NCEntityParser parser]].
   *
-  * This parser converts list of input [[NCToken]] instances to list of 
[[NCEntity]] instances with ID `nlp:token`.
+  * This parser converts list of input [[NCToken]] instances to list of 
[[NCEntity]] instances with ID `nlp:entity`.
   * All [[NCEntity]] instances contain following mandatory [[NCPropertyMap 
metadata]] properties:
-  *  - nlp:token:text
-  *  - nlp:token:index
-  *  - nlp:token:startCharIndex
-  *  - nlp:token:endCharIndex
+  *  - nlp:entity:text
+  *  - nlp:entity:index
+  *  - nlp:entity:startCharIndex
+  *  - nlp:entity:endCharIndex
   *
   *  Also created [[NCEntity]] instances receive all another [[NCPropertyMap 
metadata]] properties
   *  which were added by configured in [[NCPipeline pipeline]] token 
[[org.apache.nlpcraft.NCTokenEnricher enrichers]].
+  *  The identifiers of these properties will be prefixed with `nlp:entity:`.
   *
   *  @param predicate Predicate which allows to filter list of converted 
[[NCToken]] instances.
   *  By default all [[NCToken]] instances converted.
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index e40e8ff2..7613e237 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -39,12 +39,12 @@ object NCOpenNLPEntityParser:
     /**
       * Creates [[NCOpenNLPEntityParser]] instance.
       *
-      * @param src Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
 model]].
+      * @param mdl Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
 model]].
       * @return [[NCOpenNLPEntityParser]] instance.
       */
-    def apply(src: String): NCOpenNLPEntityParser =
-        require(src != null, "Model source cannot be null.")
-        new NCOpenNLPEntityParser(List(src))
+    def apply(mdl: String): NCOpenNLPEntityParser =
+        require(mdl != null, "Model source cannot be null.")
+        new NCOpenNLPEntityParser(List(mdl))
 
 /**
   *  [[https://opennlp.apache.org/ OpenNLP]] based language independent 
[[NCEntityParser parser]] configured by
@@ -59,10 +59,10 @@ object NCOpenNLPEntityParser:
   *
   * **NOTE:** that each input [[NCToken]] can be included into several output 
[[NCEntity]] instances.
   *
-  * @param srcs Paths to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
 models]].
+  * @param findersMdlsRes Paths to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html
 models]].
   */
-class NCOpenNLPEntityParser(srcs: List[String]) extends NCEntityParser with 
LazyLogging:
-    require(srcs != null, "Models source cannot be null.")
+class NCOpenNLPEntityParser(findersMdlsRes: List[String]) extends 
NCEntityParser with LazyLogging:
+    require(findersMdlsRes != null, "Models sources cannot be null.")
 
     private var finders: Seq[NameFinderME] = _
     private case class Holder(start: Int, end: Int, name: String, probability: 
Double)
@@ -74,7 +74,7 @@ class NCOpenNLPEntityParser(srcs: List[String]) extends 
NCEntityParser with Lazy
     private def init(): Unit =
         val finders = mutable.ArrayBuffer.empty[NameFinderME]
         NCUtils.execPar(
-            srcs.map(res => () => {
+            findersMdlsRes.map(res => () => {
                 val f = new NameFinderME(new 
TokenNameFinderModel(NCUtils.getStream(res)))
                 logger.trace(s"Loaded resource: $res")
                 finders.synchronized { finders += f }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
index dbc6657e..82c4b120 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
@@ -32,19 +32,19 @@ import java.util.Objects
   *
   * Some of OpenNLP prepared models can be found 
[[https://opennlp.sourceforge.net/models-1.5/ here]].
   *
-  * @param tokMdl Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/tokenize/TokenizerModel.html
 model]].
+  * @param tokMdlRes Path to 
[[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/tokenize/TokenizerModel.html
 model]].
   */
-class NCOpenNLPTokenParser(tokMdl: String) extends NCTokenParser with 
LazyLogging:
-    require(tokMdl != null, "Tokenizer model path cannot be null.")
+class NCOpenNLPTokenParser(tokMdlRes: String) extends NCTokenParser with 
LazyLogging:
+    require(tokMdlRes != null, "Tokenizer model path cannot be null.")
 
     @volatile private var tokenizer: TokenizerME = _
 
     init()
 
     private def init(): Unit =
-        tokenizer = new TokenizerME(new 
TokenizerModel(NCUtils.getStream(tokMdl)))
+        tokenizer = new TokenizerME(new 
TokenizerModel(NCUtils.getStream(tokMdlRes)))
 
-        logger.trace(s"Loaded resource: $tokMdl")
+        logger.trace(s"Loaded resource: $tokMdlRes")
 
     override def tokenize(text: String): List[NCToken] =
         this.synchronized {
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
index b9768e59..e8d43aa1 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
@@ -17,6 +17,8 @@
 
 package org.apache.nlpcraft.nlp.parsers
 
+import org.apache.nlpcraft.nlp.common.NCStemmer
+
 /**
   *
   * Configuration element which helps to detect [[org.apache.nlpcraft.NCEntity 
NCEntity]] for
@@ -25,7 +27,6 @@ package org.apache.nlpcraft.nlp.parsers
   * See detailed description on the website 
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic 
Semantic Parser]].
   *
   * @see [[NCSemanticEntityParser]]
-  * @see [[NCSemanticStemmer]]
   */
 trait NCSemanticElement:
     /**
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index 3942584e..e96a257e 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -21,6 +21,7 @@ import com.typesafe.scalalogging.LazyLogging
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.makro.NCMacroParser
 import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.parsers.impl.*
 
@@ -38,13 +39,13 @@ object NCSemanticEntityParser:
     /**
       * Creates [[NCSemanticEntityParser]] instance.
       *
-      * @param stemmer [[NCSemanticStemmer]] implementation.
-      * @param parser [[NCTokenParser]] implementation.
-      * @param macros Macros map. Empty by default.
+      * @param stemmer  [[NCStemmer]] implementation for the language of the synonyms.
+      * @param parser   [[NCTokenParser]] implementation.
+      * @param macros   Macros map. Empty by default.
       * @param elements [[NCSemanticElement]] list.
       */
     def apply(
-        stemmer: NCSemanticStemmer,
+        stemmer: NCStemmer,
         parser: NCTokenParser,
         macros: Map[String, String],
         elements: List[NCSemanticElement]
@@ -60,12 +61,12 @@ object NCSemanticEntityParser:
       *
       * Creates [[NCSemanticEntityParser]] instance.
       *
-      * @param stemmer  [[NCSemanticStemmer]] implementation.
+      * @param stemmer  [[NCStemmer]] implementation for the language of the synonyms.
       * @param parser   [[NCTokenParser]] implementation.
       * @param elements [[NCSemanticElement]] list.
       */
     def apply(
-        stemmer: NCSemanticStemmer,
+        stemmer: NCStemmer,
         parser: NCTokenParser,
         elements: List[NCSemanticElement]
     ): NCSemanticEntityParser =
@@ -79,11 +80,11 @@ object NCSemanticEntityParser:
       *
       * Creates [[NCSemanticEntityParser]] instance.
       *
-      * @param stemmer  [[NCSemanticStemmer]] implementation.
-      * @param parser   [[NCTokenParser]] implementation.
-      * @param mdlSrc Classpath resource, file path or URL for YAML or JSON 
semantic model definition file.
+      * @param stemmer [[NCStemmer]] implementation for the language of the synonyms.
+      * @param parser  [[NCTokenParser]] implementation.
+      * @param mdlSrc  Classpath resource, file path or URL for YAML or JSON 
semantic model definition file.
       */
-    def apply(stemmer: NCSemanticStemmer, parser: NCTokenParser, mdlSrc: 
String): NCSemanticEntityParser =
+    def apply(stemmer: NCStemmer, parser: NCTokenParser, mdlSrc: String): 
NCSemanticEntityParser =
         require(stemmer != null, "Stemmer cannot be null.")
         require(parser != null, "Parser cannot be null.")
         require(mdlSrc != null, "Model source cannot be null.")
@@ -181,18 +182,15 @@ import 
org.apache.nlpcraft.nlp.parsers.NCSemanticEntityParser.*
   *
   * See detailed description on the website 
[[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic 
Semantic Parser]].
   *
-  *
   * @see [[NCSemanticElement]]
-  * @see [[NCSemanticStemmer]]
-  *
-  * @param stemmer [[NCSemanticStemmer]] implementation.
-  * @param parser [[NCTokenParser]] implementation.
-  * @param macros Macros map. Empty by default.
+  * @param stemmer   [[NCStemmer]] implementation for the language of the synonyms.
+  * @param parser    [[NCTokenParser]] implementation.
+  * @param macros    Macros map. Empty by default.
   * @param elements  [[NCSemanticElement]] list.
   * @param mdlSrcOpt Optional classpath resource, file path or URL for YAML or 
JSON semantic model definition file.
   */
 class NCSemanticEntityParser(
-    stemmer: NCSemanticStemmer,
+    stemmer: NCStemmer,
     parser: NCTokenParser,
     macros: Map[String, String] = Map.empty,
     elements: List[NCSemanticElement] = List.empty,
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/impl/NCSemanticSynonymsProcessor.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/impl/NCSemanticSynonymsProcessor.scala
index 7c3992e4..e5c0b09d 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/impl/NCSemanticSynonymsProcessor.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/impl/NCSemanticSynonymsProcessor.scala
@@ -24,6 +24,7 @@ import com.typesafe.scalalogging.LazyLogging
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.makro.NCMacroParser
 import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.parsers.impl.NCSemanticChunkKind.*
 
@@ -144,7 +145,7 @@ private[parsers] object NCSemanticSynonymsProcessor extends 
LazyLogging:
       * @param syns
       */
     private def convertSynonyms(
-        stemmer: NCSemanticStemmer,
+        stemmer: NCStemmer,
         tokParser: NCTokenParser,
         macroParser: NCMacroParser,
         elemId: String,
@@ -205,7 +206,7 @@ private[parsers] object NCSemanticSynonymsProcessor extends 
LazyLogging:
       * @param elements
       */
     def prepare(
-        stemmer: NCSemanticStemmer,
+        stemmer: NCStemmer,
         tokParser: NCTokenParser,
         macros: Map[String, String],
         elements: Seq[NCSemanticElement]
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
index 4712b55e..2562e317 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
@@ -31,7 +31,7 @@ import scala.util.Using
 class NCTokenEnricherSpec extends AnyFunSuite:
     private def test0(pipeline: NCPipeline, ok: Boolean): Unit =
         val mdl: NCModel = new NCModel(NCModelConfig("test.id", "Test model", 
"1.0"), pipeline):
-            @NCIntent("intent=i term(any)={meta_ent('nlp:token:k1') == 'v1'}")
+            @NCIntent("intent=i term(any)={meta_ent('nlp:entity:k1') == 'v1'}")
             def onMatch(ctx: NCContext, im: NCIntentMatch): NCResult = 
TEST_RESULT
 
         NCTestUtils.askSomething(mdl, ok)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
index 480edd24..6739a703 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
@@ -18,7 +18,7 @@
 package org.apache.nlpcraft.nlp.enrichers
 
 import org.apache.nlpcraft.*
-import nlp.enrichers.NCEnBracketsTokenEnricher
+import nlp.enrichers.NCBracketsTokenEnricher
 import nlp.util.*
 import org.scalatest.funsuite.AnyFunSuite
 
@@ -26,7 +26,7 @@ import org.scalatest.funsuite.AnyFunSuite
   *
   */
 class NCBracketsTokenEnricherSpec extends AnyFunSuite:
-    private val bracketsEnricher = new NCEnBracketsTokenEnricher()
+    private val bracketsEnricher = new NCBracketsTokenEnricher()
 
     /**
       *
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricherSpec.scala
index f6f945b7..537ec5cb 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricherSpec.scala
@@ -25,14 +25,14 @@ import internal.util.NCResourceReader
 import org.scalatest.funsuite.AnyFunSuite
 
 class NCDictionaryTokenEnricherSpec extends AnyFunSuite:
-    private val dictEnricher = new NCEnDictionaryTokenEnricher()
+    private val dictEnricher = new 
NCDictionaryTokenEnricher("moby/354984si.ngl")
 
     test("test") {
         val txt = "milk XYZ"
         val toks = EN_TOK_PARSER.tokenize(txt)
 
-        require(toks.head.get[Boolean]("dict:en").isEmpty)
-        require(toks.last.get[Boolean]("dict:en").isEmpty)
+        require(toks.head.get[Boolean]("dict").isEmpty)
+        require(toks.last.get[Boolean]("dict").isEmpty)
 
         val req = NCTestRequest(txt)
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricherSpec.scala
index 3f87f757..ee3ad403 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricherSpec.scala
@@ -28,7 +28,7 @@ import org.scalatest.funsuite.AnyFunSuite
   *
   */
 class NCQuotesTokenEnricherSpec extends AnyFunSuite:
-    private val quoteEnricher = new NCEnQuotesTokenEnricher
+    private val quoteEnricher = new NCQuotesTokenEnricher
 
     /**
       *
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
index 86303dea..fcea197c 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
@@ -17,8 +17,10 @@
 
 package org.apache.nlpcraft.nlp.enrichers
 
+import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.enrichers.NCEnSwearWordsTokenEnricher
+import org.apache.nlpcraft.nlp.common.NCStemmer
+import org.apache.nlpcraft.nlp.enrichers.NCSwearWordsTokenEnricher
 import org.apache.nlpcraft.nlp.enrichers.*
 import org.apache.nlpcraft.nlp.util.*
 import org.scalatest.funsuite.AnyFunSuite
@@ -26,13 +28,18 @@ import org.scalatest.funsuite.AnyFunSuite
   *
   */
 class NCSwearWordsTokenEnricherSpec extends AnyFunSuite:
-    private val swEnricher = new 
NCEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))
+    private val swEnricher = new NCSwearWordsTokenEnricher(
+        NCResourceReader.getPath("badfilter/swear_words.txt"),
+        new NCStemmer:
+            final private val ps: PorterStemmer = new PorterStemmer
+            override def stem(txt: String): String = ps.stem(txt)
+    )
 
     test("test") {
         val toks = EN_TOK_PARSER.tokenize("english ass")
 
-        require(toks.head.get[Boolean]("swear:en").isEmpty)
-        require(toks.last.get[Boolean]("swear:en").isEmpty)
+        require(toks.head.get[Boolean]("swear").isEmpty)
+        require(toks.last.get[Boolean]("swear").isEmpty)
 
         swEnricher.enrich(null, null, toks)
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParserLemmaSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParserLemmaSpec.scala
index 299a8fdf..cb134ef3 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParserLemmaSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParserLemmaSpec.scala
@@ -22,6 +22,7 @@ import annotations.*
 import nlp.parsers.*
 import internal.impl.*
 import nlp.util.*
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.scalatest.funsuite.AnyFunSuite
 
 import java.util
@@ -32,7 +33,7 @@ import scala.collection.mutable
   */
 class NCSemanticEntityParserLemmaSpec extends AnyFunSuite:
     private val lemmaStemmer =
-        new NCSemanticStemmer():
+        new NCStemmer():
             override def stem(txt: String): String = if wrapped(txt) then 
unwrap(txt) else UUID.randomUUID().toString
 
     case class Data(text: String, elemId: String)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index fd1e0b07..a23b0f89 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -21,9 +21,10 @@ import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.ascii.NCAsciiTable
 import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.common.NCStemmer
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.parsers
-import org.apache.nlpcraft.nlp.parsers.{NCOpenNLPTokenParser, 
NCSemanticElement, NCSemanticEntityParser, NCSemanticStemmer}
+import org.apache.nlpcraft.nlp.parsers.{NCOpenNLPTokenParser, 
NCSemanticElement, NCSemanticEntityParser}
 
 import java.util
 import scala.util.Using
@@ -122,8 +123,8 @@ object NCTestUtils:
     /**
       *
       */
-    private def mkSemanticStemmer: NCSemanticStemmer =
-        new NCSemanticStemmer():
+    private def mkSemanticStemmer: NCStemmer =
+        new NCStemmer():
             private val ps = new PorterStemmer
             override def stem(txt: String): String = ps.synchronized { 
ps.stem(txt) }

[incubator-nlpcraft] 01/01: WIP.

Reply via email to