This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-483
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push:
new 4a2c455 WIP.
4a2c455 is described below
commit 4a2c455df79498e1ac6cf64ec79eae42ebe9d444
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Mar 2 15:50:57 2022 +0300
WIP.
---
.../examples/lightswitch/LightSwitchRuModel.scala | 2 +-
.../lightswitch/LightSwitchScalaModel.scala | 2 +-
.../apache/nlpcraft/NCModelPipelineBuilder.java | 24 +++++++++++-----------
...nricher.java => NCENBracketsTokenEnricher.java} | 6 +++---
...icher.java => NCENDictionaryTokenEnricher.java} | 6 +++---
...nricher.java => NCENLemmaPosTokenEnricher.java} | 11 ++--------
...nEnricher.java => NCENQuotesTokenEnricher.java} | 6 +++---
...richer.java => NCENStopWordsTokenEnricher.java} | 14 ++++++-------
.../N\320\241ENSwearWordsTokenEnricher.java" | 10 ++++-----
...l.scala => NCENBracketsTokenEnricherImpl.scala} | 2 +-
...scala => NCENDictionaryTokenEnricherImpl.scala} | 2 +-
...mpl.scala => NCENQuotesTokenEnricherImpl.scala} | 2 +-
...Generator.scala => NCENStopWordGenerator.scala} | 2 +-
....scala => NCENStopWordsTokenEnricherImpl.scala} | 6 +++---
...scala => NCENSwearWordsTokenEnricherImpl.scala} | 2 +-
.../enricher/opennlp/NCLemmaPosTokenEnricher.java | 2 +-
.../impl/NCLemmaPosTokenEnricherImpl.scala | 5 +++--
.../semantic/NCSemanticEntityParserSpec.scala | 6 +++---
.../enricher/en/NCBracketsTokenEnricherSpec.scala | 4 ++--
.../en/NCDictionaryTokenEnricherSpec.scala | 6 +++---
.../enricher/en/NCQuotesTokenEnricherSpec.scala | 6 +++---
.../enricher/en/NCStopWordsEnricherSpec.scala | 8 ++++----
.../en/NCSwearWordsTokenEnricherSpec.scala | 4 ++--
.../enricher/en/impl/NCStopWordsImplSpec.scala | 4 ++--
.../parser/opennlp/NCOpenNLPTokenParserSpec.scala | 6 +++---
25 files changed, 71 insertions(+), 77 deletions(-)
diff --git
a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
index 708d99f..018002e 100644
---
a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
+++
b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.*
import
org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic.NCRuSemanticEntityParser
import
org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher.{NCRuLemmaPosTokenEnricher,
NCRuStopWordsTokenEnricher}
import
org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser
-import org.apache.nlpcraft.nlp.en.token.enricher.NCStopWordsTokenEnricher
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENStopWordsTokenEnricher
import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.NCNLPEntityParser
import
org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser
import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser
diff --git
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
index 96ce7e1..182d4c8 100644
---
a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
+++
b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCResourceReader
import org.apache.nlpcraft.nlp.*
import
org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser
-import org.apache.nlpcraft.nlp.en.token.enricher.NCStopWordsTokenEnricher
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENStopWordsTokenEnricher
import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.NCNLPEntityParser
import
org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser
import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index 699751f..b4eec06 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -18,12 +18,12 @@
package org.apache.nlpcraft;
import org.apache.nlpcraft.internal.util.NCResourceReader;
-import org.apache.nlpcraft.nlp.en.token.enricher.NCBracketsTokenEnricher;
-import org.apache.nlpcraft.nlp.en.token.enricher.NCDictionaryTokenEnricher;
-import org.apache.nlpcraft.nlp.en.token.enricher.NCEnLemmaPosTokenEnricher;
-import org.apache.nlpcraft.nlp.en.token.enricher.NCQuotesTokenEnricher;
-import org.apache.nlpcraft.nlp.en.token.enricher.NCStopWordsTokenEnricher;
-import org.apache.nlpcraft.nlp.en.token.enricher.NСSwearWordsTokenEnricher;
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENBracketsTokenEnricher;
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENDictionaryTokenEnricher;
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENLemmaPosTokenEnricher;
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENQuotesTokenEnricher;
+import org.apache.nlpcraft.nlp.en.token.enricher.NCENStopWordsTokenEnricher;
+import org.apache.nlpcraft.nlp.en.token.enricher.NСENSwearWordsTokenEnricher;
import org.apache.nlpcraft.nlp.en.token.parser.opennlp.NCENOpenNLPTokenParser;
import java.util.*;
@@ -79,12 +79,12 @@ public class NCModelPipelineBuilder {
tokParser = new NCENOpenNLPTokenParser();
- tokEnrichers.add(new NCEnLemmaPosTokenEnricher());
- tokEnrichers.add(new NCStopWordsTokenEnricher());
- tokEnrichers.add(new
NСSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
- tokEnrichers.add(new NCQuotesTokenEnricher());
- tokEnrichers.add(new NCDictionaryTokenEnricher());
- tokEnrichers.add(new NCBracketsTokenEnricher());
+ tokEnrichers.add(new NCENLemmaPosTokenEnricher());
+ tokEnrichers.add(new NCENStopWordsTokenEnricher());
+ tokEnrichers.add(new
NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
+ tokEnrichers.add(new NCENQuotesTokenEnricher());
+ tokEnrichers.add(new NCENDictionaryTokenEnricher());
+ tokEnrichers.add(new NCENBracketsTokenEnricher());
this.entParsers.addAll(entParsers);
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCBracketsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENBracketsTokenEnricher.java
similarity index 85%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCBracketsTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENBracketsTokenEnricher.java
index cc1ed3a..684248d 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCBracketsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENBracketsTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCBracketsTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCENBracketsTokenEnricherImpl;
import java.util.List;
/**
* TODO: enriches with <code>brackets</code> property.
*/
-public class NCBracketsTokenEnricher implements NCTokenEnricher {
- private final NCBracketsTokenEnricherImpl impl = new
NCBracketsTokenEnricherImpl();
+public class NCENBracketsTokenEnricher implements NCTokenEnricher {
+ private final NCENBracketsTokenEnricherImpl impl = new
NCENBracketsTokenEnricherImpl();
@Override
public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCDictionaryTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENDictionaryTokenEnricher.java
similarity index 85%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCDictionaryTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENDictionaryTokenEnricher.java
index e788924..9a70962 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCDictionaryTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENDictionaryTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCDictionaryTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCENDictionaryTokenEnricherImpl;
import java.util.List;
/**
* TODO: enriches with <code>dict</code> property.
*/
-public class NCDictionaryTokenEnricher implements NCTokenEnricher {
- private final NCDictionaryTokenEnricherImpl impl = new
NCDictionaryTokenEnricherImpl();
+public class NCENDictionaryTokenEnricher implements NCTokenEnricher {
+ private final NCENDictionaryTokenEnricherImpl impl = new
NCENDictionaryTokenEnricherImpl();
@Override
public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENLemmaPosTokenEnricher.java
similarity index 80%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENLemmaPosTokenEnricher.java
index aedcf84..cf7cd7e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENLemmaPosTokenEnricher.java
@@ -17,16 +17,9 @@
package org.apache.nlpcraft.nlp.en.token.enricher;
-import org.apache.nlpcraft.NCModelConfig;
-import org.apache.nlpcraft.NCRequest;
-import org.apache.nlpcraft.NCToken;
-import org.apache.nlpcraft.NCTokenEnricher;
import org.apache.nlpcraft.internal.util.NCResourceReader;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCLemmaPosTokenEnricherImpl;
import
org.apache.nlpcraft.nlp.mult.token.enricher.opennlp.NCLemmaPosTokenEnricher;
-import java.util.List;
-
/**
* TODO: enriches with <code>lemma</code> and <code>pos</code> properties.
*
@@ -34,11 +27,11 @@ import java.util.List;
* - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
* - lemmatizer:
https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
*/
-public class NCEnLemmaPosTokenEnricher extends NCLemmaPosTokenEnricher {
+public class NCENLemmaPosTokenEnricher extends NCLemmaPosTokenEnricher {
/**
*
*/
- public NCEnLemmaPosTokenEnricher() {
+ public NCENLemmaPosTokenEnricher() {
super(
NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCQuotesTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENQuotesTokenEnricher.java
similarity index 85%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCQuotesTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENQuotesTokenEnricher.java
index 0de565d..9afb145 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCQuotesTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENQuotesTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCQuotesTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCENQuotesTokenEnricherImpl;
import java.util.List;
/**
* TODO: enriches with <code>quoted</code> property.
*/
-public class NCQuotesTokenEnricher implements NCTokenEnricher {
- private final NCQuotesTokenEnricherImpl impl = new
NCQuotesTokenEnricherImpl();
+public class NCENQuotesTokenEnricher implements NCTokenEnricher {
+ private final NCENQuotesTokenEnricherImpl impl = new
NCENQuotesTokenEnricherImpl();
@Override
public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCStopWordsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENStopWordsTokenEnricher.java
similarity index 76%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCStopWordsTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENStopWordsTokenEnricher.java
index 5e1dd34..afbeedd 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCStopWordsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCENStopWordsTokenEnricher.java
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCStopWordsTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCENStopWordsTokenEnricherImpl;
import java.util.List;
import java.util.Set;
@@ -29,18 +29,18 @@ import java.util.Set;
/**
* TODO: enriches with <code>stopword</code> property.
*/
-public class NCStopWordsTokenEnricher implements NCTokenEnricher {
- private final NCStopWordsTokenEnricherImpl impl;
+public class NCENStopWordsTokenEnricher implements NCTokenEnricher {
+ private final NCENStopWordsTokenEnricherImpl impl;
/**
*
*/
- public NCStopWordsTokenEnricher(Set<String> addSw, Set<String> exclSw) {
- impl = new NCStopWordsTokenEnricherImpl(addSw, exclSw);
+ public NCENStopWordsTokenEnricher(Set<String> addSw, Set<String> exclSw) {
+ impl = new NCENStopWordsTokenEnricherImpl(addSw, exclSw);
}
- public NCStopWordsTokenEnricher() {
- impl = new NCStopWordsTokenEnricherImpl(null, null);
+ public NCENStopWordsTokenEnricher() {
+ impl = new NCENStopWordsTokenEnricherImpl(null, null);
}
@Override
diff --git
"a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241SwearWordsTokenEnricher.java"
"b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241ENSwearWordsTokenEnricher.java"
similarity index 83%
rename from
"nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241SwearWordsTokenEnricher.java"
rename to
"nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241ENSwearWordsTokenEnricher.java"
index 462f024..e852b59 100644
---
"a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241SwearWordsTokenEnricher.java"
+++
"b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241ENSwearWordsTokenEnricher.java"
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCSwearWordsTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCENSwearWordsTokenEnricherImpl;
import java.util.List;
import java.util.Objects;
@@ -29,18 +29,18 @@ import java.util.Objects;
/**
* TODO: enriches with <code>swear</code> property.
*/
-public class NСSwearWordsTokenEnricher implements NCTokenEnricher {
- private final NCSwearWordsTokenEnricherImpl impl;
+public class NСENSwearWordsTokenEnricher implements NCTokenEnricher {
+ private final NCENSwearWordsTokenEnricherImpl impl;
/**
* TODO: swear_words.txt - describe where it can be downloaded.
*
* @param mdlSrc
*/
- public NСSwearWordsTokenEnricher(String mdlSrc) {
+ public NСENSwearWordsTokenEnricher(String mdlSrc) {
Objects.requireNonNull(mdlSrc, "Swear words model file cannot be
null.");
- impl = new NCSwearWordsTokenEnricherImpl(mdlSrc);
+ impl = new NCENSwearWordsTokenEnricherImpl(mdlSrc);
}
@Override
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCBracketsTokenEnricherImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENBracketsTokenEnricherImpl.scala
similarity index 96%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCBracketsTokenEnricherImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENBracketsTokenEnricherImpl.scala
index ace6847..6414ff3 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCBracketsTokenEnricherImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENBracketsTokenEnricherImpl.scala
@@ -28,7 +28,7 @@ import scala.jdk.CollectionConverters.CollectionHasAsScala
/**
*
*/
-class NCBracketsTokenEnricherImpl extends NCTokenEnricher with LazyLogging:
+class NCENBracketsTokenEnricherImpl extends NCTokenEnricher with LazyLogging:
override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): Unit =
val stack = new java.util.Stack[String]()
val map = mutable.HashMap.empty[NCToken, Boolean]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCDictionaryTokenEnricherImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENDictionaryTokenEnricherImpl.scala
similarity index 95%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCDictionaryTokenEnricherImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENDictionaryTokenEnricherImpl.scala
index c842ace..e753967 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCDictionaryTokenEnricherImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENDictionaryTokenEnricherImpl.scala
@@ -25,7 +25,7 @@ import java.util.List as JList
/**
*
*/
-class NCDictionaryTokenEnricherImpl extends NCTokenEnricher:
+class NCENDictionaryTokenEnricherImpl extends NCTokenEnricher:
private var dict: Set[String] = _
init()
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCQuotesTokenEnricherImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENQuotesTokenEnricherImpl.scala
similarity index 96%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCQuotesTokenEnricherImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENQuotesTokenEnricherImpl.scala
index 04359af..b804040 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCQuotesTokenEnricherImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENQuotesTokenEnricherImpl.scala
@@ -26,7 +26,7 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCQuotesTokenEnricherImpl extends NCTokenEnricher with LazyLogging:
+class NCENQuotesTokenEnricherImpl extends NCTokenEnricher with LazyLogging:
private final val Q_POS: Set[String] = Set("``", "''")
private def getPos(t: NCToken): String = t.getOpt("pos").orElseThrow(() =>
throw new NCException("POS not found in token."))
private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordGenerator.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENStopWordGenerator.scala
similarity index 99%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordGenerator.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENStopWordGenerator.scala
index 5d68a05..f310750 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordGenerator.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENStopWordGenerator.scala
@@ -8,7 +8,7 @@ import scala.collection.mutable
/**
* Generates first word sequences.
*/
-object NCStopWordGenerator:
+object NCENStopWordGenerator:
private final lazy val stemmer = new PorterStemmer
// Output files.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordsTokenEnricherImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENStopWordsTokenEnricherImpl.scala
similarity index 98%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordsTokenEnricherImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENStopWordsTokenEnricherImpl.scala
index 7b31018..3e06e2c 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordsTokenEnricherImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENStopWordsTokenEnricherImpl.scala
@@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-object NCStopWordsTokenEnricherImpl:
+object NCENStopWordsTokenEnricherImpl:
// Condition types.
type Wildcard = (String, String)
type Word = String
@@ -165,14 +165,14 @@ object NCStopWordsTokenEnricherImpl:
private def tokenMix(toks: Seq[NCToken], maxLen: Int = Integer.MAX_VALUE):
Seq[Seq[NCToken]] =
(for (n <- toks.length until 0 by -1 if n <= maxLen) yield
toks.sliding(n)).flatten
-import NCStopWordsTokenEnricherImpl.*
+import NCENStopWordsTokenEnricherImpl.*
/**
*
* @param addStopsSet
* @param exclStopsSet
*/
-class NCStopWordsTokenEnricherImpl(addStopsSet: JSet[String], exclStopsSet:
JSet[String]) extends NCTokenEnricher with LazyLogging:
+class NCENStopWordsTokenEnricherImpl(addStopsSet: JSet[String], exclStopsSet:
JSet[String]) extends NCTokenEnricher with LazyLogging:
private final val stemmer = new PorterStemmer
private var addStems: Set[String] = _
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCSwearWordsTokenEnricherImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENSwearWordsTokenEnricherImpl.scala
similarity index 94%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCSwearWordsTokenEnricherImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENSwearWordsTokenEnricherImpl.scala
index bfb97fa..686b2dc 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCSwearWordsTokenEnricherImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCENSwearWordsTokenEnricherImpl.scala
@@ -29,7 +29,7 @@ import java.util.List as JList
*
* @param res
*/
-class NCSwearWordsTokenEnricherImpl(res: String) extends NCTokenEnricher with
LazyLogging:
+class NCENSwearWordsTokenEnricherImpl(res: String) extends NCTokenEnricher
with LazyLogging:
require(res != null)
private final val stemmer = new PorterStemmer
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java
index 13a72cc..b94e792 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCLemmaPosTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.mult.token.enricher.opennlp.impl.NCLemmaPosTokenEnricherImpl;
import java.util.List;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/impl/NCLemmaPosTokenEnricherImpl.scala
similarity index 98%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/impl/NCLemmaPosTokenEnricherImpl.scala
index 9f378f0..02a6c18 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/impl/NCLemmaPosTokenEnricherImpl.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.en.token.enricher.impl
+package org.apache.nlpcraft.nlp.mult.token.enricher.opennlp.impl
import com.typesafe.scalalogging.LazyLogging
import opennlp.tools.lemmatizer.DictionaryLemmatizer
@@ -23,11 +23,12 @@ import opennlp.tools.postag.*
import opennlp.tools.stemmer.PorterStemmer
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.*
+
import java.io.*
import java.util
+import java.util.List as JList
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
-import java.util.List as JList
/**
*
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index e9588a0..ee63788 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -20,7 +20,7 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.{NCResourceReader, NCUtils}
import
org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser
-import org.apache.nlpcraft.nlp.en.token.enricher.{NCEnLemmaPosTokenEnricher,
NCStopWordsTokenEnricher}
+import org.apache.nlpcraft.nlp.en.token.enricher.{NCENLemmaPosTokenEnricher,
NCENStopWordsTokenEnricher}
import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.NCOpenNLPEntityParser
import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.{NCSemanticElement,
NCSemanticEntityParser}
import org.apache.nlpcraft.nlp.token.enricher.en.*
@@ -86,9 +86,9 @@ class NCSemanticEntityParserSpec:
).asJava
)
- private val stopWordsEnricher = new NCStopWordsTokenEnricher()
+ private val stopWordsEnricher = new NCENStopWordsTokenEnricher()
- private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher()
+ private val lemmaPosEnricher = new NCENLemmaPosTokenEnricher()
/**
*
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
index e621782..698f9d0 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.en.token.enricher.NCBracketsTokenEnricher
+import org.apache.nlpcraft.nlp.en.token.enricher.*
import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.util.opennlp.*
@@ -30,7 +30,7 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCBracketsTokenEnricherSpec:
- private val enricher = new NCBracketsTokenEnricher()
+ private val enricher = new NCENBracketsTokenEnricher()
/**
*
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
index c961a2e..8269f2d 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.en.token.enricher.{NCDictionaryTokenEnricher,
NCEnLemmaPosTokenEnricher}
+import org.apache.nlpcraft.nlp.en.token.enricher.{NCENDictionaryTokenEnricher,
NCENLemmaPosTokenEnricher}
import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.util.opennlp.*
@@ -30,9 +30,9 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCDictionaryTokenEnricherSpec:
- private val dictEnricher = new NCDictionaryTokenEnricher()
+ private val dictEnricher = new NCENDictionaryTokenEnricher()
- private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher()
+ private val lemmaPosEnricher = new NCENLemmaPosTokenEnricher()
@Test
def test(): Unit =
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
index 376f0b0..5a9c1c9 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.NCToken
import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.en.token.enricher.{NCEnLemmaPosTokenEnricher,
NCQuotesTokenEnricher}
+import org.apache.nlpcraft.nlp.en.token.enricher.{NCENLemmaPosTokenEnricher,
NCENQuotesTokenEnricher}
import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.util.opennlp.*
@@ -31,8 +31,8 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCQuotesTokenEnricherSpec:
- private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher
- private val quoteEnricher = new NCQuotesTokenEnricher
+ private val lemmaPosEnricher = new NCENLemmaPosTokenEnricher
+ private val quoteEnricher = new NCENQuotesTokenEnricher
/**
*
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala
index 492593d..dc95d1c 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala
@@ -32,7 +32,7 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCStopWordsEnricherSpec:
- private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher
+ private val lemmaPosEnricher = new NCENLemmaPosTokenEnricher
/**
*
@@ -40,7 +40,7 @@ class NCStopWordsEnricherSpec:
* @param txt
* @param boolVals
*/
- private def test(stopEnricher: NCStopWordsTokenEnricher, txt: String,
boolVals: Boolean*): Unit =
+ private def test(stopEnricher: NCENStopWordsTokenEnricher, txt: String,
boolVals: Boolean*): Unit =
val toksList = EN_PIPELINE.getTokenParser.tokenize(txt)
require(toksList.size == boolVals.size)
val toks = toksList.asScala.toSeq
@@ -58,13 +58,13 @@ class NCStopWordsEnricherSpec:
@Test
def test(): Unit =
test(
- new NCStopWordsTokenEnricher(),
+ new NCENStopWordsTokenEnricher(),
"the test",
true,
false
)
test(
- new NCStopWordsTokenEnricher(Set("test").asJava,
Set("the").asJava),
+ new NCENStopWordsTokenEnricher(Set("test").asJava,
Set("the").asJava),
"the test",
false,
true
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
index 12b8ebc..d69aebd 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.en.token.enricher.NСSwearWordsTokenEnricher
+import org.apache.nlpcraft.nlp.en.token.enricher.NСENSwearWordsTokenEnricher
import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.util.opennlp.*
@@ -31,7 +31,7 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCSwearWordsTokenEnricherSpec:
- private val enricher = new
NСSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))
+ private val enricher = new
NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))
@Test
def test(): Unit =
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala
index 4787634..ca0861e 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en.impl
import org.apache.nlpcraft.*
-import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCStopWordsTokenEnricherImpl
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCENStopWordsTokenEnricherImpl
import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.util.opennlp.*
@@ -45,7 +45,7 @@ class NCStopWordsImplSpec:
toks.zip(words).foreach { (t, w) => t.put("stopword", w.stop) }
- val mix = NCStopWordsTokenEnricherImpl.tokenMixWithStopWords(toks)
+ val mix = NCENStopWordsTokenEnricherImpl.tokenMixWithStopWords(toks)
val resSorted = mix.map(_.map(_.getText).mkString).sorted
val expectedSorted = expected.sorted
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala
index 57ecd4e..99e521c 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala
@@ -20,7 +20,7 @@ package org.apache.nlpcraft.nlp.token.parser.opennlp
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.en.token.enricher.{NCEnLemmaPosTokenEnricher,
NCStopWordsTokenEnricher}
+import org.apache.nlpcraft.nlp.en.token.enricher.{NCENLemmaPosTokenEnricher,
NCENStopWordsTokenEnricher}
import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.util.opennlp.*
@@ -33,8 +33,8 @@ import scala.jdk.CollectionConverters.*
*
*/
class NCOpenNLPTokenParserSpec:
- private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher
- private val stopEnricher = new NCStopWordsTokenEnricher(null, null)
+ private val lemmaPosEnricher = new NCENLemmaPosTokenEnricher
+ private val stopEnricher = new NCENStopWordsTokenEnricher(null, null)
private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")