This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 04ceca9 WIP.
04ceca9 is described below
commit 04ceca960e6fd370c813e1d57b708d6252516afc
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Jan 5 14:38:38 2022 +0300
WIP.
---
.../nlp/entity/parser/stanford/NCStanfordEntityParser.java | 9 +++++++--
.../parser/stanford/impl/NCStanfordEntityParserImpl.scala | 6 ++----
.../nlp/token/parser/stanford/NCStanfordTokenParser.java | 7 +++++--
.../nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala | 9 ++++-----
.../parser/stanford/NCStanfordEntityParserSpec.scala | 4 +---
.../token/parser/stanford/NCStanfordTokenParserSpec.scala | 4 ++--
.../nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java | 2 --
.../nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala | 7 ++++---
...otesTokenEnricher.java => NCBracketsTokenEnricher.java} | 8 ++++----
...tsTokenEnricher.java => NCDictionaryTokenEnricher.java} | 8 ++++----
...uageTokenEnricher.java => NCLanguageTokenEnricher.java} | 6 +++---
...ionaryTokenEnricher.java => NCQuotesTokenEnricher.java} | 8 ++++----
...rdsTokenEnricher.java => NCStopWordsTokenEnricher.java} | 10 +++++-----
.../enricher/en/N\320\241SwearWordsTokenEnricher.java" | 12 ++++++------
.../impl/{NCEnBracketsImpl.scala => NCBracketsImpl.scala} | 7 ++++---
.../{NCEnDictionaryImpl.scala => NCDictionaryImpl.scala} | 7 ++++---
...EnLanguageWordsImpl.scala => NCLanguageWordsImpl.scala} | 6 +++---
.../en/impl/{NCEnQuotesImpl.scala => NCQuotesImpl.scala} | 11 ++++++-----
...EnStopWordGenerator.scala => NCStopWordGenerator.scala} | 2 +-
.../{NCEnStopWordsImpl.scala => NCStopWordsImpl.scala} | 8 ++++----
.../{NCEnSwearWordsImpl.scala => NCSwearWordsImpl.scala} | 7 ++++---
.../opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java | 1 -
.../apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java | 14 ++++++++++----
.../parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java | 3 ++-
.../parser/semantic/NCSemanticEntityParserSpec.scala | 7 ++++---
...nricherSpec.scala => NCBracketsTokenEnricherSpec.scala} | 10 +++++-----
...icherSpec.scala => NCDictionaryTokenEnricherSpec.scala} | 12 ++++++------
...nricherSpec.scala => NCLanguageTokenEnricherSpec.scala} | 8 ++++----
...nEnricherSpec.scala => NCQuotesTokenEnricherSpec.scala} | 10 +++++-----
...icherSpec.scala => NCSwearWordsTokenEnricherSpec.scala} | 12 ++++++------
...okenParserSpec.scala => NCOpenNlpTokenParserSpec.scala} | 8 ++++----
.../java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala | 7 ++++---
.../java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 3 ++-
33 files changed, 129 insertions(+), 114 deletions(-)
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
index d5c5e4a..4ef1396 100644
---
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
@@ -18,8 +18,13 @@
package org.apache.nlpcraft.nlp.entity.parser.stanford;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import org.apache.nlpcraft.*;
-import org.apache.nlpcraft.nlp.entity.parser.stanford.impl.*;
+import org.apache.nlpcraft.NCEntity;
+import org.apache.nlpcraft.NCEntityParser;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.nlp.entity.parser.stanford.impl.NCStanfordEntityParserImpl;
+
import java.util.List;
import java.util.Objects;
import java.util.Set;
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
index a7d02a8..ac98c11 100644
---
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
@@ -18,13 +18,11 @@
package org.apache.nlpcraft.nlp.entity.parser.stanford.impl
import edu.stanford.nlp.ling.CoreAnnotations.NormalizedNamedEntityTagAnnotation
-import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP}
+import edu.stanford.nlp.pipeline.*
import org.apache.nlpcraft.*
import java.util
-import java.util.List as JList
-import java.util.Set as JSet
-import java.util.Properties
+import java.util.{Properties, List as JList, Set as JSet}
import java.util.stream.Collectors
import scala.jdk.CollectionConverters.*
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
index 8dfd6c8..fa6986d 100644
---
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
@@ -18,8 +18,11 @@
package org.apache.nlpcraft.nlp.token.parser.stanford;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import org.apache.nlpcraft.*;
-import org.apache.nlpcraft.nlp.token.parser.stanford.impl.*;
+import org.apache.nlpcraft.NCException;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCTokenParser;
+import org.apache.nlpcraft.nlp.token.parser.stanford.impl.NCStanfordNlpImpl;
+
import java.util.List;
import java.util.Objects;
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
index e427b8d..6b3e5c7 100644
---
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
@@ -31,10 +31,6 @@ import java.util.{Properties, List as JList}
import scala.jdk.CollectionConverters.*
class NCStanfordNlpImpl(stanford: StanfordCoreNLP) extends NCTokenParser:
- override def tokenize(text: String): JList[String] =
- PTBTokenizer.newPTBTokenizer(new StringReader(text)).tokenize().stream().map(p => p.word()).collect(Collectors.toList)
- override def getStem(s: String): String = null // TODO:
-
private def get(toks: JList[String], getData: CoreLabel => String) =
val doc = new CoreDocument(toks.stream().collect(Collectors.joining(" ")))
@@ -53,4 +49,7 @@ class NCStanfordNlpImpl(stanford: StanfordCoreNLP) extends
NCTokenParser:
// TODO: getPoses and getLemmas are equal.
override def getPoses(toks: JList[String]): JList[String] = get(toks,
_.tag())
- override def getLemmas(toks: JList[String], poses: JList[String]):
JList[String] = get(toks, _.lemma())
\ No newline at end of file
+ override def getLemmas(toks: JList[String], poses: JList[String]):
JList[String] = get(toks, _.lemma())
+ override def tokenize(text: String): JList[String] =
+ PTBTokenizer.newPTBTokenizer(new StringReader(text)).tokenize().stream().map(_.word()).collect(Collectors.toList)
+ override def getStem(s: String): String = null // TODO:
\ No newline at end of file
diff --git
a/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala
b/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala
index 464587c..811fb07 100644
---
a/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala
+++
b/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParserSpec.scala
@@ -19,13 +19,11 @@ package org.apache.nlpcraft.nlp.entity.parser.stanford
import org.apache.nlpcraft.nlp.entity.parser.stanford.NCStanfordEntityParser
import org.apache.nlpcraft.nlp.token.parser.stanford.NCStanfordTokenParser
-import org.apache.nlpcraft.nlp.util.NCTestToken
+import org.apache.nlpcraft.nlp.util.*
import org.apache.nlpcraft.nlp.utils.NCStanfordTestConfig
import org.junit.jupiter.api.Test
-import org.apache.nlpcraft.nlp.util.NCTestToken
import scala.jdk.CollectionConverters.*
-import org.apache.nlpcraft.nlp.util.NCTestUtils
class NCStanfordEntityParserSpec:
private val parser = NCStanfordTokenParser(NCStanfordTestConfig.STANFORD)
diff --git
a/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala
b/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala
index 185d3f7..49b5993 100644
---
a/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala
+++
b/nlpcraft-stanford/src/test/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala
@@ -17,11 +17,11 @@
package org.apache.nlpcraft.nlp.token.parser.stanford
-import java.util.Properties
-import org.junit.jupiter.api.*
import edu.stanford.nlp.pipeline.StanfordCoreNLP
import org.apache.nlpcraft.nlp.utils.NCStanfordTestConfig
+import org.junit.jupiter.api.*
+import java.util.Properties
import scala.jdk.CollectionConverters.*
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
index efb3a95..d887031 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
@@ -23,10 +23,8 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNlpEntityParserImpl;
-import org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNlpEntityParserImpl;
import java.util.List;
-import java.util.Objects;
/**
* Umbrella for NLP tokens.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
index a7e4116..2409a2e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.nlp.entity.parser.nlp.impl
import org.apache.nlpcraft.*
import java.util
+import java.util.List as JList
import java.util.stream.Collectors
/**
@@ -28,13 +29,13 @@ import java.util.stream.Collectors
object NCNlpEntityParserImpl:
private def id = "nlp:token"
-import NCNlpEntityParserImpl._
+import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNlpEntityParserImpl.*
/**
*
*/
class NCNlpEntityParserImpl extends NCEntityParser:
- override def parse(req: NCRequest, cfg: NCModelConfig, toks:
util.List[NCToken]): util.List[NCEntity] =
+ override def parse(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): JList[NCEntity] =
toks.stream().map(t =>
new NCPropertyMapAdapter with NCEntity:
put(s"$id:stem", t.getStem)
@@ -43,7 +44,7 @@ class NCNlpEntityParserImpl extends NCEntityParser:
put(s"$id:text", t.getText)
put(s"$id:index", t.getIndex)
- override def getTokens: util.List[NCToken] =
util.Collections.singletonList(t)
+ override def getTokens: JList[NCToken] =
util.Collections.singletonList(t)
override def getRequestId: String = req.getRequestId
override def getId: String = id
).collect(Collectors.toList)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java
similarity index 84%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java
index c38f29e..6331fd1 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnQuotesImpl;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCBracketsImpl;
import java.util.List;
/**
- * TODO: enriches with <code>quotes:en</code> property.
+ * TODO: enriches with <code>brackets</code> property.
*/
-public class NCEnQuotesTokenEnricher implements NCTokenEnricher {
- private final NCEnQuotesImpl impl = new NCEnQuotesImpl();
+public class NCBracketsTokenEnricher implements NCTokenEnricher {
+ private final NCBracketsImpl impl = new NCBracketsImpl();
@Override
public void start(NCModelConfig cfg) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java
similarity index 84%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java
index 14ee3a2..2bdb129 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnBracketsImpl;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCDictionaryImpl;
import java.util.List;
/**
- * TODO: enriches with <code>brackets:en</code> property.
+ * TODO: enriches with <code>dict</code> property.
*/
-public class NCEnBracketsTokenEnricher implements NCTokenEnricher {
- private final NCEnBracketsImpl impl = new NCEnBracketsImpl();
+public class NCDictionaryTokenEnricher implements NCTokenEnricher {
+ private final NCDictionaryImpl impl = new NCDictionaryImpl();
@Override
public void start(NCModelConfig cfg) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java
similarity index 86%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java
index 9ecbd90..2cd8896 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnLanguageWordsImpl;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCLanguageWordsImpl;
import java.util.List;
/**
* TODO: enriches with <code>lang:en</code> property.
*/
-public class NCEnLanguageTokenEnricher implements NCTokenEnricher {
- private final NCEnLanguageWordsImpl impl = new NCEnLanguageWordsImpl();
+public class NCLanguageTokenEnricher implements NCTokenEnricher {
+ private final NCLanguageWordsImpl impl = new NCLanguageWordsImpl();
@Override
public void start(NCModelConfig cfg) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java
similarity index 83%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java
index 8c3275f..64846ce 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnDictionaryImpl;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCQuotesImpl;
import java.util.List;
/**
- * TODO: enriches with <code>dict:en</code> property.
+ * TODO: enriches with <code>quoted</code> property.
*/
-public class NCEnDictionaryTokenEnricher implements NCTokenEnricher {
- private final NCEnDictionaryImpl impl = new NCEnDictionaryImpl();
+public class NCQuotesTokenEnricher implements NCTokenEnricher {
+ private final NCQuotesImpl impl = new NCQuotesImpl();
@Override
public void start(NCModelConfig cfg) {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnStopWordsTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java
similarity index 81%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnStopWordsTokenEnricher.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java
index e431a6a..14c5ca5 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnStopWordsTokenEnricher.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnStopWordsImpl;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCStopWordsImpl;
import java.util.List;
import java.util.Set;
@@ -29,14 +29,14 @@ import java.util.Set;
/**
* TODO: enriches with <code>dict:en</code> property.
*/
-public class NCEnStopWordsTokenEnricher implements NCTokenEnricher {
- private final NCEnStopWordsImpl impl;
+public class NCStopWordsTokenEnricher implements NCTokenEnricher {
+ private final NCStopWordsImpl impl;
/**
*
*/
- public NCEnStopWordsTokenEnricher(Set<String> addStems, Set<String>
exclStems) {
- impl = new NCEnStopWordsImpl(addStems, exclStems);
+ public NCStopWordsTokenEnricher(Set<String> addStems, Set<String>
exclStems) {
+ impl = new NCStopWordsImpl(addStems, exclStems);
}
@Override
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
"b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java"
similarity index 81%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
rename to
"nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java"
index 2de4d0b..c7c65af 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
+++
"b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java"
@@ -21,26 +21,26 @@ import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenEnricher;
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnSwearWordsImpl;
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCSwearWordsImpl;
import java.util.List;
import java.util.Objects;
/**
- * TODO: enriches with <code>dict:en</code> property.
+ * TODO: enriches with <code>swear</code> property.
*/
-public class NCEnSwearWordsTokenEnricher implements NCTokenEnricher {
- private final NCEnSwearWordsImpl impl;
+public class NСSwearWordsTokenEnricher implements NCTokenEnricher {
+ private final NCSwearWordsImpl impl;
/**
* TODO: swear_words.txt
*
* @param mdlSrc
*/
- public NCEnSwearWordsTokenEnricher(String mdlSrc) {
+ public NСSwearWordsTokenEnricher(String mdlSrc) {
Objects.requireNonNull(mdlSrc, "Swear words model file cannot be null.");
- impl = new NCEnSwearWordsImpl(mdlSrc);
+ impl = new NCSwearWordsImpl(mdlSrc);
}
@Override
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCBracketsImpl.scala
similarity index 90%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCBracketsImpl.scala
index 80f6e62..67b9926 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnBracketsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCBracketsImpl.scala
@@ -21,14 +21,15 @@ import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
import java.io.*
+import java.util.List as JList
import scala.collection.mutable
import scala.jdk.CollectionConverters.CollectionHasAsScala
/**
*
*/
-class NCEnBracketsImpl extends NCTokenEnricher with LazyLogging:
- override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
+class NCBracketsImpl extends NCTokenEnricher with LazyLogging:
+ override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): Unit =
val stack = new java.util.Stack[String]()
val map = mutable.HashMap.empty[NCToken, Boolean]
var ok = true
@@ -45,5 +46,5 @@ class NCEnBracketsImpl extends NCTokenEnricher with
LazyLogging:
case ">" => check("<"); mark(t)
case _ => mark(t)
- if ok && stack.isEmpty then map.foreach { (tok, b) =>
tok.put("brackets:en", b) }
+ if ok && stack.isEmpty then map.foreach { (tok, b) =>
tok.put("brackets", b) }
else logger.trace(s"Invalid brackets: ${req.getText}")
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnDictionaryImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala
similarity index 87%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnDictionaryImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala
index 4af022c..b6469dd 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnDictionaryImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryImpl.scala
@@ -20,13 +20,14 @@ package org.apache.nlpcraft.nlp.token.enricher.en.impl
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
+import java.util.List as JList
/**
*
*/
-class NCEnDictionaryImpl extends NCTokenEnricher:
+class NCDictionaryImpl extends NCTokenEnricher:
@volatile private var dict: Set[String] = _
override def start(cfg: NCModelConfig): Unit = dict =
NCUtils.readResource("moby/354984si.ngl", "iso-8859-1").toSet
override def stop(): Unit = dict = null
- override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
- toks.forEach(t => t.put("dict:en", dict.contains(t.getLemma)))
+ override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): Unit =
+ toks.forEach(t => t.put("dict", dict.contains(t.getLemma)))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnLanguageWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCLanguageWordsImpl.scala
similarity index 88%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnLanguageWordsImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCLanguageWordsImpl.scala
index 0662379..557f893 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnLanguageWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCLanguageWordsImpl.scala
@@ -20,10 +20,10 @@ package org.apache.nlpcraft.nlp.token.enricher.en.impl
import org.apache.nlpcraft.*
import java.io.*
-
+import java.util.List as JList
/**
*
*/
-class NCEnLanguageWordsImpl extends NCTokenEnricher:
- override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
+class NCLanguageWordsImpl extends NCTokenEnricher:
+ override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): Unit =
toks.forEach(t => t.put("lang:en", t.getText.matches("""[\s\w\p{Punct}]+""")))
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnQuotesImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCQuotesImpl.scala
similarity index 80%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnQuotesImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCQuotesImpl.scala
index f5342f5..44d5615 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnQuotesImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCQuotesImpl.scala
@@ -20,25 +20,26 @@ package org.apache.nlpcraft.nlp.token.enricher.en.impl
import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
+import java.util.List as JList
import scala.jdk.CollectionConverters.*
-object NCEnQuotesImpl:
+object NCQuotesImpl:
private final val Q_POS: Set[String] = Set("``", "''")
private def isQuote(t: NCToken): Boolean = Q_POS.contains(t.getPos)
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnQuotesImpl.*
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCQuotesImpl.*
/**
*
*/
-class NCEnQuotesImpl extends NCTokenEnricher with LazyLogging:
+class NCQuotesImpl extends NCTokenEnricher with LazyLogging:
/**
*
* @param req
* @param cfg
* @param toks
*/
- def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
+ def enrich(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): Unit
=
val toksSeq = toks.asScala
val quotes = toksSeq.filter(isQuote)
@@ -49,7 +50,7 @@ class NCEnQuotesImpl extends NCTokenEnricher with LazyLogging:
Option.when(idx % 2 != 0)(m(t) -> m(quotes(idx - 1)))
}
toksSeq.zipWithIndex.foreach { (tok, idx) =>
- tok.put("quoted:en", pairs.exists { case (from, to) => from > idx && to < idx })
+ tok.put("quoted", pairs.exists { case (from, to) => from > idx && to < idx })
}
else
logger.warn(s"Invalid quotes: ${req.getText}")
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordGenerator.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordGenerator.scala
similarity index 99%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordGenerator.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordGenerator.scala
index 4b3ac8a..4312b1b 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordGenerator.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordGenerator.scala
@@ -8,7 +8,7 @@ import scala.collection.mutable
/**
* Generates first word sequences.
*/
-object NCEnStopWordGenerator:
+object NCStopWordGenerator:
// TODO: ?
private final lazy val stemmer = new PorterStemmer
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
similarity index 98%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordsImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
index 248fac8..508f6d8 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnStopWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
@@ -29,7 +29,7 @@ import scala.collection.{IndexedSeq, Seq, mutable}
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
-object NCEnStopWordsImpl:
+object NCStopWordsImpl:
// Condition types.
type Wildcard = (String, String)
type Word = String
@@ -242,9 +242,9 @@ object NCEnStopWordsImpl:
// TODO: error?
private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
-import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnStopWordsImpl.*
+import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCStopWordsImpl.*
-class NCEnStopWordsImpl(addStemsSet: util.Set[String], exclStemsSet:
util.Set[String]) extends NCTokenEnricher with LazyLogging:
+class NCStopWordsImpl(addStemsSet: util.Set[String], exclStemsSet:
util.Set[String]) extends NCTokenEnricher with LazyLogging:
private val addStems: Set[String] = if addStemsSet == null then Set.empty
else addStemsSet.asScala.toSet
private val exclStems: Set[String] = if exclStemsSet == null then
Set.empty else exclStemsSet.asScala.toSet
@@ -304,7 +304,7 @@ class NCEnStopWordsImpl(addStemsSet: util.Set[String],
exclStemsSet: util.Set[St
enum WordForm:
case STEM, LEM, ORIG
- import WordForm._
+ import WordForm.*
class Condition[T]:
val any = mutable.HashSet.empty[T]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala
similarity index 86%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala
index c7e2534..e03481a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsImpl.scala
@@ -22,14 +22,15 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
import java.io.*
+import java.util.List as JList
-class NCEnSwearWordsImpl(res: String) extends NCTokenEnricher with LazyLogging:
+class NCSwearWordsImpl(res: String) extends NCTokenEnricher with LazyLogging:
@volatile private var swearWords: Set[String] = _
override def start(cfg: NCModelConfig): Unit =
swearWords = NCUtils.readTextStream(NCUtils.getStream(res),
"UTF-8").map(cfg.getTokenParser.getStem).toSet
logger.trace(s"Loaded resource: $res")
override def stop(): Unit = swearWords = null
- override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
- toks.forEach(t => t.put("swear:en", swearWords.contains(t.getStem)))
+ override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
JList[NCToken]): Unit =
+ toks.forEach(t => t.put("swear", swearWords.contains(t.getStem)))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
index 6ecbbfd..d4f334e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
@@ -2,7 +2,6 @@ package org.apache.nlpcraft.nlp.token.parser.opennlp.impl.en;
import opennlp.tools.stemmer.PorterStemmer;
import
org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParserStemmer;
-import
org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParserStemmer;
public class NCOpenNlpTokenParserStemmerImpl implements
NCOpenNlpTokenParserStemmer {
private PorterStemmer stemmer = new PorterStemmer();
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
index c4d3ea5..8a750a1 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/NCBenchmarkAdapter.java
@@ -19,10 +19,16 @@ package org.apache.nlpcraft.nlp.benchmark;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.nlp.util.NCTestRequest;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-import org.openjdk.jmh.annotations.*;
-import org.openjdk.jmh.runner.*;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.concurrent.TimeUnit;
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
index 60de192..58a903a 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
@@ -21,7 +21,8 @@ import org.apache.nlpcraft.nlp.benchmark.NCBenchmarkAdapter;
import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParser;
import
org.apache.nlpcraft.nlp.token.parser.opennlp.impl.en.NCOpenNlpTokenParserStemmerImpl;
import org.junit.jupiter.api.Disabled;
-import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.infra.Blackhole;
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index bf12f93..d55f0d8 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -24,6 +24,7 @@ import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
import java.util
+import java.util.{List as JList, Map as JMap}
import scala.collection.mutable
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
@@ -36,9 +37,9 @@ case class Element(
groups: Seq[String] = Seq.empty
) extends NCSemanticElement {
override def getId: String = id
- override def getGroups: util.List[String] = groups.asJava
- override def getValues: util.Map[String, util.List[String]] = values.map {
case (k, v) => k -> v.asJava}.asJava
- override def getSynonyms: util.List[String] = synonyms.asJava
+ override def getGroups: JList[String] = groups.asJava
+ override def getValues: JMap[String, JList[String]] = values.map { case
(k, v) => k -> v.asJava}.asJava
+ override def getSynonyms: JList[String] = synonyms.asJava
}
/**
*
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
similarity index 83%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
index f2c6d45..ebd774d 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnBracketsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.token.enricher.en.NCEnBracketsTokenEnricher
+import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -27,12 +27,12 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCEnBracketsTokenEnricherSpec:
- private var enricher: NCEnBracketsTokenEnricher = _
+class NCBracketsTokenEnricherSpec:
+ private var enricher: NCBracketsTokenEnricher = _
@BeforeEach
def start(): Unit = enricher =
- NCTestUtils.makeAndStart(new NCEnBracketsTokenEnricher())
+ NCTestUtils.makeAndStart(new NCBracketsTokenEnricher())
/**
*
@@ -45,7 +45,7 @@ class NCEnBracketsTokenEnricherSpec:
val seq = toks.asScala.toSeq
NCTestUtils.printTokens(seq)
seq.foreach (tok =>
- require(!(tok.get[Boolean]("brackets:en") ^
brackets.contains(tok.getIndex)))
+ require(!(tok.get[Boolean]("brackets") ^
brackets.contains(tok.getIndex)))
)
@Test
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
similarity index 83%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
index 06df2ba..5a9c039 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnDictionaryTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala
@@ -17,7 +17,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
-import org.apache.nlpcraft.nlp.token.enricher.en.NCEnDictionaryTokenEnricher
+import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -26,11 +26,11 @@ import scala.jdk.CollectionConverters.SeqHasAsJava
/**
*
*/
-class NCEnDictionaryTokenEnricherSpec:
- private var enricher: NCEnDictionaryTokenEnricher = _
+class NCDictionaryTokenEnricherSpec:
+ private var enricher: NCDictionaryTokenEnricher = _
@BeforeEach
- def start(): Unit = enricher = NCTestUtils.makeAndStart(new
NCEnDictionaryTokenEnricher())
+ def start(): Unit = enricher = NCTestUtils.makeAndStart(new
NCDictionaryTokenEnricher())
@Test
def test(): Unit =
@@ -46,5 +46,5 @@ class NCEnDictionaryTokenEnricherSpec:
NCTestUtils.printTokens(toks)
- require(toks.head.get[Boolean]("dict:en"))
- require(!toks.last.get[Boolean]("dict:en"))
\ No newline at end of file
+ require(toks.head.get[Boolean]("dict"))
+ require(!toks.last.get[Boolean]("dict"))
\ No newline at end of file
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala
similarity index 86%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricherSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala
index 56a2414..a365a81 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnLanguageTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCLanguageTokenEnricherSpec.scala
@@ -17,7 +17,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
-import org.apache.nlpcraft.nlp.token.enricher.en.NCEnLanguageTokenEnricher
+import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -26,11 +26,11 @@ import scala.jdk.CollectionConverters.SeqHasAsJava
/**
*
*/
-class NCEnLanguageTokenEnricherSpec:
- private var enricher: NCEnLanguageTokenEnricher = _
+class NCLanguageTokenEnricherSpec:
+ private var enricher: NCLanguageTokenEnricher = _
@BeforeEach
- def start(): Unit = enricher = NCTestUtils.makeAndStart(new
NCEnLanguageTokenEnricher)
+ def start(): Unit = enricher = NCTestUtils.makeAndStart(new
NCLanguageTokenEnricher)
@Test
def test(): Unit =
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
similarity index 82%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
index d5eee27..7a21c67 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnQuotesTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
import org.apache.nlpcraft.NCToken
-import org.apache.nlpcraft.nlp.token.enricher.en.NCEnQuotesTokenEnricher
+import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -27,12 +27,12 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCEnQuotesTokenEnricherSpec:
- private var enricher: NCEnQuotesTokenEnricher = _
+class NCQuotesTokenEnricherSpec:
+ private var enricher: NCQuotesTokenEnricher = _
@BeforeEach
def start(): Unit =
- enricher = NCTestUtils.makeAndStart(new NCEnQuotesTokenEnricher)
+ enricher = NCTestUtils.makeAndStart(new NCQuotesTokenEnricher)
/**
*
@@ -45,7 +45,7 @@ class NCEnQuotesTokenEnricherSpec:
enricher.enrich(NCTestRequest(txt), NCTestConfig.EN, toks)
NCTestUtils.printTokens(toksSeq)
toksSeq.foreach (tok =>
- require(!(tok.get[Boolean]("quoted:en") ^
quotes.contains(tok.getIndex)))
+ require(!(tok.get[Boolean]("quoted") ^
quotes.contains(tok.getIndex)))
)
@Test
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricherSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
similarity index 81%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricherSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
index d2c183d..4108bd0 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricherSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala
@@ -17,7 +17,7 @@
package org.apache.nlpcraft.nlp.token.enricher.en
-import org.apache.nlpcraft.nlp.token.enricher.en.NCEnSwearWordsTokenEnricher
+import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -26,11 +26,11 @@ import scala.jdk.CollectionConverters.SeqHasAsJava
/**
*
*/
-class NCEnSwearWordsTokenEnricherSpec:
- private var enricher: NCEnSwearWordsTokenEnricher = _
+class NCSwearWordsTokenEnricherSpec:
+ private var enricher: NСSwearWordsTokenEnricher = _
@BeforeEach
- def start(): Unit = enricher = NCTestUtils.makeAndStart(new
NCEnSwearWordsTokenEnricher("badfilter/swear_words.txt"))
+ def start(): Unit = enricher = NCTestUtils.makeAndStart(new
NСSwearWordsTokenEnricher("badfilter/swear_words.txt"))
@Test
def test(): Unit =
@@ -46,5 +46,5 @@ class NCEnSwearWordsTokenEnricherSpec:
NCTestUtils.printTokens(toks)
- require(!toks.head.get[Boolean]("swear:en"))
- require(toks.last.get[Boolean]("swear:en"))
\ No newline at end of file
+ require(!toks.head.get[Boolean]("swear"))
+ require(toks.last.get[Boolean]("swear"))
\ No newline at end of file
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
similarity index 92%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
index b0cf72f..2333989 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.nlp.token.parser.opennlp
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
-import org.apache.nlpcraft.nlp.token.enricher.en.{NCEnBracketsTokenEnricher,
NCEnStopWordsTokenEnricher}
+import org.apache.nlpcraft.nlp.token.enricher.en.*
import org.apache.nlpcraft.nlp.util.*
import org.junit.jupiter.api.*
@@ -29,8 +29,8 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCEnOpenNlpTokenParserSpec:
- private var enricher: NCEnStopWordsTokenEnricher = _
+class NCOpenNlpTokenParserSpec:
+ private var enricher: NCStopWordsTokenEnricher = _
private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
@@ -46,7 +46,7 @@ class NCEnOpenNlpTokenParserSpec:
@BeforeEach
def start(): Unit = enricher =
- NCTestUtils.makeAndStart(new NCEnStopWordsTokenEnricher(null, null))
+ NCTestUtils.makeAndStart(new NCStopWordsTokenEnricher(null, null))
@Test
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
index bed1050..1819ee7 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestRequest.scala
@@ -19,8 +19,9 @@ package org.apache.nlpcraft.nlp.util
import org.apache.nlpcraft.*
import org.apache.nlpcraft.nlp.util.NCTestConfig.*
+
import java.util
-import java.util.List
+import java.util.Map as JMap
/**
* Request test implementation.
@@ -36,13 +37,13 @@ case class NCTestRequest(
userId: String = null,
reqId: String = null,
ts: Long = -1,
- data: util.Map[String, AnyRef] = null
+ data: JMap[String, AnyRef] = null
) extends NCRequest:
override def getUserId: String = userId
override def getRequestId: String = reqId
override def getText: String = txt
override def getReceiveTimestamp: Long = ts
- override def getRequestData: util.Map[String, AnyRef] = data
+ override def getRequestData: JMap[String, AnyRef] = data
/**
* Java side helper.
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index a90922b..328f4dc 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import java.util
+import java.util.List as JList
import scala.jdk.CollectionConverters.*
import scala.jdk.OptionConverters.RichOptional
/**
@@ -66,7 +67,7 @@ object NCTestUtils:
tbl.print(s"Request: $req")
- def mkTokens(p: NCTokenParser, txt: String): util.List[NCToken] =
+ def mkTokens(p: NCTokenParser, txt: String): JList[NCToken] =
val toks = p.tokenize(txt)
val poses = p.getPoses(toks)
val lemmas = p.getLemmas(toks, poses)