This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
new 5868fe5 WIP
5868fe5 is described below
commit 5868fe59a032f5a65cf8a215f5a8961572f574dc
Author: Aaron Radzinski <[email protected]>
AuthorDate: Thu Dec 23 22:45:17 2021 -0800
WIP
---
...OpenNlpTokenParser.java => NCEnOpenNlpTokenParser.java} | 14 +++++++-------
.../impl/{NCOpenNlpImpl.scala => NCEnOpenNlpImpl.scala} | 12 ++++++------
.../token/parser/opennlp/impl/NCEnStopWordsFinder.scala | 4 ++--
...Benchmark.java => NCEnOpenNlpTokenParserBenchmark.java} | 10 +++++-----
.../token/enricher/NCEnDictionaryTokenEnricherSpec.scala | 2 +-
...enParserSpec.scala => NCEnOpenNlpTokenParserSpec.scala} | 6 +++---
6 files changed, 24 insertions(+), 24 deletions(-)
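
This commit renames the OpenNLP token parser classes from NCOpenNlp* to NCEnOpenNlp* (adding the English-language "En" prefix). For reference, a minimal usage sketch of the renamed parser, assuming only the constructor signature, start() method and model paths that appear in the diff below; the wrapping Example class and main method are illustrative only and not part of the repository:

    import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.NCEnOpenNlpTokenParser;

    public class Example {
        public static void main(String[] args) {
            // Renamed parser class; the 3-argument string constructor takes the OpenNLP
            // tokenizer model, POS model and lemmatizer dictionary (local path, resource or URL).
            NCEnOpenNlpTokenParser parser = new NCEnOpenNlpTokenParser(
                "opennlp/en-token.bin",
                "opennlp/en-pos-maxent.bin",
                "opennlp/en-lemmatizer.dict"
            );
            parser.start(); // Lifecycle method declared on the parser (see diff below).
        }
    }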
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
similarity index 89%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
index c0e921f..44d8140 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
@@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCException;
import org.apache.nlpcraft.NCRequest;
import org.apache.nlpcraft.NCToken;
import org.apache.nlpcraft.NCTokenParser;
-import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.NCOpenNlpImpl;
+import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.NCEnOpenNlpImpl;
import java.io.File;
import java.util.List;
@@ -37,8 +37,8 @@ import java.util.Set;
/**
*
*/
-public class NCOpenNlpTokenParser implements NCTokenParser {
- private final NCOpenNlpImpl impl;
+public class NCEnOpenNlpTokenParser implements NCTokenParser {
+ private final NCEnOpenNlpImpl impl;
@Override
public void start() {
@@ -57,13 +57,13 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
* @param lemmaDic
* @throws NCException
*/
- public NCOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
+ public NCEnOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
Objects.requireNonNull(tokMdl, "Tokenizer model file cannot be null.");
Objects.requireNonNull(posMdl, "POS model file cannot be null.");
Objects.requireNonNull(lemmaDic, "Lemmatizer model file cannot be null.");
try {
- impl = NCOpenNlpImpl.apply(tokMdl, posMdl, lemmaDic);
+ impl = NCEnOpenNlpImpl.apply(tokMdl, posMdl, lemmaDic);
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
@@ -77,13 +77,13 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
* @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
* @throws NCException
*/
- public NCOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
+ public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be null.");
Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be null.");
try {
- impl = NCOpenNlpImpl.apply(tokMdlSrc, posMdlSrc, lemmaDicSrc);
+ impl = NCEnOpenNlpImpl.apply(tokMdlSrc, posMdlSrc, lemmaDicSrc);
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
similarity index 95%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
index 867c393..91fd44f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
@@ -31,7 +31,7 @@ import java.util.{Collections, List as JList, Set as JSet}
import scala.concurrent.ExecutionContext
import scala.jdk.CollectionConverters.*
-object NCOpenNlpImpl:
+object NCEnOpenNlpImpl:
/**
*
* @param tokMdlSrc Local filesystem path, resources file path or URL for OpenNLP tokenizer model.
@@ -39,8 +39,8 @@ object NCOpenNlpImpl:
* @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
* @return
*/
- def apply(tokMdlSrc: String, posMdlSrc: String, lemmaDicSrc: String): NCOpenNlpImpl =
- new NCOpenNlpImpl(NCUtils.getStream(tokMdlSrc), NCUtils.getStream(posMdlSrc), NCUtils.getStream(lemmaDicSrc))
+ def apply(tokMdlSrc: String, posMdlSrc: String, lemmaDicSrc: String): NCEnOpenNlpImpl =
+ new NCEnOpenNlpImpl(NCUtils.getStream(tokMdlSrc), NCUtils.getStream(posMdlSrc), NCUtils.getStream(lemmaDicSrc))
/**
*
@@ -49,10 +49,10 @@ object NCOpenNlpImpl:
* @param lemmaDicFile Local file for OpenNLP lemmatizer dictionary.
* @return
*/
- def apply(tokMdlFile: File, posMdlFile: File, lemmaDicFile: File): NCOpenNlpImpl =
+ def apply(tokMdlFile: File, posMdlFile: File, lemmaDicFile: File): NCEnOpenNlpImpl =
def toStream(f: File) = new BufferedInputStream(new FileInputStream(f))
- new NCOpenNlpImpl(toStream(tokMdlFile), toStream(posMdlFile), toStream(lemmaDicFile))
+ new NCEnOpenNlpImpl(toStream(tokMdlFile), toStream(posMdlFile), toStream(lemmaDicFile))
/**
*
@@ -60,7 +60,7 @@ object NCOpenNlpImpl:
* @param posMdlIn
* @param lemmaDicIn
*/
-class NCOpenNlpImpl(
+class NCEnOpenNlpImpl(
tokMdlIn: InputStream,
posMdlIn: InputStream,
lemmaDicIn: InputStream
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index 0a49adf..ecbe1c1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -308,7 +308,7 @@ private[impl] class NCEnStopWordsFinder(addStems: Set[String], exclStems: Set[St
else
Map.empty
- if !poses.keys.forall(POSES.contains) then throwError(s"Invalid POSes: ${poses.keys.mkString(", ")}")
+ if !poses.keys.forall(POSES.contains) then throwError(s"Invalid POSes: ${poses.keys.mkString(", ")}")
val hasPoses = poses.nonEmpty
if hasPoses then s = s.take(idxPos).trim
val isMultiWord = s.contains(' ')
@@ -543,7 +543,7 @@ private[impl] class NCEnStopWordsFinder(addStems: Set[String], exclStems: Set[St
if quotes.nonEmpty then
val m = toks.zipWithIndex.toMap
- val pairs = quotes.zipWithIndex.drop(1). flatMap {
+ val pairs = quotes.zipWithIndex.drop(1).flatMap {
case (t, idx) => if idx % 2 != 0 then Some(m(t) -> m(quotes(idx - 1))) else None
}
stops --= stops.filter(t => pairs.exists {
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCOpenNlpTokenParserBenchmark.java b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
similarity index 83%
rename from nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCOpenNlpTokenParserBenchmark.java
rename to nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
index c8d0784..405f452 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCOpenNlpTokenParserBenchmark.java
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.internal.nlp.benchmark.token.parser.opennlp;
import org.apache.nlpcraft.internal.nlp.benchmark.NCBenchmarkAdapter;
-import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.NCOpenNlpTokenParser;
+import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.NCEnOpenNlpTokenParser;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.infra.Blackhole;
@@ -26,8 +26,8 @@ import org.openjdk.jmh.infra.Blackhole;
/**
*
*/
-public class NCOpenNlpTokenParserBenchmark extends NCBenchmarkAdapter {
- private NCOpenNlpTokenParser parser;
+public class NCEnOpenNlpTokenParserBenchmark extends NCBenchmarkAdapter {
+ private NCEnOpenNlpTokenParser parser;
@Setup
public void setUp() {
@@ -48,8 +48,8 @@ public class NCOpenNlpTokenParserBenchmark extends NCBenchmarkAdapter {
*
* @return
*/
- private static NCOpenNlpTokenParser prepareParser() {
- NCOpenNlpTokenParser p = new NCOpenNlpTokenParser(
+ private static NCEnOpenNlpTokenParser prepareParser() {
+ NCEnOpenNlpTokenParser p = new NCEnOpenNlpTokenParser(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricherSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricherSpec.scala
index c8f2b5d..abdafcd 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricherSpec.scala
@@ -17,7 +17,7 @@
package org.apache.nlpcraft.internal.nlp.token.enricher
-import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.NCOpenNlpTokenParser
+import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.NCEnOpenNlpTokenParser
import org.apache.nlpcraft.internal.nlp.util.{NCTestToken, NCTestUtils}
import org.junit.jupiter.api.{BeforeEach, Test}
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
similarity index 95%
rename from nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
rename to nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
index 81a47d8..7820cf2 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
@@ -29,13 +29,13 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCOpenNlpTokenParserSpec:
- private var parser: NCOpenNlpTokenParser = _
+class NCEnOpenNlpTokenParserSpec:
+ private var parser: NCEnOpenNlpTokenParser = _
@BeforeEach
def start(): Unit =
parser = NCTestUtils.makeAndStart(
- new NCOpenNlpTokenParser(
+ new NCEnOpenNlpTokenParser(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"