This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 63321a3 WIP.
63321a3 is described below
commit 63321a35693ed982036e31f78f5708c2fddcdfa0
Author: Aaron Radzinski <[email protected]>
AuthorDate: Mon Dec 27 12:44:05 2021 -0800
WIP.
---
.../src/main/scala/org/apache/nlpcraft/NCEntity.java | 2 +-
.../main/scala/org/apache/nlpcraft/NCModelConfig.java | 2 +-
.../scala/org/apache/nlpcraft/NCModelConfigAdapter.java | 2 +-
.../{NCParameterized.java => NCPropertyMap.java} | 2 +-
...ameterizedAdapter.java => NCPropertyMapAdapter.java} | 2 +-
.../src/main/scala/org/apache/nlpcraft/NCToken.java | 2 +-
.../nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala | 1 +
.../token/parser/opennlp/NCEnOpenNlpTokenParser.java | 15 +++++----------
.../nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala | 6 +++---
.../token/parser/opennlp/impl/NCEnStopWordsFinder.scala | 2 +-
.../org/apache/nlpcraft/internal/util/NCUtils.scala | 17 +++++------------
.../apache/nlpcraft/internal/version/NCVersion.scala | 3 +++
.../apache/nlpcraft/internal/nlp/util/NCTestToken.scala | 2 +-
13 files changed, 25 insertions(+), 33 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
index 6f2899e..f8e0c8e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java
@@ -22,7 +22,7 @@ import java.util.List;
/**
*
*/
-public interface NCEntity extends NCParameterized {
+public interface NCEntity extends NCPropertyMap {
/**
*
* @return
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
index d944dfc..7e5f054 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
@@ -22,7 +22,7 @@ import java.util.*;
/**
*
*/
-public interface NCModelConfig extends NCParameterized {
+public interface NCModelConfig extends NCPropertyMap {
/**
* Default value for {@link #getMinTokens()} method.
*/
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
index 8ca392c..f5439d6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
@@ -23,7 +23,7 @@ import java.util.*;
*
*/
// TODO: validation for constructor and all setters.
-public class NCModelConfigAdapter extends NCParameterizedAdapter implements
NCModelConfig {
+public class NCModelConfigAdapter extends NCPropertyMapAdapter implements
NCModelConfig {
private final String id;
private final String name;
private final String version;
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterized.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPropertyMap.java
similarity index 97%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterized.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/NCPropertyMap.java
index 3a939d8..13a8119 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterized.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPropertyMap.java
@@ -22,7 +22,7 @@ import java.util.Optional;
/**
*
*/
-public interface NCParameterized {
+public interface NCPropertyMap {
/**
*
* @param key
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterizedAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPropertyMapAdapter.java
similarity index 96%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterizedAdapter.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/NCPropertyMapAdapter.java
index 686393e..a82689d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCParameterizedAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPropertyMapAdapter.java
@@ -22,7 +22,7 @@ import java.util.*;
/**
*
*/
-public class NCParameterizedAdapter implements NCParameterized {
+public class NCPropertyMapAdapter implements NCPropertyMap {
private final Map<String, Object> map = new HashMap<>();
@Override
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
index bcb0aa5..dfef5a0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
@@ -20,7 +20,7 @@ package org.apache.nlpcraft;
/**
*
*/
-public interface NCToken extends NCParameterized {
+public interface NCToken extends NCPropertyMap {
/**
*
* @return
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
index 4c983f3..7d0be8e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.internal.nlp.token.enricher.impl
import org.apache.nlpcraft.*
import java.io.*
+
/**
*
*/
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
index b8659e4..b74d171 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParser.java
@@ -17,16 +17,11 @@
package org.apache.nlpcraft.internal.nlp.token.parser.opennlp;
-import org.apache.nlpcraft.NCException;
-import org.apache.nlpcraft.NCRequest;
-import org.apache.nlpcraft.NCToken;
-import org.apache.nlpcraft.NCTokenParser;
+import org.apache.nlpcraft.*;
import
org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.NCEnOpenNlpImpl;
-import java.io.File;
-import java.util.List;
-import java.util.Objects;
-import java.util.Set;
+import java.io.*;
+import java.util.*;
/*
* Models can be downloaded from the following resources:
@@ -59,7 +54,7 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser {
* @throws NCException
*/
public NCEnOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
- Objects.requireNonNull(tokMdl, "Tonenizer model file cannot be null.");
+ Objects.requireNonNull(tokMdl, "Tokenizer model file cannot be null.");
Objects.requireNonNull(posMdl, "POS model file cannot be null.");
Objects.requireNonNull(lemmaDic, "Lemmatizer model file cannot be
null.");
@@ -79,7 +74,7 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser {
* @throws NCException
*/
public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String
lemmaDicSrc) {
- Objects.requireNonNull(tokMdlSrc, "Tonenizer model path cannot be
null.");
+ Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be
null.");
Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be
null.");
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
index 38db56a..ad508a4 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
@@ -76,7 +76,7 @@ class NCEnOpenNlpImpl(
private var exclStopWords: JSet[String] = _
override def start(): Unit =
- NCUtils.executeParallel(
+ NCUtils.execPar(
() => tokenizer = new TokenizerME(new TokenizerModel(tokMdlIn)),
() => tagger = new POSTaggerME(new POSModel(posMdlIn)),
() => lemmatizer = new DictionaryLemmatizer(lemmaDicIn),
@@ -162,7 +162,7 @@ class NCEnOpenNlpImpl(
}
val res: Seq[NCToken] =
holders.zip(posTags).zip(lemmas).toIndexedSeq.map { case ((h, pos), lemma) =>
- new NCParameterizedAdapter with NCToken:
+ new NCPropertyMapAdapter with NCToken:
override def getOriginalText: String = h.origin
override def getNormalizedText: String = h.normalized
override def getLemma: String = lemma
@@ -178,7 +178,7 @@ class NCEnOpenNlpImpl(
res.map(tok =>
if stops.contains(tok) then
- new NCParameterizedAdapter with NCToken:
+ new NCPropertyMapAdapter with NCToken:
override def getOriginalText: String =
tok.getOriginalText
override def getNormalizedText: String =
tok.getNormalizedText
override def getLemma: String = tok.getLemma
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index be2f1d6..6ba39f4 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -208,7 +208,7 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
@volatile private var nounWords: Set[String] = _
// Stemmatization is done already by generator.
- NCUtils.executeParallel(
+ NCUtils.execPar(
() => firstWords = read("stopwords/first_words.txt.gz"),
() => nounWords = read("stopwords/noun_words.txt.gz")
)(ExecutionContext.Implicits.global)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index e79caa1..5b9bd32 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -812,9 +812,7 @@ object NCUtils extends LazyLogging:
try
val arr = new Array[Byte](f.length().toInt)
- Using.resource(new FileInputStream(f)) { in =>
- in.read(arr)
- }
+ Using.resource(new FileInputStream(f))(_.read(arr))
getAndLog(arr, f, log)
catch
@@ -870,11 +868,9 @@ object NCUtils extends LazyLogging:
* @param log Logger to use.
*/
def readResource(res: String, enc: String = "UTF-8", log: Logger =
logger): List[String] =
- val list =
+ val list =
try
- Using.resource(Source.fromInputStream(getStream(res), enc)) {
src =>
- src.getLines().toList
- }
+ Using.resource(Source.fromInputStream(getStream(res),
enc))(_.getLines()).toList
catch
case e: IOException => throw new NCException(s"Failed to read
stream.", e)
@@ -882,7 +878,6 @@ object NCUtils extends LazyLogging:
list
-
/**
*
* @param in
@@ -902,9 +897,7 @@ object NCUtils extends LazyLogging:
def readTextGzipResource(res: String, enc: String, log: Logger = logger):
List[String] =
val list =
try
- Using.resource(Source.fromInputStream(new
GZIPInputStream(getStream(res)), enc)) { src =>
- readLcTrimFilter(src)
- }
+ Using.resource(Source.fromInputStream(new
GZIPInputStream(getStream(res)), enc))(readLcTrimFilter)
catch
case e: IOException => throw new NCException(s"Failed to read
stream.", e)
@@ -932,7 +925,7 @@ object NCUtils extends LazyLogging:
* @param bodies
* @param ec
*/
- def executeParallel(bodies: (() => Any)*)(ec: ExecutionContext): Unit =
+ def execPar(bodies: (() => Any)*)(ec: ExecutionContext): Unit =
bodies.map(body => Future { body() } (ec)).foreach(Await.result(_,
Duration.Inf))
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/version/NCVersion.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/version/NCVersion.scala
index 8da34c9..adee72a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/version/NCVersion.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/version/NCVersion.scala
@@ -54,6 +54,9 @@ object NCVersion extends LazyLogging:
Version("0.7.5", LocalDate.of(2021, 4, 30)),
Version("0.8.0", LocalDate.of(2021, 5, 30)),
Version("0.9.0", LocalDate.of(2021, 7, 10)),
+
+ // Version '1.0.0+' is incompatible with previous versions.
+ Version("1.0.0", LocalDate.of(2022, 3, 1)),
).sortBy(_.version)
// +=================================================+
// | UPDATE THIS SEQUENCE FOR EACH RELEASE MANUALLY. |
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
index 1f4b6ac..076ffc7 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
@@ -38,7 +38,7 @@ case class NCTestToken(
isStop: Boolean = false,
start: Int = -1,
end: Int = -1
-) extends NCParameterizedAdapter with NCToken:
+) extends NCPropertyMapAdapter with NCToken:
override def getOriginalText: String = txt
override def getNormalizedText: String = txt.toLowerCase
override def getLemma: String = lemma