This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new f97fa9a WIP.
f97fa9a is described below
commit f97fa9a4958e1259d3aeae82b1953c7a60f92f1a
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Dec 28 11:25:25 2021 -0800
WIP.
---
.../scala/org/apache/nlpcraft/NCModelClient.java | 4 +-
.../scala/org/apache/nlpcraft/NCModelConfig.java | 2 +-
.../org/apache/nlpcraft/NCModelConfigAdapter.java | 86 ++++++++++++++++------
.../main/scala/org/apache/nlpcraft/NCRequest.java | 10 +--
.../main/scala/org/apache/nlpcraft/NCToken.java | 8 +-
.../nlp/token/enricher/impl/NCEnBracketsImpl.scala | 6 +-
.../enricher/impl/NCEnLanguageWordsImpl.scala | 2 +-
.../nlp/token/enricher/impl/NCEnQuotesImpl.scala | 2 +-
.../parser/opennlp/impl/NCEnOpenNlpImpl.scala | 8 +-
.../parser/opennlp/impl/NCEnStopWordsFinder.scala | 8 +-
.../opennlp/NCEnOpenNlpTokenParserSpec.scala | 4 +-
.../nlpcraft/internal/nlp/util/NCTestRequest.scala | 3 +-
.../nlpcraft/internal/nlp/util/NCTestToken.scala | 3 +-
.../nlpcraft/internal/nlp/util/NCTestUtils.scala | 13 ++--
14 files changed, 89 insertions(+), 70 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
index 0e4e4fb..7d9b5dc 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java
@@ -57,7 +57,7 @@ public class NCModelClient implements NCLifecycle {
public void start(NCModelConfig cfg) {
verify();
- cfg.getTokenParser().start(cfg);
+ start(cfg.getTokenParsers(), cfg);
start(cfg.getEntityParsers(), cfg);
start(cfg.getEntityEnrichers(), cfg);
start(cfg.getTokenEnrichers(), cfg);
@@ -70,7 +70,7 @@ public class NCModelClient implements NCLifecycle {
stop(cfg.getTokenEnrichers());
stop(cfg.getEntityEnrichers());
stop(cfg.getEntityParsers());
- cfg.getTokenParser().stop();
+ stop(cfg.getTokenParsers());
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
index 7e5f054..77116de 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
@@ -52,7 +52,7 @@ public interface NCModelConfig extends NCPropertyMap {
*
* @return
*/
- NCTokenParser getTokenParser();
+ List<NCTokenParser> getTokenParsers();
/**
*
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
index f5439d6..a706a14 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
@@ -27,17 +27,69 @@ public class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCMode
private final String id;
private final String name;
private final String version;
- private final NCTokenParser tokParser;
- private List<NCTokenEnricher> tokenEnrichers;
- private List<NCEntityEnricher> entityEnrichers;
- private List<NCEntityParser> entityParsers;
+ private List<NCTokenParser> tokParsers = new ArrayList<>();
+ private List<NCTokenEnricher> tokEnrichers = new ArrayList<>();
+ private List<NCEntityEnricher> entEnrichers = new ArrayList<>();
+ private List<NCEntityParser> entParsers = new ArrayList<>();
+
+ /**
+ *
+ * @param id
+ * @param name
+ * @param version
+ * @param tokParser
+ */
+ public NCModelConfigAdapter(String id, String name, String version, NCTokenParser tokParser, NCEntityParser entParser) {
+ Objects.requireNonNull(tokParser, "Token parser cannot be null.");
+ Objects.requireNonNull(entParser, "Entity parser cannot be null.");
- public NCModelConfigAdapter(String id, String name, String version, NCTokenParser tokParser) {
this.id = id;
this.name = name;
this.version = version;
- this.tokParser = tokParser;
+
+ tokParsers.add(tokParser);
+ entParsers.add(entParser);
+ }
+
+ /**
+ *
+ * @param tokParser
+ */
+ public void addTokenParser(NCTokenParser tokParser) {
+ Objects.requireNonNull(tokParser, "Token parser cannot be null.");
+
+ tokParsers.add(tokParser);
+ }
+
+ /**
+ *
+ * @param entParser
+ */
+ public void addEntityParser(NCEntityParser entParser) {
+ Objects.requireNonNull(entParser, "Entity parser cannot be null.");
+
+ entParsers.add(entParser);
+ }
+
+ /**
+ *
+ * @param tokEnricher
+ */
+ public void addTokenEnricher(NCTokenEnricher tokEnricher) {
+ Objects.requireNonNull(tokEnricher, "Token enricher cannot be null.");
+
+ tokEnrichers.add(tokEnricher);
+ }
+
+ /**
+ *
+ * @param entEnricher
+ */
+ public void addEntityEnricher(NCEntityEnricher entEnricher) {
+ Objects.requireNonNull(entEnricher, "Entity enricher cannot be null.");
+
+ entEnrichers.add(entEnricher);
}
@Override
@@ -57,33 +109,21 @@ public class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCMode
@Override
public List<NCTokenEnricher> getTokenEnrichers() {
- return tokenEnrichers;
+ return tokEnrichers;
}
@Override
public List<NCEntityEnricher> getEntityEnrichers() {
- return entityEnrichers;
+ return entEnrichers;
}
@Override
- public NCTokenParser getTokenParser() {
- return tokParser;
+ public List<NCTokenParser> getTokenParsers() {
+ return tokParsers;
}
@Override
public List<NCEntityParser> getEntityParsers() {
- return entityParsers;
- }
-
- public void setTokenEnrichers(List<NCTokenEnricher> tokenEnrichers) {
- this.tokenEnrichers = tokenEnrichers;
- }
-
- public void setEntityEnrichers(List<NCEntityEnricher> entityEnrichers) {
- this.entityEnrichers = entityEnrichers;
- }
-
- public void setEntityParsers(List<NCEntityParser> entityParsers) {
- this.entityParsers = entityParsers;
+ return entParsers;
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
index 8875c23..2b181ae 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCRequest.java
@@ -18,7 +18,6 @@
package org.apache.nlpcraft;
import java.util.Map;
-import java.util.Optional;
/**
* Information about the user request.
@@ -47,17 +46,10 @@ public interface NCRequest {
String getRequestId();
/**
- * Gets normalized text of the user input.
- *
- * @return Normalized text of the user input.
- */
- String getNormalizedText();
-
- /**
*
* @return
*/
- String getOriginalText();
+ String getText();
/**
* Gets UTC/GMT timestamp in millis when user input was received.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
index dfef5a0..8e0a9ee 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java
@@ -25,13 +25,7 @@ public interface NCToken extends NCPropertyMap {
*
* @return
*/
- String getOriginalText();
-
- /**
- *
- * @return
- */
- String getNormalizedText();
+ String getText();
/**
*
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnBracketsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnBracketsImpl.scala
index cf485b0..acb3a02 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnBracketsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnBracketsImpl.scala
@@ -37,8 +37,8 @@ class NCEnBracketsImpl extends NCTokenEnricher with
LazyLogging:
def mark(t: NCToken): Unit = map += t -> !stack.isEmpty
for (t <- toks.asScala if ok)
- t.getOriginalText match
- case "(" | "{" | "[" | "<" => mark(t); stack.push(t.getOriginalText)
+ t.getText match
+ case "(" | "{" | "[" | "<" => mark(t); stack.push(t.getText)
case ")" => check("("); mark(t)
case "}" => check("{"); mark(t)
case "]" => check("["); mark(t)
@@ -47,4 +47,4 @@ class NCEnBracketsImpl extends NCTokenEnricher with
LazyLogging:
if ok && stack.isEmpty then map.foreach { (tok, b) =>
tok.put("brackets:en", b) }
else
- logger.trace(s"Invalid brackets: ${req.getOriginalText}")
\ No newline at end of file
+ logger.trace(s"Invalid brackets: ${req.getText}")
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
index 7d0be8e..7cdcf23 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnLanguageWordsImpl.scala
@@ -26,4 +26,4 @@ import java.io.*
*/
class NCEnLanguageWordsImpl extends NCTokenEnricher:
override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
java.util.List[NCToken]): Unit =
- toks.forEach(t => t.put("lang:en", t.getOriginalText.matches("""[\s\w\p{Punct}]+""")))
\ No newline at end of file
+ toks.forEach(t => t.put("lang:en", t.getText.matches("""[\s\w\p{Punct}]+""")))
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
index e37dce4..17c0048 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnQuotesImpl.scala
@@ -52,4 +52,4 @@ class NCEnQuotesImpl extends NCTokenEnricher with LazyLogging:
tok.put("quoted:en", pairs.exists { case (from, to) => from >
idx && to < idx })
}
else
- logger.warn(s"Invalid quotes: ${req.getOriginalText}")
\ No newline at end of file
+ logger.warn(s"Invalid quotes: ${req.getText}")
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
index 428f08a..097981f 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala
@@ -123,7 +123,7 @@ class NCEnOpenNlpImpl(
override def parse(req: NCRequest, cfg: NCModelConfig): JList[NCToken] =
// OpenNLP classes are not thread-safe.
this.synchronized {
- val sen = req.getNormalizedText
+ val sen = req.getText
case class TokenHolder(origin: String, normalized: String, start:
Int, end: Int, length: Int)
@@ -158,8 +158,7 @@ class NCEnOpenNlpImpl(
val res: Seq[NCToken] =
holders.zip(posTags).zip(lemmas).toIndexedSeq.map { case ((h, pos), lemma) =>
new NCPropertyMapAdapter with NCToken:
- override def getOriginalText: String = h.origin
- override def getNormalizedText: String = h.normalized
+ override def getText: String = h.origin
override def getLemma: String = lemma
override def getStem: String = stemmer.stem(h.normalized)
override def getPos: String = pos
@@ -174,8 +173,7 @@ class NCEnOpenNlpImpl(
res.map(tok =>
if stops.contains(tok) then
new NCPropertyMapAdapter with NCToken:
- override def getOriginalText: String = tok.getOriginalText
- override def getNormalizedText: String = tok.getNormalizedText
+ override def getText: String = tok.getText
override def getLemma: String = tok.getLemma
override def getStem: String = tok.getStem
override def getPos: String = tok.getPos
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index 6ba39f4..9522168 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -179,8 +179,8 @@ private[impl] object NCEnStopWordsFinder:
private def isQuote(t: NCToken): Boolean = Q_POS.contains(t.getPos)
private def toStemKey(toks: Seq[NCToken]): String = toks.map(_.getStem).mkString(" ")
private def toLemmaKey(toks: Seq[NCToken]): String = toks.map(_.getLemma).mkString(" ")
- private def toValueKey(toks: Seq[NCToken]): String = toks.map(_.getOriginalText.toLowerCase).mkString(" ")
- private def toOriginalKey(toks: Seq[NCToken]): String = toks.map(_.getOriginalText).mkString(" ")
+ private def toValueKey(toks: Seq[NCToken]): String = toks.map(_.getText.toLowerCase).mkString(" ")
+ private def toOriginalKey(toks: Seq[NCToken]): String = toks.map(_.getText).mkString(" ")
/**
*
@@ -562,8 +562,8 @@ private[impl] class NCEnStopWordsFinder(addStems:
Set[String], exclStems: Set[St
def mark(t: NCToken): Unit = if (!stack.isEmpty) set += t
for (t <- toks if ok)
- t.getOriginalText match
- case "(" | "{" | "[" | "<" => mark(t); stack.push(t.getOriginalText)
+ t.getText match
+ case "(" | "{" | "[" | "<" => mark(t); stack.push(t.getText)
case ")" => check("("); mark(t)
case "}" => check("{"); mark(t)
case "]" => check("["); mark(t)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
index bb52615..fe4b693 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
@@ -89,10 +89,10 @@ class NCEnOpenNlpTokenParserSpec:
test(
// Invalid brackets.
"A ( A A A",
- toks => toks.filter(_.getNormalizedText != "(").forall(_.isStopWord)
+ toks => toks.filter(_.getText != "(").forall(_.isStopWord)
)
test(
// Nested brackets.
"< < [ A ] > >",
- toks => require(!toks.find(_.getNormalizedText == "a").get.isStopWord)
+ toks => require(!toks.find(_.getText == "a").get.isStopWord)
)
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestRequest.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestRequest.scala
index 190bb77..8665bf3 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestRequest.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestRequest.scala
@@ -41,8 +41,7 @@ case class NCTestRequest(
) extends NCRequest:
override def getUserId: String = userId
override def getRequestId: String = reqId
- override def getNormalizedText: String = txt.toLowerCase
- override def getOriginalText: String = txt
+ override def getText: String = txt
override def getReceiveTimestamp: Long = ts
override def getUserAgent: String = userAgent
override def getRequestData: util.Map[String, AnyRef] = data
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
index 076ffc7..86d3860 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestToken.scala
@@ -39,8 +39,7 @@ case class NCTestToken(
start: Int = -1,
end: Int = -1
) extends NCPropertyMapAdapter with NCToken:
- override def getOriginalText: String = txt
- override def getNormalizedText: String = txt.toLowerCase
+ override def getText: String = txt
override def getLemma: String = lemma
override def getStem: String = stem
override def getPos: String = pos
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala
index f5d624f..06accfd 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala
@@ -26,7 +26,6 @@ import org.apache.nlpcraft.*
object NCTestUtils:
/**
*
- * @param req
* @param toks
* @param props
*/
@@ -34,14 +33,13 @@ object NCTestUtils:
val tbl = new NCAsciiTable()
if props.isEmpty
- then tbl #= ("Text", "Normalized", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword")
- else tbl #= ("Text", "Normalized", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword", "Properties")
+ then tbl #= ("Text", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword")
+ else tbl #= ("Text", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword", "Properties")
toks.foreach(t =>
if props.isEmpty then
tbl += (
- t.getOriginalText,
- t.getNormalizedText,
+ t.getText,
t.getPos,
t.getStem,
t.getLemma,
@@ -52,8 +50,7 @@ object NCTestUtils:
)
else
tbl += (
- t.getOriginalText,
- t.getNormalizedText,
+ t.getText,
t.getPos,
t.getStem,
t.getLemma,
@@ -65,7 +62,7 @@ object NCTestUtils:
)
)
- println(s"Request: ${toks.map(_.getOriginalText).mkString(" ")}")
+ println(s"Request: ${toks.map(_.getText).mkString(" ")}")
println(tbl.toString)
/**