This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 05c8cc57 WIP
05c8cc57 is described below
commit 05c8cc5719ec44031d3d404cb475c80e56471b3d
Author: Aaron Radzinski <[email protected]>
AuthorDate: Wed Dec 7 13:20:25 2022 -0800
WIP
---
.../nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala | 1 +
.../nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala | 1 +
.../nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala | 1 -
.../nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala | 2 +-
.../org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala | 1 -
.../apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala | 10 +++++-----
.../org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala | 1 -
7 files changed, 8 insertions(+), 9 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
index aff54c0d..9d6eceba 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
@@ -32,6 +32,7 @@ import scala.collection.mutable
*
* Note that invalid enclosed brackets are ignored.
*/
+//noinspection DuplicatedCode
class NCEnBracketsTokenEnricher extends NCTokenEnricher with LazyLogging:
override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
List[NCToken]): Unit =
val stack = new java.util.Stack[String]()
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
index a3ecf4b2..cc53bddf 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
@@ -33,6 +33,7 @@ class NCEnQuotesTokenEnricher extends NCTokenEnricher with
LazyLogging:
private def getPos(t: NCToken): String = t.get("pos").getOrElse(throw new
NCException("POS not found in token."))
private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
+ //noinspection DuplicatedCode
override def enrich(req: NCRequest, cfg: NCModelConfig, toks:
List[NCToken]): Unit =
val quotes = toks.filter(isQuote)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index b78ed002..0dca57bb 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -99,7 +99,6 @@ object NCEnStopWordsTokenEnricher:
private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw
new NCException(s"Lemma not found in token: ${t.keysSet}"))
private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
private def toLemmaKey(toks: Seq[NCToken]): String =
toks.map(getLemma).mkString(" ")
- private def toValueKey(toks: Seq[NCToken]): String =
toks.map(_.getText.toLowerCase).mkString(" ")
private def toOriginalKey(toks: Seq[NCToken]): String =
toks.map(_.getText).mkString(" ")
private def isStopWord(t: NCToken): Boolean =
t.get[Boolean]("stopword").getOrElse(false)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
index d11e2641..9127efaa 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
@@ -74,7 +74,7 @@ class NCOpenNLPLemmaPosTokenEnricher(posMdlSrc: String =
null, lemmaDicSrc: Stri
val poses = if tagger != null then tagger.tag(txts) else
txts.map(_ => "")
var lemmas = if lemmatizer != null then lemmatizer.lemmatize(txts,
poses) else txts
- require(toks.length == poses.length && toks.length ==
lemmas.length)
+ require(toks.sizeIs == poses.length && toks.sizeIs ==
lemmas.length)
// For some reasons lemmatizer (en-lemmatizer.dict) marks some
words with non-existent POS 'NNN'
// Valid POS list:
https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
index 0cbb408c..a460c929 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
@@ -30,7 +30,6 @@ package org.apache.nlpcraft.nlp.parsers
ALl rights reserved.
*/
-// TODO: link on site?
/**
*
* Configuration which helps to detect [[org.apache.nlpcraft.NCEntity
NCEntity]] for
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index d1cc8193..7d0c5a1d 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -38,10 +38,10 @@ object NCSemanticEntityParser:
/**
* Creates [[NCSemanticEntityParser]] instance.
*
- * @param stemmer [[NCSemanticStemmer]] implementation.
- * @param parser [[NCTokenParser]] implementation.
- * @param macros Macros map. Empty by default.
- * @param elements [[NCSemanticElement]] list.
+ * @param stemmer [[NCSemanticStemmer]] implementation.
+ * @param parser [[NCTokenParser]] implementation.
+ * @param macros Macros map. Empty by default.
+ * @param elements [[NCSemanticElement]] list.
*/
def apply(
stemmer: NCSemanticStemmer,
@@ -168,7 +168,7 @@ object NCSemanticEntityParser:
* @param tmp
*/
@tailrec private def combine(data1: Seq[String], data2: Seq[String], i:
Int = 0, tmp: Set[List[String]] = Set(List.empty)): Set[List[String]] =
- require(data1.size == data2.size)
+ require(data1.sizeIs == data2.size)
if data1.isEmpty then Set.empty
else if i >= data1.size then tmp
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
index c09d305b..5377e9a7 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
@@ -30,7 +30,6 @@ package org.apache.nlpcraft.nlp.parsers
ALl rights reserved.
*/
-// TODO: link on site?
/**
*
* Stemmer trait. Read more about stemming
[[https://en.wikipedia.org/wiki/Stemming here]].