This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 3bc7c6b CR WIP
3bc7c6b is described below
commit 3bc7c6b2629e2ff7d41d02b7fe0bd304a278b9a7
Author: Aaron Radzinski <[email protected]>
AuthorDate: Thu Jan 13 14:40:34 2022 -0800
CR WIP
---
.../parser/stanford/impl/NCStanfordNlpImpl.scala | 4 +--
.../nlpcraft/internal/makro/NCMacroCompiler.scala | 7 +++--
.../nlpcraft/internal/makro/NCMacroParser.scala | 10 +++----
.../apache/nlpcraft/internal/util/NCUtils.scala | 20 ++++++-------
.../semantic/impl/NCSemanticSourceReader.scala | 2 +-
.../impl/NCSemanticSynonymsProcessor.scala | 35 ++++++++--------------
.../token/enricher/en/impl/NCStopWordsImpl.scala | 12 ++------
7 files changed, 37 insertions(+), 53 deletions(-)
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
index 7e1be2d..fd29513 100644
---
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
@@ -41,9 +41,7 @@ class NCStanfordNlpImpl(stanford: StanfordCoreNLP) extends
NCTokenParser:
val doc = new CoreDocument(text)
stanford.annotate(doc)
val ann = doc.annotation().get(classOf[SentencesAnnotation])
-
- if ann == null then
- throw new NCException("Sentence annotation not found.") // TODO:
error text.
+ if ann == null then E("Sentence annotation not found.") // TODO: error
text.
val toks =
ann.asScala.flatMap(_.asInstanceOf[ArrayCoreMap].get(classOf[TokensAnnotation]).asScala).
zipWithIndex.map { (t, idx) =>
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
index c9ae012..f6e7ef5 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
@@ -17,15 +17,16 @@
package org.apache.nlpcraft.internal.makro
+import org.apache.nlpcraft.*
import com.typesafe.scalalogging.LazyLogging
import org.antlr.v4.runtime.tree.ParseTreeWalker
import org.antlr.v4.runtime.*
import org.apache.nlpcraft.internal.*
import org.apache.nlpcraft.internal.ansi.NCAnsi.*
-import org.apache.nlpcraft.internal.antlr4.{NCCompilerUtils, *}
+import org.apache.nlpcraft.internal.antlr4.*
import NCMacroCompiler.FiniteStateMachine
import org.apache.nlpcraft.internal.makro.antlr4.*
-import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.internal.util.*
import org.apache.nlpcraft.NCException
import org.apache.nlpcraft.internal.makro.antlr4.NCMacroDslLexer
@@ -195,7 +196,7 @@ object NCMacroCompiler extends LazyLogging:
line: Int, // 1, 2, ...
charPos: Int, // 1, 2, ...
msg: String,
- e: RecognitionException): Unit = throw new
NCException(mkCompilerError(msg, line, charPos - 1, in))
+ e: RecognitionException): Unit = E(mkCompilerError(msg, line,
charPos - 1, in))
/**
*
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
index cf6c3e2..5ef0568 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
@@ -126,7 +126,7 @@ class NCMacroParser:
// Expand macros including nested ones.
while (m.isDefined)
val ms = m.get.toString()
- if !macros.keySet.contains(ms) then throw new
NCException(s"Unknown macro [macro=$ms, txt=$txt]")
+ if !macros.keySet.contains(ms) then E(s"Unknown macro [macro=$ms,
txt=$txt]")
// Expand all registered macros.
for ((k, v) <- macros) s = s.replace(k, v)
// Grab another macro match, if any.
@@ -134,7 +134,7 @@ class NCMacroParser:
// Check for potentially invalid macros syntax.
if BROKEN_MACRO_REGEX1.findFirstIn(s).isDefined ||
BROKEN_MACRO_REGEX2.findFirstIn(s).isDefined then
- throw new NCException(s"Suspicious or invalid macro in: $txt")
+ E(s"Suspicious or invalid macro in: $txt")
NCUtils.distinct(NCMacroCompiler.compile(s).toList map trimDupSpaces
map processEscapes)
@@ -152,8 +152,8 @@ class NCMacroParser:
* @param name Macro name.
*/
private def checkName(name: String): Unit =
- if name.head != '<' then throw new NCException(s"Missing macro '<'
opening: $name")
- if name.last != '>' then throw new NCException(s"Missing macro '>'
closing: $name")
+ if name.head != '<' then E(s"Missing macro '<' opening: $name")
+ if name.last != '>' then E(s"Missing macro '>' closing: $name")
/**
* Adds or overrides given macro.
@@ -168,7 +168,7 @@ class NCMacroParser:
checkName(name)
// Check for recursion.
- if str.contains(name) then throw new NCException(s"Recursion is not
supported, macro: $name")
+ if str.contains(name) then E(s"Recursion is not supported, macro:
$name")
macros += name -> str
this
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index 653de1e..c3867ca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -560,9 +560,9 @@ object NCUtils extends LazyLogging:
else if isResource(src) then
getClass.getClassLoader.getResourceAsStream(src) match
case in if in != null => in
- case _ => throw new NCException(s"Resource not found: $src")
+ case _ => E(s"Resource not found: $src")
else if isUrl(src) then new URL(src).openStream()
- else throw new NCException(s"Source not found or unsupported: $src")
+ else E(s"Source not found or unsupported: $src")
/**
* Makes thread.
@@ -779,7 +779,7 @@ object NCUtils extends LazyLogging:
getAndLog(src.getLines().map(p => p).toList, f, log)
}
catch
- case e: IOException => throw new NCException(s"Failed to read GZIP
file: ${f.getAbsolutePath}", e)
+ case e: IOException => E(s"Failed to read GZIP file:
${f.getAbsolutePath}", e)
/**
* Reads bytes from given file.
@@ -793,7 +793,7 @@ object NCUtils extends LazyLogging:
Using.resource(new FileInputStream(f))(_.read(arr))
getAndLog(arr, f, log)
catch
- case e: IOException => throw new NCException(s"Error reading file:
$f", e)
+ case e: IOException => E(s"Error reading file: $f", e)
/**
@@ -820,9 +820,9 @@ object NCUtils extends LazyLogging:
stream.flush()
}
catch
- case e: IOException => throw new NCException(s"Error gzip file:
$f", e)
+ case e: IOException => E(s"Error gzip file: $f", e)
- if !f.delete() then throw new NCException(s"Error while deleting file:
$f")
+ if !f.delete() then E(s"Error while deleting file: $f")
logger.trace(s"File gzipped [source=$f, destination=$gz]")
@@ -847,7 +847,7 @@ object NCUtils extends LazyLogging:
def readResource(res: String, enc: String = "UTF-8", log: Logger =
logger): List[String] =
val list =
try Using.resource(Source.fromInputStream(getStream(res),
enc))(_.getLines().toSeq).toList
- catch case e: IOException => throw new NCException(s"Failed to
read stream: $res", e)
+ catch case e: IOException => E(s"Failed to read stream: $res", e)
log.trace(s"Loaded resource: $res")
@@ -872,7 +872,7 @@ object NCUtils extends LazyLogging:
def readTextGzipResource(res: String, enc: String, log: Logger = logger):
List[String] =
val list =
try Using.resource(Source.fromInputStream(new
GZIPInputStream(getStream(res)), enc))(readLcTrimFilter)
- catch case e: IOException => throw new NCException(s"Failed to
read stream: $res", e)
+ catch case e: IOException => E(s"Failed to read stream: $res", e)
log.trace(s"Loaded resource: $res")
@@ -891,7 +891,7 @@ object NCUtils extends LazyLogging:
readLcTrimFilter(src)
}
catch
- case e: IOException => throw new NCException(s"Failed to read
stream.", e)
+ case e: IOException => E(s"Failed to read stream.", e)
/**
*
@@ -911,7 +911,7 @@ object NCUtils extends LazyLogging:
if !errs.isEmpty then
errs.forEach(e => logger.error("Error during service starting.",
e)) // TODO: error message.
- throw new NCException("Some service cannot be started.") // TODO:
error message.
+ E("Some service cannot be started.") // TODO: error message.
/**
* Shuts down executor service and waits for its finish.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
index 90d16c7..8a0a003 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
@@ -38,7 +38,7 @@ private[impl] object NCSemanticSourceType:
if lc.endsWith(".json") || lc.endsWith(".js") then JSON
else if lc.endsWith(".yaml") || lc.endsWith(".yml") then YAML
- else throw new NCException("Unexpected data type. Expected `yaml` or
`json` formats.") // TODO: error text.
+ else E("Unexpected data type. Expected `yaml` or `json` formats.") //
TODO: error text.
/**
*
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 43c0307..2b57f71 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -75,8 +75,8 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
require(elements != null)
if macros != null then
- if hasNullOrEmpty(macros.keySet) then throw new NCException("Some
macro names are null or empty.") // TODO: error text.
- if hasNullOrEmpty(macros.values) then throw new NCException("Some
macro bodies are null or empty.") // TODO: error text.
+ if hasNullOrEmpty(macros.keySet) then E("Some macro names are null
or empty.") // TODO: error text.
+ if hasNullOrEmpty(macros.values) then E("Some macro bodies are
null or empty.") // TODO: error text.
val set = elements.filter(_.getSynonyms !=
null).flatMap(_.getSynonyms.asScala) ++ macros.values
@@ -107,7 +107,7 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
s"[id=$elemId$valuePart]"
if syns != null then
- if hasNullOrEmpty(syns.asScala) then throw new NCException(s"Some
synonyms are null or empty $mkDesc") // TODO: error text.
+ if hasNullOrEmpty(syns.asScala) then E(s"Some synonyms are null or
empty $mkDesc") // TODO: error text.
val susp = syns.asScala.filter(syn => !syn.contains("//") &&
SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
@@ -121,33 +121,29 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
* @param elems
*/
private def checkElements(elems: Seq[NCSemanticElement]): Unit =
- if elems == null || elems.isEmpty then throw new NCException("Elements
cannot be null or empty.") // TODO: error text.
- if elems.contains(null) then throw new NCException("Some elements are
null.") // TODO: error text.
+ if elems == null || elems.isEmpty then E("Elements cannot be null or
empty.") // TODO: error text.
+ if elems.contains(null) then E("Some elements are null.") // TODO:
error text.
// Duplicates.
val ids = mutable.HashSet.empty[String]
for (id <- elems.map(_.getId))
- if ids.contains(id) then throw new NCException(s"Duplicate element
ID [element=$id]") // TODO: error text.
+ if ids.contains(id) then E(s"Duplicate element ID [element=$id]")
// TODO: error text.
else ids += id
for (e <- elems)
val elemId = e.getId
- if elemId == null || elemId.isEmpty then
- throw new NCException(s"Some element IDs are not provided or
empty.") // TODO: error text.
- else if !elemId.matches(ID_REGEX) then
- throw new NCException(s"Element ID does not match regex
[element=$elemId, regex=$ID_REGEX]") // TODO: error text.
- else if elemId.exists(_.isWhitespace) then
- throw new NCException(s"Element ID cannot have whitespaces
[element=$elemId]") // TODO: error text.
+ if elemId == null || elemId.isEmpty then E(s"Some element IDs are
not provided or empty.") // TODO: error text.
+ else if !elemId.matches(ID_REGEX) then E(s"Element ID does not
match regex [element=$elemId, regex=$ID_REGEX]") // TODO: error text.
+ else if elemId.exists(_.isWhitespace) then E(s"Element ID cannot
have whitespaces [element=$elemId]") // TODO: error text.
checkSynonyms(e.getSynonyms, elemId)
val vals = e.getValues
if vals != null then
- if hasNullOrEmpty(vals.keySet().asScala) then
- throw new NCException(s"Some values names are null or
empty [element=$elemId]") // TODO: error text.
+ if hasNullOrEmpty(vals.keySet().asScala) then E(s"Some values
names are null or empty [element=$elemId]") // TODO: error text.
for ((name, syns) <- vals.asScala)
checkSynonyms(syns, elemId, Some(name))
@@ -175,14 +171,9 @@ private[impl] object NCSemanticSynonymsProcessor extends
LazyLogging:
val ptrn = stripSuffix(REGEX_FIX, text)
if ptrn.nonEmpty then
- try
- NCSemanticSynonymChunk(REGEX, text, regex =
Pattern.compile(ptrn))
- catch
- case e: PatternSyntaxException =>
- // TODO: error text.
- throw new NCException(s"Invalid regex synonym
syntax detected [element=$elemId, chunk=$text]", e)
- else
- throw new NCException(s"Empty regex synonym detected
[element=$elemId]") // TODO: error text.
+ try NCSemanticSynonymChunk(REGEX, text, regex =
Pattern.compile(ptrn))
+ catch case e: PatternSyntaxException => E(s"Invalid regex
synonym syntax detected [element=$elemId, chunk=$text]", e)
+ else E(s"Empty regex synonym detected [element=$elemId]") //
TODO: error text.
val regexes = mutable.HashMap.empty[Int, RegexHolder]
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
index c3ebba9..e7eca55 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
@@ -286,18 +286,13 @@ class NCStopWordsImpl(addStopsSet: JSet[String],
exclStopsSet: JSet[String]) ext
exclStems = if exclStopsSet == null then Set.empty else
exclStopsSet.asScala.toSet.map(stem)
def check(name: String, set: Set[String]): Unit =
- if set.exists(_.exists(_.isWhitespace)) then
- throw new NCException(s"$name contain a strings with
whitespaces.") // TODO: error texts.
+ if set.exists(_.exists(_.isWhitespace)) then E(s"$name contain a
strings with whitespaces.") // TODO: error texts.
check("Additional synonyms", addStems) // TODO: error texts.
check("Excluded synonyms", exclStems) // TODO: error texts.
val dups = addStems.intersect(exclStems)
-
- if dups.nonEmpty then
- throw new NCException(
- s"Duplicate stems detected between additional and excluded
stopwords [dups=${dups.mkString(",")}]"
- )
+ if dups.nonEmpty then E(s"Duplicate stems detected between additional
and excluded stopwords [dups=${dups.mkString(",")}]")
percents = PERCENTS.map(stem)
@@ -370,8 +365,7 @@ class NCStopWordsImpl(addStopsSet: JSet[String],
exclStopsSet: JSet[String]) ext
// 2. Accumulates data of each parsed line.
for (line <- lines)
- def throwError(msg: String): Unit =
- throw new NCException(s"Invalid stop word configuration
[line=$line, reason=$msg]") // TODO: error texts.
+ def throwError(msg: String): Unit = E(s"Invalid stop word
configuration [line=$line, reason=$msg]") // TODO: error texts.
var s = line.trim