[incubator-nlpcraft] branch NLPCRAFT-472 updated: CR WIP

aradzinski Thu, 13 Jan 2022 14:40:47 -0800

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
     new 3bc7c6b  CR WIP
3bc7c6b is described below

commit 3bc7c6b2629e2ff7d41d02b7fe0bd304a278b9a7
Author: Aaron Radzinski <[email protected]>
AuthorDate: Thu Jan 13 14:40:34 2022 -0800

    CR WIP
---
 .../parser/stanford/impl/NCStanfordNlpImpl.scala   |  4 +--
 .../nlpcraft/internal/makro/NCMacroCompiler.scala  |  7 +++--
 .../nlpcraft/internal/makro/NCMacroParser.scala    | 10 +++----
 .../apache/nlpcraft/internal/util/NCUtils.scala    | 20 ++++++-------
 .../semantic/impl/NCSemanticSourceReader.scala     |  2 +-
 .../impl/NCSemanticSynonymsProcessor.scala         | 35 ++++++++--------------
 .../token/enricher/en/impl/NCStopWordsImpl.scala   | 12 ++------
 7 files changed, 37 insertions(+), 53 deletions(-)

diff --git a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
index 7e1be2d..fd29513 100644
--- a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
+++ b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
@@ -41,9 +41,7 @@ class NCStanfordNlpImpl(stanford: StanfordCoreNLP) extends NCTokenParser:
         val doc = new CoreDocument(text)
         stanford.annotate(doc)
         val ann = doc.annotation().get(classOf[SentencesAnnotation])
-
-        if ann == null then
-            throw new NCException("Sentence annotation not found.") // TODO: error text.
+        if ann == null then E("Sentence annotation not found.") // TODO: error text.
 
         val toks = ann.asScala.flatMap(_.asInstanceOf[ArrayCoreMap].get(classOf[TokensAnnotation]).asScala).
             zipWithIndex.map { (t, idx) =>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
index c9ae012..f6e7ef5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroCompiler.scala
@@ -17,15 +17,16 @@
 
 package org.apache.nlpcraft.internal.makro
 
+import org.apache.nlpcraft.*
 import com.typesafe.scalalogging.LazyLogging
 import org.antlr.v4.runtime.tree.ParseTreeWalker
 import org.antlr.v4.runtime.*
 import org.apache.nlpcraft.internal.*
 import org.apache.nlpcraft.internal.ansi.NCAnsi.*
-import org.apache.nlpcraft.internal.antlr4.{NCCompilerUtils, *}
+import org.apache.nlpcraft.internal.antlr4.*
 import NCMacroCompiler.FiniteStateMachine
 import org.apache.nlpcraft.internal.makro.antlr4.*
-import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.internal.util.*
 import org.apache.nlpcraft.NCException
 import org.apache.nlpcraft.internal.makro.antlr4.NCMacroDslLexer
 
@@ -195,7 +196,7 @@ object NCMacroCompiler extends LazyLogging:
             line: Int, // 1, 2, ...
             charPos: Int, // 1, 2, ...
             msg: String,
-            e: RecognitionException): Unit = throw new NCException(mkCompilerError(msg, line, charPos - 1, in))
+            e: RecognitionException): Unit = E(mkCompilerError(msg, line, charPos - 1, in))
 
     /**
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
index cf6c3e2..5ef0568 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/makro/NCMacroParser.scala
@@ -126,7 +126,7 @@ class NCMacroParser:
         // Expand macros including nested ones.
         while (m.isDefined)
             val ms = m.get.toString()
-            if !macros.keySet.contains(ms) then throw new NCException(s"Unknown macro [macro=$ms, txt=$txt]")
+            if !macros.keySet.contains(ms) then E(s"Unknown macro [macro=$ms, txt=$txt]")
             // Expand all registered macros.
             for ((k, v) <- macros) s = s.replace(k, v)
             // Grab another macro match, if any.
@@ -134,7 +134,7 @@ class NCMacroParser:
 
         // Check for potentially invalid macros syntax.
         if BROKEN_MACRO_REGEX1.findFirstIn(s).isDefined || BROKEN_MACRO_REGEX2.findFirstIn(s).isDefined then
-            throw new NCException(s"Suspicious or invalid macro in: $txt")
+            E(s"Suspicious or invalid macro in: $txt")
 
         NCUtils.distinct(NCMacroCompiler.compile(s).toList map trimDupSpaces map processEscapes)
 
@@ -152,8 +152,8 @@ class NCMacroParser:
       * @param name Macro name.
       */
     private def checkName(name: String): Unit =
-        if name.head != '<' then throw new NCException(s"Missing macro '<' opening: $name")
-        if name.last != '>' then throw new NCException(s"Missing macro '>' closing: $name")
+        if name.head != '<' then E(s"Missing macro '<' opening: $name")
+        if name.last != '>' then E(s"Missing macro '>' closing: $name")
 
     /**
       * Adds or overrides given macro.
@@ -168,7 +168,7 @@ class NCMacroParser:
 
         checkName(name)
         // Check for recursion.
-        if str.contains(name) then throw new NCException(s"Recursion is not supported, macro: $name")
+        if str.contains(name) then E(s"Recursion is not supported, macro: $name")
         macros += name -> str
         this
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index 653de1e..c3867ca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -560,9 +560,9 @@ object NCUtils extends LazyLogging:
         else if isResource(src) then
             getClass.getClassLoader.getResourceAsStream(src) match
                 case in if in != null => in
-                case _ => throw new NCException(s"Resource not found: $src")
+                case _ => E(s"Resource not found: $src")
         else if isUrl(src) then new URL(src).openStream()
-        else throw new NCException(s"Source not found or unsupported: $src")
+        else E(s"Source not found or unsupported: $src")
 
     /**
       * Makes thread.
@@ -779,7 +779,7 @@ object NCUtils extends LazyLogging:
                 getAndLog(src.getLines().map(p => p).toList, f, log)
             }
         catch
-            case e: IOException => throw new NCException(s"Failed to read GZIP file: ${f.getAbsolutePath}", e)
+            case e: IOException => E(s"Failed to read GZIP file: ${f.getAbsolutePath}", e)
 
     /**
       * Reads bytes from given file.
@@ -793,7 +793,7 @@ object NCUtils extends LazyLogging:
             Using.resource(new FileInputStream(f))(_.read(arr))
             getAndLog(arr, f, log)
         catch
-            case e: IOException => throw new NCException(s"Error reading file: $f", e)
+            case e: IOException => E(s"Error reading file: $f", e)
 
 
     /**
@@ -820,9 +820,9 @@ object NCUtils extends LazyLogging:
                 stream.flush()
             }
         catch
-            case e: IOException => throw new NCException(s"Error gzip file: $f", e)
+            case e: IOException => E(s"Error gzip file: $f", e)
 
-        if !f.delete() then throw new NCException(s"Error while deleting file: $f")
+        if !f.delete() then E(s"Error while deleting file: $f")
 
         logger.trace(s"File gzipped [source=$f, destination=$gz]")
 
@@ -847,7 +847,7 @@ object NCUtils extends LazyLogging:
     def readResource(res: String, enc: String = "UTF-8", log: Logger = logger): List[String] =
         val list =
             try Using.resource(Source.fromInputStream(getStream(res), enc))(_.getLines().toSeq).toList
-            catch case e: IOException => throw new NCException(s"Failed to read stream: $res", e)
+            catch case e: IOException => E(s"Failed to read stream: $res", e)
     
         log.trace(s"Loaded resource: $res")
 
@@ -872,7 +872,7 @@ object NCUtils extends LazyLogging:
     def readTextGzipResource(res: String, enc: String, log: Logger = logger): List[String] =
         val list =
             try Using.resource(Source.fromInputStream(new GZIPInputStream(getStream(res)), enc))(readLcTrimFilter)
-            catch case e: IOException => throw new NCException(s"Failed to read stream: $res", e)
+            catch case e: IOException => E(s"Failed to read stream: $res", e)
 
         log.trace(s"Loaded resource: $res")
 
@@ -891,7 +891,7 @@ object NCUtils extends LazyLogging:
                 readLcTrimFilter(src)
             }
         catch
-            case e: IOException => throw new NCException(s"Failed to read stream.", e)
+            case e: IOException => E(s"Failed to read stream.", e)
 
     /**
       *
@@ -911,7 +911,7 @@ object NCUtils extends LazyLogging:
 
         if !errs.isEmpty then
             errs.forEach(e => logger.error("Error during service starting.", e)) // TODO: error message.
-            throw new NCException("Some service cannot be started.")  // TODO: error message.
+            E("Some service cannot be started.")  // TODO: error message.
 
     /**
       * Shuts down executor service and waits for its finish.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
index 90d16c7..8a0a003 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
@@ -38,7 +38,7 @@ private[impl] object NCSemanticSourceType:
 
         if lc.endsWith(".json") || lc.endsWith(".js") then JSON
         else if lc.endsWith(".yaml") || lc.endsWith(".yml") then YAML
-        else throw new NCException("Unexpected data type. Expected `yaml` or `json` formats.") // TODO: error text.
+        else E("Unexpected data type. Expected `yaml` or `json` formats.") // TODO: error text.
 
 /**
   *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 43c0307..2b57f71 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -75,8 +75,8 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
         require(elements != null)
 
         if macros != null then
-            if hasNullOrEmpty(macros.keySet) then throw new NCException("Some macro names are null or empty.") // TODO: error text.
-            if hasNullOrEmpty(macros.values) then throw new NCException("Some macro bodies are null or empty.") // TODO: error text.
+            if hasNullOrEmpty(macros.keySet) then E("Some macro names are null or empty.") // TODO: error text.
+            if hasNullOrEmpty(macros.values) then E("Some macro bodies are null or empty.") // TODO: error text.
 
             val set = elements.filter(_.getSynonyms != null).flatMap(_.getSynonyms.asScala) ++ macros.values
 
@@ -107,7 +107,7 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
             s"[id=$elemId$valuePart]"
 
         if syns != null then
-            if hasNullOrEmpty(syns.asScala) then throw new NCException(s"Some synonyms are null or empty $mkDesc") // TODO: error text.
+            if hasNullOrEmpty(syns.asScala) then E(s"Some synonyms are null or empty $mkDesc") // TODO: error text.
 
             val susp = syns.asScala.filter(syn => !syn.contains("//") && SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
 
@@ -121,33 +121,29 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
       * @param elems
       */
     private def checkElements(elems: Seq[NCSemanticElement]): Unit =
-        if elems == null || elems.isEmpty then throw new NCException("Elements cannot be null or empty.") // TODO: error text.
-        if elems.contains(null) then throw new NCException("Some elements are null.") // TODO: error text.
+        if elems == null || elems.isEmpty then E("Elements cannot be null or empty.") // TODO: error text.
+        if elems.contains(null) then E("Some elements are null.") // TODO: error text.
 
         // Duplicates.
         val ids = mutable.HashSet.empty[String]
 
         for (id <- elems.map(_.getId))
-            if ids.contains(id) then throw new NCException(s"Duplicate element ID [element=$id]") // TODO: error text.
+            if ids.contains(id) then E(s"Duplicate element ID [element=$id]") // TODO: error text.
             else ids += id
 
         for (e <- elems)
             val elemId = e.getId
 
-            if elemId == null || elemId.isEmpty then
-                throw new NCException(s"Some element IDs are not provided or empty.") // TODO: error text.
-            else if !elemId.matches(ID_REGEX) then
-                throw new NCException(s"Element ID does not match regex [element=$elemId, regex=$ID_REGEX]") // TODO: error text.
-            else if elemId.exists(_.isWhitespace) then
-                throw new NCException(s"Element ID cannot have whitespaces [element=$elemId]") // TODO: error text.
+            if elemId == null || elemId.isEmpty then E(s"Some element IDs are not provided or empty.") // TODO: error text.
+            else if !elemId.matches(ID_REGEX) then E(s"Element ID does not match regex [element=$elemId, regex=$ID_REGEX]") // TODO: error text.
+            else if elemId.exists(_.isWhitespace) then E(s"Element ID cannot have whitespaces [element=$elemId]") // TODO: error text.
 
             checkSynonyms(e.getSynonyms, elemId)
 
             val vals = e.getValues
 
             if vals != null then
-                if hasNullOrEmpty(vals.keySet().asScala) then
-                    throw new NCException(s"Some values names are null or empty [element=$elemId]") // TODO: error text.
+                if hasNullOrEmpty(vals.keySet().asScala) then E(s"Some values names are null or empty [element=$elemId]") // TODO: error text.
 
                 for ((name, syns) <- vals.asScala)
                     checkSynonyms(syns, elemId, Some(name))
@@ -175,14 +171,9 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
                 val ptrn = stripSuffix(REGEX_FIX, text)
 
                 if ptrn.nonEmpty then
-                    try
-                        NCSemanticSynonymChunk(REGEX, text, regex = Pattern.compile(ptrn))
-                    catch
-                        case e: PatternSyntaxException =>
-                            // TODO: error text.
-                            throw new NCException(s"Invalid regex synonym syntax detected [element=$elemId, chunk=$text]", e)
-                else
-                    throw new NCException(s"Empty regex synonym detected [element=$elemId]") // TODO: error text.
+                    try NCSemanticSynonymChunk(REGEX, text, regex = Pattern.compile(ptrn))
+                    catch case e: PatternSyntaxException => E(s"Invalid regex synonym syntax detected [element=$elemId, chunk=$text]", e)
+                else E(s"Empty regex synonym detected [element=$elemId]") // TODO: error text.
 
         val regexes = mutable.HashMap.empty[Int, RegexHolder]
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
index c3ebba9..e7eca55 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala
@@ -286,18 +286,13 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext
         exclStems = if exclStopsSet == null then Set.empty else exclStopsSet.asScala.toSet.map(stem)
 
         def check(name: String, set: Set[String]): Unit =
-            if set.exists(_.exists(_.isWhitespace)) then
-                throw new NCException(s"$name contain a strings with whitespaces.") // TODO: error texts.
+            if set.exists(_.exists(_.isWhitespace)) then E(s"$name contain a strings with whitespaces.") // TODO: error texts.
 
         check("Additional synonyms", addStems) // TODO: error texts.
         check("Excluded synonyms", exclStems) // TODO: error texts.
 
         val dups = addStems.intersect(exclStems)
-
-        if dups.nonEmpty then
-            throw new NCException(
-                s"Duplicate stems detected between additional and excluded stopwords [dups=${dups.mkString(",")}]"
-            )
+        if dups.nonEmpty then E(s"Duplicate stems detected between additional and excluded stopwords [dups=${dups.mkString(",")}]")
 
         percents = PERCENTS.map(stem)
 
@@ -370,8 +365,7 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext
 
         // 2. Accumulates data of each parsed line.
         for (line <- lines)
-            def throwError(msg: String): Unit =
-                throw new NCException(s"Invalid stop word configuration [line=$line, reason=$msg]") // TODO: error texts.
+            def throwError(msg: String): Unit = E(s"Invalid stop word configuration [line=$line, reason=$msg]") // TODO: error texts.
 
             var s = line.trim

[incubator-nlpcraft] branch NLPCRAFT-472 updated: CR WIP

Reply via email to