This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
     new 4a9df81  WIP.
4a9df81 is described below

commit 4a9df814779ffb89bdadf8254acb31a55ff4e6ac
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Dec 30 22:36:24 2021 +0300

    WIP.
---
 .../parser/opennlp/NCOpenNlpEntityParser.java      | 12 +-------
 .../opennlp/impl/NCOpenNlpEntityParserImpl.scala   |  7 ++---
 .../parser/semantic/NCSemanticEntityParser.java    | 13 --------
 .../semantic/en/NCEnSemanticEntityParser.java      |  9 ------
 .../semantic/impl/NCSemanticEntityParserImpl.scala | 24 +++------------
 .../enricher/en/NCEnSwearWordsTokenEnricher.java   | 15 +--------
 .../enricher/en/impl/NCEnSwearWordsImpl.scala      | 27 ++--------------
 .../parser/opennlp/en/NCEnOpenNlpTokenParser.java  | 21 +------------
 .../parser/opennlp/en/impl/NCEnOpenNlpImpl.scala   | 36 +++-------------------
 .../nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java  | 18 +----------
 .../opennlp/impl/NCOpenNlpTokenizerImpl.scala      | 11 ++-----
 11 files changed, 20 insertions(+), 173 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
index a725430..1ea0930 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParser.java
@@ -20,7 +20,6 @@ package org.apache.nlpcraft.nlp.entity.parser.opennlp;
 import org.apache.nlpcraft.*;
 import 
org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNlpEntityParserImpl;
 
-import java.io.File;
 import java.util.List;
 import java.util.Objects;
 
@@ -45,16 +44,7 @@ public class NCOpenNlpEntityParser implements NCEntityParser 
{
     public NCOpenNlpEntityParser(String mdlSrc) {
         Objects.requireNonNull(mdlSrc, "Model source cannot be null.");
 
-        this.impl = NCOpenNlpEntityParserImpl.apply(mdlSrc);
-    }
-
-    /**
-     * @param mdlFile
-     */
-    public NCOpenNlpEntityParser(File mdlFile) {
-        Objects.requireNonNull(mdlFile, "Model file cannot be null.");
-
-        this.impl = NCOpenNlpEntityParserImpl.apply(mdlFile);
+        this.impl = new NCOpenNlpEntityParserImpl(mdlSrc);
     }
 
     @Override
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala
index f35b46e..96519ca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala
@@ -31,14 +31,11 @@ import scala.jdk.CollectionConverters.*
 import scala.language.postfixOps
 import scala.util.Using
 
-object NCOpenNlpEntityParserImpl:
-    def apply(res: String): NCOpenNlpEntityParserImpl = new 
NCOpenNlpEntityParserImpl(NCUtils.getStream(res), res)
-    def apply(f: File): NCOpenNlpEntityParserImpl = new 
NCOpenNlpEntityParserImpl(new FileInputStream(f), f.getAbsolutePath)
-
 /**
   *
+  * @param res
   */
-class NCOpenNlpEntityParserImpl(is: InputStream, res: String) extends 
NCEntityParser with LazyLogging :
+class NCOpenNlpEntityParserImpl(res: String) extends NCEntityParser with 
LazyLogging :
     @volatile private var finder: NameFinderME = _
 
     private case class Holder(start: Int, end: Int, name: String, probability: 
Double)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
index 223d5dd..9c09111 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java
@@ -24,7 +24,6 @@ import org.apache.nlpcraft.NCRequest;
 import org.apache.nlpcraft.NCToken;
 import 
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticEntityParserImpl;
 
-import java.io.File;
 import java.util.*;
 
 /**
@@ -61,18 +60,6 @@ public class NCSemanticEntityParser implements 
NCEntityParser {
     /**
      *
      * @param stemmer
-     * @param mdlFile
-     */
-    public NCSemanticEntityParser(NCSemanticTextStemmer stemmer, File mdlFile) 
{
-        Objects.requireNonNull(stemmer, "Stemmer cannot be null");
-        Objects.requireNonNull(mdlFile, "File cannot be null");
-
-        impl = NCSemanticEntityParserImpl.apply(stemmer, mdlFile);
-    }
-
-    /**
-     *
-     * @param stemmer
      * @param mdlSrc
      */
     public NCSemanticEntityParser(NCSemanticTextStemmer stemmer, String 
mdlSrc) {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java
index 6452938..ae21ccb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/en/NCEnSemanticEntityParser.java
@@ -4,7 +4,6 @@ import 
org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticElement;
 import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser;
 import 
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticTextStemmer;
 
-import java.io.File;
 import java.util.List;
 import java.util.Map;
 
@@ -30,14 +29,6 @@ public class NCEnSemanticEntityParser extends 
NCSemanticEntityParser {
     }
 
     /**
-     *
-     * @param mdlFile
-     */
-    public NCEnSemanticEntityParser(File mdlFile) {
-        super(new NCEnSemanticTextStemmer(), mdlFile);
-    }
-
-    /**
      * 
      * @param mdlSrc
      */
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 2169f40..44adac1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -40,25 +40,11 @@ object NCSemanticEntityParserImpl:
             stemmer, macros = if macros == null then null else 
macros.asScala.toMap, elements = elems.asScala.toSeq
         )
 
-    def apply(stemmer: NCSemanticTextStemmer, mdlFile: File): 
NCSemanticEntityParserImpl =
-        require(stemmer != null)
-        require(mdlFile != null)
-
-        new NCSemanticEntityParserImpl(
-            stemmer,
-            is = new BufferedInputStream(new FileInputStream(mdlFile)),
-            typ = NCSemanticSourceType(mdlFile.getName)
-        )
-
     def apply(stemmer: NCSemanticTextStemmer, mdlSrc: String): 
NCSemanticEntityParserImpl =
         require(stemmer != null)
         require(mdlSrc != null)
 
-        new NCSemanticEntityParserImpl(
-            stemmer,
-            is = new BufferedInputStream(NCUtils.getStream(mdlSrc)),
-            typ = NCSemanticSourceType(mdlSrc)
-        )
+        new NCSemanticEntityParserImpl(stemmer, res = mdlSrc, typ = 
NCSemanticSourceType(mdlSrc))
 
     /**
       * @param baseTokens Tokens.
@@ -133,18 +119,18 @@ class NCSemanticEntityParserImpl(
     stemmer: NCSemanticTextStemmer,
     macros: Map[String, String] = null,
     elements: Seq[NCSemanticElement] = null,
-    is: InputStream = null,
+    res: String = null,
     typ: NCSemanticSourceType = null
 ) extends NCEntityParser with LazyLogging:
     require(stemmer != null)
-    require(macros != null && elements != null || is != null && typ != null)
+    require(macros != null && elements != null || res != null && typ != null)
 
     @volatile private var h: NCSemanticSynonymsHolder = _
 
     override def start(cfg: NCModelConfig): Unit =
         val (macros, elements) =
-            if is != null then
-                val src = NCSemanticDataReader.read(is, typ)
+            if res != null then
+                val src = NCSemanticDataReader.read(new 
BufferedInputStream(NCUtils.getStream(res)), typ)
                 (src.macros, src.elements)
             else
                 (this.macros, this.elements)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
index e5b84b2..aca1786 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCEnSwearWordsTokenEnricher.java
@@ -20,7 +20,6 @@ package org.apache.nlpcraft.nlp.token.enricher.en;
 import org.apache.nlpcraft.*;
 import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCEnSwearWordsImpl;
 
-import java.io.File;
 import java.util.List;
 import java.util.Objects;
 
@@ -31,18 +30,6 @@ public class NCEnSwearWordsTokenEnricher implements 
NCTokenEnricher {
     private final NCEnSwearWordsImpl impl;
 
     /**
-     *
-     * TODO: swear_words.txt
-     *
-     * @param mdlFile
-     */
-    public NCEnSwearWordsTokenEnricher(File mdlFile) {
-        Objects.requireNonNull(mdlFile, "Swear words model file cannot be 
null.");
-
-        impl = NCEnSwearWordsImpl.apply(mdlFile);
-    }
-
-    /**
      * TODO: swear_words.txt
      * 
      * @param mdlSrc
@@ -50,7 +37,7 @@ public class NCEnSwearWordsTokenEnricher implements 
NCTokenEnricher {
     public NCEnSwearWordsTokenEnricher(String mdlSrc) {
         Objects.requireNonNull(mdlSrc, "Swear words model file cannot be 
null.");
 
-        impl = NCEnSwearWordsImpl.apply(mdlSrc);
+        impl = new NCEnSwearWordsImpl(mdlSrc);
     }
 
     @Override
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
index 4aa3a55..ea11dc0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCEnSwearWordsImpl.scala
@@ -24,35 +24,12 @@ import org.apache.nlpcraft.internal.util.NCUtils
 
 import java.io.*
 
-/**
-  *
-  */
-object NCEnSwearWordsImpl:
-    /**
-      *
-      * @param mdlFile
-      * @return
-      */
-    def apply(mdlFile: File): NCEnSwearWordsImpl =
-        new NCEnSwearWordsImpl(new BufferedInputStream(new 
FileInputStream(mdlFile)), mdlFile.getPath)
-
-    /**
-      *
-      * @param mdlSrc
-      * @return
-      */
-    def apply(mdlSrc: String): NCEnSwearWordsImpl =
-        new NCEnSwearWordsImpl(NCUtils.getStream(mdlSrc), mdlSrc)
-
-/**
-  *
-  */
-class NCEnSwearWordsImpl(is: InputStream, res: String) extends NCTokenEnricher 
with LazyLogging:
+class NCEnSwearWordsImpl(res: String) extends NCTokenEnricher with LazyLogging:
     @volatile private var swearWords: Set[String] = _
 
     override def start(cfg: NCModelConfig): Unit =
         val stemmer = new PorterStemmer
-        swearWords = NCUtils.readTextStream(is, 
"UTF-8").map(stemmer.stem).toSet
+        swearWords = NCUtils.readTextStream(NCUtils.getStream(res), 
"UTF-8").map(stemmer.stem).toSet
         logger.trace(s"Loaded resource: $res")
     override def stop(): Unit = swearWords = null
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: 
java.util.List[NCToken]): Unit =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
index 4e77aa8..cb0a8b4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java
@@ -48,25 +48,6 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser 
{
 
     /**
      *
-     * @param tokMdl
-     * @param posMdl
-     * @param lemmaDic
-     * @throws NCException
-     */
-    public NCEnOpenNlpTokenParser(File posMdl, File lemmaDic) {
-        Objects.requireNonNull(posMdl, "POS model file cannot be null.");
-        Objects.requireNonNull(lemmaDic, "Lemmatizer model file cannot be 
null.");
-
-        try {
-            impl = NCEnOpenNlpImpl.apply(posMdl, lemmaDic);
-        }
-        catch (Exception e) {
-            throw new NCException("Failed to create OpenNLP token parser.", e);
-        }
-    }
-
-    /**
-     *
      * @param tokMdlSrc Local filesystem path, resources file path or URL for 
OpenNLP tokenizer model.
      * @param posMdlSrc Local filesystem path, resources file path or URL for 
OpenNLP tagger model.
      * @param lemmaDicSrc Local filesystem path, resources file path or URL 
for OpenNLP lemmatizer dictionary.
@@ -77,7 +58,7 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser {
         Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be 
null.");
 
         try {
-            impl = NCEnOpenNlpImpl.apply(posMdlSrc, lemmaDicSrc);
+            impl = new NCEnOpenNlpImpl(posMdlSrc, lemmaDicSrc);
         }
         catch (Exception e) {
             throw new NCException("Failed to create OpenNLP token parser.", e);
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
index 35b3c7f..6c62be8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala
@@ -31,39 +31,13 @@ import java.util.{Collections, List as JList, Set as JSet}
 import scala.concurrent.ExecutionContext
 import scala.jdk.CollectionConverters.*
 
-/**
-  *
-  */
-object NCEnOpenNlpImpl:
-    /**
-      *
-      * @param posMdlSrc Local filesystem path, resources file path or URL for 
OpenNLP tagger model.
-      * @param lemmaDicSrc Local filesystem path, resources file path or URL 
for OpenNLP lemmatizer dictionary.
-      * @return
-      */
-    def apply(posMdlSrc: String, lemmaDicSrc: String): NCEnOpenNlpImpl =
-        new NCEnOpenNlpImpl(NCUtils.getStream(posMdlSrc), 
NCUtils.getStream(lemmaDicSrc))
-
-    /**
-      *
-      * @param posMdlFile Local file for OpenNLP tagger model.
-      * @param lemmaDicFile Local file for OpenNLP lemmatizer dictionary.
-      * @return
-      */
-    def apply(posMdlFile: File, lemmaDicFile: File): NCEnOpenNlpImpl =
-        def toStream(f: File) = new BufferedInputStream(new FileInputStream(f))
-
-        new NCEnOpenNlpImpl(toStream(posMdlFile), toStream(lemmaDicFile))
 
 /**
   *
-  * @param posMdlIn
-  * @param lemmaDicIn
+  * @param posMdlSrc
+  * @param lemmaDicSrc
   */
-class NCEnOpenNlpImpl(
-    posMdlIn: InputStream,
-    lemmaDicIn: InputStream
-) extends NCTokenParser :
+class NCEnOpenNlpImpl(posMdlSrc: String, lemmaDicSrc: String) extends 
NCTokenParser :
     private val stemmer = new PorterStemmer
 
     @volatile var tagger: POSTaggerME = _
@@ -75,8 +49,8 @@ class NCEnOpenNlpImpl(
 
     override def start(cfg: NCModelConfig): Unit =
         NCUtils.execPar(
-            () => tagger = new POSTaggerME(new POSModel(posMdlIn)),
-            () => lemmatizer = new DictionaryLemmatizer(lemmaDicIn),
+            () => tagger = new POSTaggerME(new 
POSModel(NCUtils.getStream(posMdlSrc))),
+            () => lemmatizer = new 
DictionaryLemmatizer(NCUtils.getStream(lemmaDicSrc)),
             () => swFinder = new NCEnStopWordsFinder(stem(addStopWords), 
stem(exclStopWords))
         )(ExecutionContext.Implicits.global)
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java
index 88f12e4..bf63f0d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/NCOpenNlpTokenizer.java
@@ -23,7 +23,6 @@ import org.apache.nlpcraft.NCTokenizer;
 import org.apache.nlpcraft.NCWord;
 import org.apache.nlpcraft.nlp.tokenizer.opennlp.impl.NCOpenNlpTokenizerImpl;
 
-import java.io.File;
 import java.util.List;
 import java.util.Objects;
 
@@ -37,26 +36,11 @@ public class NCOpenNlpTokenizer implements NCTokenizer {
      *
      * @param tokMdl
      */
-    public NCOpenNlpTokenizer(File tokMdl) {
-        Objects.requireNonNull(tokMdl, "Tokenizer model file cannot be null.");
-
-        try {
-            impl = NCOpenNlpTokenizerImpl.apply(tokMdl);
-        }
-        catch (Exception e) {
-            throw new NCException("Failed to create OpenNLP tokenizer from: " 
+ tokMdl, e);
-        }
-    }
-
-    /**
-     *
-     * @param tokMdl
-     */
     public NCOpenNlpTokenizer(String tokMdl) {
         Objects.requireNonNull(tokMdl, "Tokenizer model source cannot be 
null.");
 
         try {
-           impl = NCOpenNlpTokenizerImpl.apply(tokMdl);
+           impl = new NCOpenNlpTokenizerImpl(tokMdl);
         }
         catch (Exception e) {
             throw new NCException("Failed to create OpenNLP tokenizer from: " 
+ tokMdl, e);
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala
index fe8db46..65a541d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/tokenizer/opennlp/impl/NCOpenNlpTokenizerImpl.scala
@@ -27,19 +27,12 @@ import scala.jdk.CollectionConverters.*
 
 /**
   *
-  */
-object NCOpenNlpTokenizerImpl:
-    def apply(file: File): NCOpenNlpTokenizerImpl = new 
NCOpenNlpTokenizerImpl(new BufferedInputStream(new FileInputStream(file)))
-    def apply(src: String): NCOpenNlpTokenizerImpl = new 
NCOpenNlpTokenizerImpl(NCUtils.getStream(src))
-
-/**
-  *
   * @param is
   */
-class NCOpenNlpTokenizerImpl(is: InputStream) extends NCTokenizer:
+class NCOpenNlpTokenizerImpl(src: String) extends NCTokenizer:
     @volatile var tokenizer: TokenizerME = _
 
-    override def start(cfg: NCModelConfig): Unit = tokenizer = new 
TokenizerME(new TokenizerModel(is))
+    override def start(cfg: NCModelConfig): Unit = tokenizer = new 
TokenizerME(new TokenizerModel(NCUtils.getStream(src)))
     override def stop(): Unit = tokenizer = null
     override def tokenize(cfg: NCModelConfig, txt: String): util.List[NCWord] =
         this.synchronized { tokenizer.tokenizePos(txt) }

Reply via email to