This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch master_test
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master_test by this push:
     new f6c60b5  WIP
f6c60b5 is described below

commit f6c60b54a40022dc2447f33829e69d8f17f1750a
Author: Aaron Radzinski <[email protected]>
AuthorDate: Fri Dec 17 11:05:43 2021 -0800

    WIP
---
 .../token/parser/opennlp/NCOpenNlpTokenParser.java | 25 ++++++++++++++---
 .../token/parser/opennlp/impl/NCOpenNlpImpl.scala  | 31 +++++++++++++++-------
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
index 3789280..a590d72 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
@@ -19,7 +19,10 @@ package org.apache.nlpcraft.internal.nlp.token.parser.opennlp;
 
 import org.apache.nlpcraft.*;
 import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.*;
+
+import java.io.BufferedInputStream;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.InputStream;
 import java.util.List;
 
@@ -42,10 +45,19 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
      * @param tokMdl
      * @param posMdl
      * @param lemmaDic
+     * @throws NCException
      */
     public NCOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
-        // TODO
-        impl = null;
+        try {
+            impl = new NCOpenNlpImpl(
+                new BufferedInputStream(new FileInputStream(tokMdl)),
+                new BufferedInputStream(new FileInputStream(posMdl)),
+                new BufferedInputStream(new FileInputStream(lemmaDic))
+            );
+        }
+        catch (Exception e) {
+            throw new NCException("Failed to create OpenNLP token parser.", e);
+        }
     }
 
     /**
@@ -53,10 +65,15 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
      * @param tokMdlSrc Local filesystem path, resources file path or URL for OpenNLP tokenizer model.
      * @param posMdlSrc Local filesystem path, resources file path or URL for OpenNLP tagger model.
      * @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
+     * @throws NCException
      */
     public NCOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
-        // TODO
-        impl = null;
+        try {
+            impl = NCOpenNlpImpl.apply(tokMdlSrc, posMdlSrc, lemmaDicSrc);
+        }
+        catch (Exception e) {
+            throw new NCException("Failed to create OpenNLP token parser.", e);
+        }
     }
 
     @Override
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
index 03d1c32..920efde 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
@@ -28,41 +28,54 @@ import opennlp.tools.tokenize.*
 
 import scala.jdk.CollectionConverters.*
 
+object NCOpenNlpImpl:
+    /**
+      *
+      * @param tokMdlSrc Local filesystem path, resources file path or URL for OpenNLP tokenizer model.
+      * @param posMdlSrc Local filesystem path, resources file path or URL for OpenNLP tagger model.
+      * @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
+      * @return
+      */
+    def apply(tokMdlSrc: String, posMdlSrc: String, lemmaDicSrc: String): NCOpenNlpImpl = ???
+
+
 /**
   *
-  * @param tokModelIn
-  * @param posModelIn
+  * @param tokMdlIn
+  * @param posMdlIn
   * @param lemmaDicIn
   */
-class NCOpenNlpImpl(tokModelIn: InputStream, posModelIn: InputStream, lemmaDicIn: InputStream):
-    private val tokenizer = new TokenizerME(new TokenizerModel(tokModelIn))
-    private val tagger = new POSTaggerME(new POSModel(posModelIn))
+class NCOpenNlpImpl(tokMdlIn: InputStream, posMdlIn: InputStream, lemmaDicIn: InputStream):
+    private val tokenizer = new TokenizerME(new TokenizerModel(tokMdlIn))
+    private val tagger = new POSTaggerME(new POSModel(posMdlIn))
     private val lemmatizer = new DictionaryLemmatizer(lemmaDicIn)
     private val stemmer = new PorterStemmer
+    private var addStopWords = List.empty[String]
+    private var exclStopWords = List.empty[String]
 
     /**
       *
       * @return
       */
-    def getAdditionalStopWords: JList[String] = ???
+    def getAdditionalStopWords: JList[String] = addStopWords.asJava
 
     /**
       *
       * @return
       */
-    def getExcludedStopWords: JList[String] = ???
+    def getExcludedStopWords: JList[String] = exclStopWords.asJava
 
     /**
       *
       * @param addStopWords
       */
-    def setAdditionalStopWords(addStopWords: JList[String]): Unit = ???
+    def setAdditionalStopWords(addStopWords: JList[String]): Unit = this.addStopWords = addStopWords.asScala.toList
 
     /**
       *
       * @param exclStopWords
       */
-    def setExcludedStopWords(exclStopWords: JList[String]): Unit = ???
+    def setExcludedStopWords(exclStopWords: JList[String]): Unit = this.exclStopWords = exclStopWords.asScala.toList
 
     /**
       *

Reply via email to