This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master_test
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master_test by this push:
new 8142498 WIP
8142498 is described below
commit 81424987169fc3d23c3c5b69150b0f5f9a4809ff
Author: Aaron Radzinski <[email protected]>
AuthorDate: Fri Dec 17 10:56:30 2021 -0800
WIP
---
.../token/parser/opennlp/NCOpenNlpTokenParser.java | 47 +++++++++-------------
.../token/parser/opennlp/impl/NCOpenNlpImpl.scala | 27 ++++++++++++-
.../apache/nlpcraft/internal/util/NCUtils.scala | 28 +++++++++++++
3 files changed, 74 insertions(+), 28 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
index 91378e7..3789280 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
@@ -39,36 +39,29 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
/**
*
- * @param tokModel
- * @param posModel
+ * @param tokMdl
+ * @param posMdl
* @param lemmaDic
*/
- public NCOpenNlpTokenParser(File tokModel, File posModel, File lemmaDic) {
+ public NCOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
// TODO
+ impl = null;
}
/**
*
- * @param tokMdlPath
- * @param tagger
- * @param lemmatizer
+ * @param tokMdlSrc Local filesystem path, resources file path or URL for OpenNLP tokenizer model.
+ * @param posMdlSrc Local filesystem path, resources file path or URL for OpenNLP tagger model.
+ * @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary.
*/
- public NCOpenNlpTokenParser(String tokMdlPath, String tagger, String lemmatizer) {
- // TODO
- }
-
- /**
- *
- * @param tokenizer
- * @param tagger
- * @param lemmatizer
- */
- public NCOpenNlpTokenParser(InputStream tokenizer, InputStream tagger, InputStream lemmatizer) {
- impl = NCOpenNlpImpl.apply(tokenizer, tagger, lemmatizer);
+ public NCOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
+ // TODO
+ impl = null;
}
@Override
public List<NCToken> parse(NCRequest req) {
+ assert impl != null;
return impl.parse(req);
}
@@ -76,16 +69,16 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
*
* @return
*/
- public List<String> getExtraStopWords() {
- return delegate.extraStopWords();
+ public List<String> getAdditionalStopWords() {
+ return impl.getAdditionalStopWords();
}
/**
*
- * @param extraStopWords
+ * @param addStopWords
*/
- public void setExtraStopWords(List<String> extraStopWords) {
- delegate.excludedStopWords_$eq(extraStopWords);
+ public void setAdditionalStopWords(List<String> addStopWords) {
+ impl.setAdditionalStopWords(addStopWords);
}
/**
@@ -93,14 +86,14 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
* @return
*/
public List<String> getExcludedStopWords() {
- return delegate.excludedStopWords();
+ return impl.getExcludedStopWords();
}
/**
*
- * @param excludedStopWords
+ * @param exclStopWords
*/
- public void setExcludedStopWords(List<String> excludedStopWords) {
- delegate.excludedStopWords_$eq(excludedStopWords);
+ public void setExcludedStopWords(List<String> exclStopWords) {
+ impl.setExcludedStopWords(exclStopWords);
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
index 6bc721f..03d1c32 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
@@ -19,6 +19,7 @@ package org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl
import org.apache.nlpcraft.*
import java.io.*
+import java.util.List as JList
import opennlp.tools.lemmatizer.*
import opennlp.tools.postag.*
@@ -41,10 +42,34 @@ class NCOpenNlpImpl(tokModelIn: InputStream, posModelIn: InputStream, lemmaDicIn
/**
*
+ * @return
+ */
+ def getAdditionalStopWords: JList[String] = ???
+
+ /**
+ *
+ * @return
+ */
+ def getExcludedStopWords: JList[String] = ???
+
+ /**
+ *
+ * @param addStopWords
+ */
+ def setAdditionalStopWords(addStopWords: JList[String]): Unit = ???
+
+ /**
+ *
+ * @param exclStopWords
+ */
+ def setExcludedStopWords(exclStopWords: JList[String]): Unit = ???
+
+ /**
+ *
* @param req
* @return
*/
- def parse(req: NCRequest): java.util.List[NCToken] =
+ def parse(req: NCRequest): JList[NCToken] =
val sen = req.getNormalizedText
case class TokenHolder(origin: String, normalized: String, start: Int, end: Int, length: Int)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index dcdc3e7..ec9b7cd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -534,6 +534,34 @@ object NCUtils extends LazyLogging:
stopped = true
/**
+ * Gets resource existing flag.
+ *
+ * @param res Resource.
+ */
+ def isResource(res: String): Boolean = getClass.getClassLoader.getResourceAsStream(res) != null
+
+ /**
+ *
+ * @param url URL to check.
+ * @return
+ */
+ def isUrl(url: String): Boolean =
+ try
+ new URL(url)
+ true
+ catch
+ case _: MalformedURLException => false
+
+ /**
+ *
+ * @param path Local file path to check.
+ * @return
+ */
+ def isFile(path: String): Boolean =
+ val f = new File(path)
+ f.exists() && f.isFile
+
+ /**
* Makes thread.
*
* @param name Name.