This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-483-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483-1 by this push:
new bcd84ed WIP.
bcd84ed is described below
commit bcd84ed0f442b2ab95504d9601d00eae9b831ca4
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Mar 2 22:28:59 2022 +0300
WIP.
---
.../apache/nlpcraft/NCModelPipelineBuilder.java | 27 ++++++++++++++--------
1 file changed, 18 insertions(+), 9 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index 02ac728..05f0110 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -71,7 +71,8 @@ public class NCModelPipelineBuilder {
}
/**
- *
+ * TODO:
+ * EN NLP component set. Note that it uses the OpenNLP token parser implementation.
* @param lang
* @param entParsers
*/
@@ -81,19 +82,27 @@ public class NCModelPipelineBuilder {
if (entParsers.isEmpty())
throw new IllegalArgumentException("At least one entity parser
must be defined.");
- tokParser = new NCENOpenNLPTokenParser();
+ switch (lang) {
+ case EN:
+ tokParser = new NCENOpenNLPTokenParser();
- tokEnrichers.add(new NCENOpenNlpLemmaPosTokenEnricher());
- tokEnrichers.add(new NCENStopWordsTokenEnricher());
- tokEnrichers.add(new
NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
- tokEnrichers.add(new NCENQuotesTokenEnricher());
- tokEnrichers.add(new NCENDictionaryTokenEnricher());
- tokEnrichers.add(new NCENBracketsTokenEnricher());
+ tokEnrichers.add(new NCENOpenNlpLemmaPosTokenEnricher());
+ tokEnrichers.add(new NCENStopWordsTokenEnricher());
+ tokEnrichers.add(new
NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
+ tokEnrichers.add(new NCENQuotesTokenEnricher());
+ tokEnrichers.add(new NCENDictionaryTokenEnricher());
+ tokEnrichers.add(new NCENBracketsTokenEnricher());
+
+ this.entParsers.addAll(entParsers);
+ default:
+ throw new IllegalArgumentException("Unsupported language: " +
lang);
+ }
- this.entParsers.addAll(entParsers);
}
/**
+ * TODO:
+ * EN NLP component set. Note that it uses the OpenNLP token parser implementation.
*
* @param lang
* @param entParsers