This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-483
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push:
new 01d2815 WIP.
01d2815 is described below
commit 01d28158345762a91d0cb4819e6c6082cb6f289a
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Mar 2 15:44:28 2022 +0300
WIP.
---
.../token/enricher/NCEnLemmaPosTokenEnricher.java | 47 ++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
new file mode 100644
index 0000000..aedcf84
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.en.token.enricher;
+
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
+import org.apache.nlpcraft.internal.util.NCResourceReader;
+import
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCLemmaPosTokenEnricherImpl;
+import
org.apache.nlpcraft.nlp.mult.token.enricher.opennlp.NCLemmaPosTokenEnricher;
+
+import java.util.List;
+
+/**
+ * TODO: enriches with <code>lemma</code> and <code>pos</code> properties.
+ *
+ * Models can be downloaded from the following resources:
+ * - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
+ * - lemmatizer: https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
+ */
+public class NCEnLemmaPosTokenEnricher extends NCLemmaPosTokenEnricher {
+ /**
+ * Passes the paths of the English POS tagger model and lemmatizer dictionary to the parent enricher.
+ */
+ public NCEnLemmaPosTokenEnricher() {
+ super(
+ NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+ NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
+ );
+ }
+}