This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new a071064 WIP.
a071064 is described below
commit a071064239457eae68fded2e52cb8b8e5a678eec
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Jan 4 12:50:41 2022 +0300
WIP.
---
.../{NCVariantValidator.java => NCVariant.java} | 11 +---
.../org/apache/nlpcraft/NCVariantValidator.java | 2 +-
.../nlp/entity/parser/nlp/NCNlpEntityParser.java | 65 ++++++++++++++++++++++
.../parser/nlp/impl/NCNlpEntityParserImpl.scala | 49 ++++++++++++++++
4 files changed, 117 insertions(+), 10 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java
similarity index 75%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java
index 212e242..99f9373 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java
@@ -22,13 +22,6 @@ import java.util.List;
/**
*
*/
-public interface NCVariantValidator extends NCLifecycle {
- /**
- * Filters all found entities variants.
- *
- * @param req
- * @param cfg
- * @param toks
- */
- List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg,
List<List<NCEntity>> variants);
+public interface NCVariant {
+ List<NCToken> getTokens();
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
index 212e242..3e0fa0e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
@@ -30,5 +30,5 @@ public interface NCVariantValidator extends NCLifecycle {
* @param cfg
* @param toks
*/
- List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg,
List<List<NCEntity>> variants);
+ List<NCVariant> filter(NCRequest req, NCModelConfig cfg, List<NCVariant>
variants);
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
new file mode 100644
index 0000000..efb3a95
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.nlp;
+
+import org.apache.nlpcraft.NCEntity;
+import org.apache.nlpcraft.NCEntityParser;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNlpEntityParserImpl;
+import
org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNlpEntityParserImpl;
+
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Entity parser that wraps every NLP token into its own entity.
+ *
+ * Each entity with ID 'nlp:token' contains one token and has the following
properties copied from its token:
+ * nlp:token:stem, nlp:token:lemma, nlp:token:pos, nlp:token:text,
nlp:token:index
+ *
+ * <p>
+ * Component is language independent.
+ * <p>
+ */
+public class NCNlpEntityParser implements NCEntityParser {
+ private final NCNlpEntityParserImpl impl;
+
+ /**
+ * Creates the parser backed by a new {@link NCNlpEntityParserImpl}.
+ */
+ public NCNlpEntityParser() {
+ this.impl = new NCNlpEntityParserImpl();
+ }
+
+ @Override
+ public void start(NCModelConfig cfg) {
+ impl.start(cfg);
+ }
+
+ @Override
+ public void stop() {
+ impl.stop();
+ }
+
+ @Override
+ public List<NCEntity> parse(NCRequest req, NCModelConfig cfg,
List<NCToken> toks) {
+ return impl.parse(req, cfg, toks);
+ }
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
new file mode 100644
index 0000000..a7e4116
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.nlp.impl
+
+import org.apache.nlpcraft.*
+
+import java.util
+import java.util.stream.Collectors
+
+/**
+ * Holds the entity ID, which the parser class also uses as the property-key prefix.
+ */
+object NCNlpEntityParserImpl:
+ private def id = "nlp:token"
+
+import NCNlpEntityParserImpl._
+
+/**
+ * Maps each input token to a separate entity carrying that token's stem, lemma, POS, text and index.
+ */
+class NCNlpEntityParserImpl extends NCEntityParser:
+ override def parse(req: NCRequest, cfg: NCModelConfig, toks:
util.List[NCToken]): util.List[NCEntity] =
+ toks.stream().map(t =>
+ new NCPropertyMapAdapter with NCEntity:
+ put(s"$id:stem", t.getStem) // runs while the anonymous entity is constructed; `put` presumably comes from NCPropertyMapAdapter - confirm
+ put(s"$id:lemma", t.getLemma)
+ put(s"$id:pos", t.getPos)
+ put(s"$id:text", t.getText)
+ put(s"$id:index", t.getIndex)
+
+ override def getTokens: util.List[NCToken] =
util.Collections.singletonList(t) // each entity wraps exactly one token
+ override def getRequestId: String = req.getRequestId
+ override def getId: String = id // same "nlp:token" ID for every entity
+ ).collect(Collectors.toList)