This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new eb3785e5 NCEntityMapper implementation and related tests added.
eb3785e5 is described below
commit eb3785e56812f0224bbc7a0a7cda5142d604d9da
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Apr 4 13:13:33 2022 +0300
NCEntityMapper implementation and related tests added.
---
.../scala/org/apache/nlpcraft/NCEntityMapper.java | 34 ++++++++++
.../main/scala/org/apache/nlpcraft/NCPipeline.java | 16 ++++-
.../org/apache/nlpcraft/NCPipelineBuilder.java | 31 +++++++++
.../internal/impl/NCModelPipelineManager.scala | 9 ++-
.../apache/nlpcraft/nlp/NCEntityMapperSpec.scala | 75 ++++++++++++++++++++++
.../apache/nlpcraft/nlp/util/NCTestPipeline.scala | 1 +
6 files changed, 163 insertions(+), 3 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityMapper.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityMapper.java
new file mode 100644
index 00000000..f84434f0
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityMapper.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft;
+
+import java.util.List;
+
+/**
+ * Pipeline component that maps a list of entities to a new list of entities.
+ */
+public interface NCEntityMapper extends NCLifecycle {
+ /**
+ * Maps given entities to a resulting list of entities.
+ * @param req Input request descriptor.
+ * @param cfg Configuration of the model this component is associated with.
+ * @param entities List of entities to map.
+ * @return List of mapped entities. Must not be {@code null} (the pipeline manager treats a {@code null} result as an error).
+ */
+ List<NCEntity> map(NCRequest req, NCModelConfig cfg, List<NCEntity>
entities);
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java
index 1161c298..b7b84da4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java
@@ -126,11 +126,14 @@ public interface NCPipeline {
NCTokenParser getTokenParser();
/**
+ * TODO: can be empty.
* Gets the list of entity parser. At least one entity parser is required.
*
* @return List of entity parser. List should contain at least one entity
parser.
*/
- List<NCEntityParser> getEntityParsers();
+ default List<NCEntityParser> getEntityParsers() {
+ return Collections.emptyList();
+ }
/**
* Gets optional list of token enrichers.
@@ -176,4 +179,15 @@ public interface NCPipeline {
default Optional<NCVariantFilter> getVariantFilter() {
return Optional.empty();
}
+
+ /**
+ * Gets optional list of entity mappers.
+ *
+ * The default implementation returns an empty list.
+ *
+ * @return Optional list of entity mappers. Can be empty but never {@code null}.
+ */
+ default List<NCEntityMapper> getEntityMappers() {
+ return Collections.emptyList();
+ }
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java
index 29cc79a5..ca212503 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java
@@ -46,6 +46,7 @@ public class NCPipelineBuilder {
private final List<NCEntityParser> entParsers = new ArrayList<>();
private final List<NCTokenValidator> tokVals = new ArrayList<>();
private final List<NCEntityValidator> entVals = new ArrayList<>();
+ private final List<NCEntityMapper> entMappers = new ArrayList<>();
private Optional<NCVariantFilter> varFilter = Optional.empty();
/**
@@ -220,6 +221,31 @@ public class NCPipelineBuilder {
return this;
}
+ /**
+ * Adds all given entity mappers to this builder; a {@code null} list or {@code null} element is rejected.
+ * @param entMappers Entity mappers to add. Neither the list nor any of its elements can be {@code null}.
+ * @return This instance for call chaining.
+ */
+ public NCPipelineBuilder withEntityMappers(List<NCEntityMapper>
entMappers) {
+ Objects.requireNonNull(entMappers, "List of entity mappers cannot be
null.");
+ entMappers.forEach(p -> Objects.requireNonNull(p, "Entity mapper
cannot be null."));
+
+ this.entMappers.addAll(entMappers);
+
+ return this;
+ }
+
+ /**
+ * @param entMapper Single entity mapper to add to this builder. Cannot be {@code null}.
+ * @return This instance for call chaining.
+ */
+ public NCPipelineBuilder withEntitMapper(NCEntityMapper entMapper) {
+ Objects.requireNonNull(entMapper, "Entity mapper cannot be null.");
+
+ this.entMappers.add(entMapper);
+
+ return this;
+ }
/**
*
*/
@@ -329,6 +355,11 @@ public class NCPipelineBuilder {
@Override public Optional<NCVariantFilter> getVariantFilter() {
return varFilter;
}
+
+ @Override
+ public List<NCEntityMapper> getEntityMappers() {
+ return entMappers;
+ }
};
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
index df3e55d5..3b9ba2b4 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
@@ -57,10 +57,11 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline:
NCPipeline) extends L
private val entParsers = nvl(pipeline.getEntityParsers)
private val tokVals = nvl(pipeline.getTokenValidators)
private val entVals = nvl(pipeline.getEntityValidators)
+ private val entMappers = nvl(pipeline.getEntityMappers)
private val varFilterOpt = pipeline.getVariantFilter.toScala
private val allComps: Seq[NCLifecycle] =
- tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++
varFilterOpt.toSeq
+ tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++
entMappers ++ varFilterOpt.toSeq
/**
* Processes pipeline components.
@@ -138,7 +139,7 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline:
NCPipeline) extends L
// NOTE: we run validators regardless of whether token list is empty.
for (v <- tokVals) v.validate(req, cfg, toks)
- val entsList = new util.ArrayList[NCEntity]()
+ var entsList: util.List[NCEntity] = new util.ArrayList[NCEntity]()
for (p <- entParsers) entsList.addAll(p.parse(req, cfg, toks))
@@ -148,6 +149,10 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline:
NCPipeline) extends L
// NOTE: we run validators regardless of whether entity list is empty.
for (v <- entVals) v.validate(req, cfg, entsList)
+ for (m <- entMappers)
+ entsList = m.map(req, cfg, entsList)
+ if entsList == null then E("Entity mapper cannot return null
values.")
+
val entities = entsList.asScala.toSeq
val overlapEnts: Seq[Set[NCEntity]] =
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityMapperSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityMapperSpec.scala
new file mode 100644
index 00000000..1a75cbdf
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityMapperSpec.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticTestElement
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
+import org.apache.nlpcraft.nlp.util.NCTestUtils
+import org.junit.jupiter.api.Test
+import org.apache.nlpcraft.nlp.util.*
+
+import java.util.List as JList
+import scala.util.Using
+import scala.jdk.CollectionConverters.*
+
+/**
+ * Verifies that chained entity mappers merge entities 'a', 'b', 'c' and 'd' into one 'abcd' entity that the intent matches.
+ */
+class NCEntityMapperSpec:
+ private case class Combiner(ids: String*) extends NCEntityMapper:
+ override def map(req: NCRequest, cfg: NCModelConfig, entities:
JList[NCEntity]): JList[NCEntity] =
+ val es = entities.asScala
+ val replaced = es.filter(p => ids.contains(p.getId))
+
+ if replaced.isEmpty then
+ entities
+ else
+ val newEntity: NCEntity = new NCPropertyMapAdapter with
NCEntity:
+ override val getTokens: JList[NCToken] =
replaced.flatMap(_.getTokens.asScala).sortBy(_.getIndex).asJava
+ override val getRequestId: String = req.getRequestId
+ override val getId: String = ids.mkString
+
+ es --= replaced // `es` is a view over `entities`, so this also removes the matched entities from the input list.
+ (es :+
newEntity).sortBy(_.getTokens.asScala.head.getIndex).asJava
+
+ private val mdl = new NCTestModelAdapter:
+ import NCSemanticTestElement as TE
+ override val getPipeline: NCPipeline =
+ val pl = mkEnPipeline
+ val ms = pl.getEntityMappers
+
+ pl.getEntityParsers.add(NCTestUtils.mkEnSemanticParser(TE("a"),
TE("b"), TE("c"), TE("d")))
+
+ // Replaces [a, b] -> [ab]
+ ms.add(Combiner("a", "b"))
+ // Replaces [c, d] -> [cd]
+ ms.add(Combiner("c", "d"))
+ // Replaces [ab, cd] -> [abcd]
+ ms.add(Combiner("ab", "cd"))
+
+ pl
+
+ @NCIntent("intent=abcd term(abcd)={# == 'abcd'}")
+ def onMatch(@NCIntentTerm("abcd") abcd: NCEntity): NCResult = new
NCResult("OK", NCResultType.ASK_RESULT)
+
+ @Test
+ def test(): Unit = Using.resource(new NCModelClient(mdl)) { client =>
+ require(client.ask("a b c d", null, "userId").getIntentId == "abcd")
+ }
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
index 85dec0d7..533590d2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
@@ -38,4 +38,5 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends
NCPropertyMapAdapter
override val getEntityParsers = new JList[NCEntityParser]()
override val getTokenValidators = new JList[NCTokenValidator]()
override val getEntityValidators = new JList[NCEntityValidator]()
+ override val getEntityMappers = new JList[NCEntityMapper]()
override def getVariantFilter: Optional[NCVariantFilter] = variantFilter
\ No newline at end of file