This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-492 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 359b87b973953984564a6aaaa029a30fe8bd04ab Author: Sergey Kamov <[email protected]> AuthorDate: Sun Apr 3 13:43:34 2022 +0300 NCEntityMapper implementation and related test added. --- .../scala/org/apache/nlpcraft/NCEntityMapper.java | 34 +++++++++++ .../main/scala/org/apache/nlpcraft/NCPipeline.java | 13 ++++- .../org/apache/nlpcraft/NCPipelineBuilder.java | 31 ++++++++++ .../internal/impl/NCModelPipelineManager.scala | 9 ++- .../apache/nlpcraft/nlp/NCEntityMapperSpec.scala | 67 ++++++++++++++++++++++ .../apache/nlpcraft/nlp/util/NCTestPipeline.scala | 2 + 6 files changed, 153 insertions(+), 3 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityMapper.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityMapper.java new file mode 100644 index 0000000..bc2a040 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityMapper.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft; + +import java.util.List; + +/** + * TODO: + */ +public interface NCEntityMapper extends NCLifecycle { + /** + * + * @param req + * @param entities + * @param toks + * @return + */ + List<NCEntity> convert(NCRequest req, List<NCEntity> entities, List<NCToken> toks); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java index 1161c29..a3ad4fd 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java @@ -126,11 +126,14 @@ public interface NCPipeline { NCTokenParser getTokenParser(); /** + * TODO: can be empty. * Gets the list of entity parser. At least one entity parser is required. * * @return List of entity parser. List should contain at least one entity parser. */ - List<NCEntityParser> getEntityParsers(); + default List<NCEntityParser> getEntityParsers() { + return Collections.emptyList(); + } /** * Gets optional list of token enrichers. @@ -176,4 +179,12 @@ public interface NCPipeline { default Optional<NCVariantFilter> getVariantFilter() { return Optional.empty(); } + + /** + * TODO: + * @return + */ + default List<NCEntityMapper> getEntityMappers() { + return Collections.emptyList(); + } } \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java index 29cc79a..ca21250 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.java @@ -46,6 +46,7 @@ public class NCPipelineBuilder { private final List<NCEntityParser> entParsers = new ArrayList<>(); private final List<NCTokenValidator> tokVals = new ArrayList<>(); private final List<NCEntityValidator> entVals = new ArrayList<>(); + private final List<NCEntityMapper> entMappers = new ArrayList<>(); private Optional<NCVariantFilter> varFilter = Optional.empty(); /** @@ -222,6 +223,31 @@ public class NCPipelineBuilder { /** * + * @param entMappers + * @return This instance for call chaining. + */ + public NCPipelineBuilder withEntityMappers(List<NCEntityMapper> entMappers) { + Objects.requireNonNull(entMappers, "List of entity mappers cannot be null."); + entMappers.forEach(p -> Objects.requireNonNull(p, "Entity mapper cannot be null.")); + + this.entMappers.addAll(entMappers); + + return this; + } + + /** + * @param entMapper + * @return This instance for call chaining. + */ + public NCPipelineBuilder withEntitMapper(NCEntityMapper entMapper) { + Objects.requireNonNull(entMapper, "Entity mapper cannot be null."); + + this.entMappers.add(entMapper); + + return this; + } + /** + * */ private void setEnComponents() { tokParser = mkEnOpenNLPTokenParser(); @@ -329,6 +355,11 @@ public class NCPipelineBuilder { @Override public Optional<NCVariantFilter> getVariantFilter() { return varFilter; } + + @Override + public List<NCEntityMapper> getEntityMappers() { + return entMappers; + } }; } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala index df3e55d..2529662 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala @@ -57,10 +57,11 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L private val entParsers = nvl(pipeline.getEntityParsers) private val tokVals = nvl(pipeline.getTokenValidators) private val entVals = nvl(pipeline.getEntityValidators) + private val entMappers = nvl(pipeline.getEntityMappers) private val varFilterOpt = pipeline.getVariantFilter.toScala private val allComps: Seq[NCLifecycle] = - tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ varFilterOpt.toSeq + tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilterOpt.toSeq /** * Processes pipeline components. @@ -138,7 +139,7 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L // NOTE: we run validators regardless of whether token list is empty. for (v <- tokVals) v.validate(req, cfg, toks) - val entsList = new util.ArrayList[NCEntity]() + var entsList: util.List[NCEntity] = new util.ArrayList[NCEntity]() for (p <- entParsers) entsList.addAll(p.parse(req, cfg, toks)) @@ -148,6 +149,10 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L // NOTE: we run validators regardless of whether entity list is empty. for (v <- entVals) v.validate(req, cfg, entsList) + for (m <- entMappers) + entsList = m.convert(req, entsList, toks) + if entsList == null then E("Entity mapper cannot return null values/") + val entities = entsList.asScala.toSeq val overlapEnts: Seq[Set[NCEntity]] = diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityMapperSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityMapperSpec.scala new file mode 100644 index 0000000..edd4ef3 --- /dev/null +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityMapperSpec.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.nlp + +import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticTestElement +import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser +import org.apache.nlpcraft.nlp.util.NCTestUtils +import org.junit.jupiter.api.Test +import org.apache.nlpcraft.nlp.util.* + +import java.util.List as JList +import scala.util.Using +import scala.jdk.CollectionConverters.* + +/** + * + */ +class NCEntityMapperSpec: + case class UnionMapper(id1: String, id2: String) extends NCEntityMapper: + override def convert(req: NCRequest, entities: JList[NCEntity], toks: JList[NCToken]): JList[NCEntity] = + val es = entities.asScala + + val id1AndId2 = es.filter(e => e.getId == id1 || e.getId == id2) + val other = es.filter(e => !id1AndId2.contains(e)) + + val newEntity = new NCPropertyMapAdapter with NCEntity: + override def getTokens: JList[NCToken] = id1AndId2.flatMap(_.getTokens.asScala).sortBy(_.getIndex).asJava + override def getRequestId: String = req.getRequestId + override def getId: String = s"$id1$id2" + + (other ++ Seq(newEntity)).sortBy(_.getTokens.get(0).getIndex).asJava + + private val mdl: NCTestModelAdapter = new NCTestModelAdapter: + override val getPipeline: NCPipeline = + val pl = mkEnPipeline + import NCSemanticTestElement as TE + pl.getEntityParsers.add(NCTestUtils.mkEnSemanticParser(TE("a"), TE("b"), TE("c"), TE("d"))) + // Replaces [a, b] -> [ab] + pl.getEntityMappers.add(UnionMapper("a", "b")) + // Replaces [c, d] -> [cd] + pl.getEntityMappers.add(UnionMapper("c", "d")) + // Replaces [ab, cd] -> [abcd] + pl.getEntityMappers.add(UnionMapper("ab", "cd")) + pl + + @NCIntent("intent=i term(e)={# == 'abcd'}") + def onMatch(@NCIntentTerm("e") e: NCEntity): NCResult = new NCResult("OK", NCResultType.ASK_RESULT) + + @Test + def test(): Unit = Using.resource(new NCModelClient(mdl)) { client => require(client.ask("a b c d", null, "userId").getBody == "OK")} diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala index 85dec0d..9698d9b 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala @@ -21,6 +21,7 @@ import org.apache.nlpcraft.* import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestPipeline.* +import java.util import java.util.{Optional, ArrayList as JList} /** @@ -38,4 +39,5 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter override val getEntityParsers = new JList[NCEntityParser]() override val getTokenValidators = new JList[NCTokenValidator]() override val getEntityValidators = new JList[NCEntityValidator]() + override val getEntityMappers = new JList[NCEntityMapper]() override def getVariantFilter: Optional[NCVariantFilter] = variantFilter \ No newline at end of file
