This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new ab47e4f WIP.
ab47e4f is described below
commit ab47e4f5a2b075e0597d1f005b32302ae05d9c85
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Jul 7 13:14:03 2021 +0300
WIP.
---
.../model/NCContextWordCategoriesConfig.java | 36 -------------
.../scala/org/apache/nlpcraft/model/NCElement.java | 5 ++
.../apache/nlpcraft/model/NCModelFileAdapter.java | 38 ++------------
.../org/apache/nlpcraft/model/NCModelView.java | 5 --
.../impl/json/NCContextWordModelConfigJson.java | 48 ------------------
.../nlpcraft/model/impl/json/NCElementJson.java | 8 +++
.../nlpcraft/model/impl/json/NCModelJson.java | 7 ---
.../probe/mgrs/conn/NCConnectionManager.scala | 30 ++++++-----
.../probe/mgrs/deploy/NCDeployManager.scala | 59 ++++++++--------------
.../nlpcraft/server/probe/NCProbeManager.scala | 4 +-
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 59 ++++++++++------------
11 files changed, 84 insertions(+), 215 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordCategoriesConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordCategoriesConfig.java
deleted file mode 100644
index 0c3b99b..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordCategoriesConfig.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.model;
-
-import java.io.Serializable;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-// TODO:
-public interface NCContextWordCategoriesConfig extends Serializable {
- default List<String> getCorpus() {
- return Collections.emptyList();
- }
-
- default boolean useIntentsSamples() {
- return true;
- }
-
- Map<String, Double> getSupportedElements();
-}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 0b7b24d..96cf26d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -382,4 +382,9 @@ public interface NCElement extends NCMetadata, Serializable
{
default Optional<Boolean> isSparse() {
return Optional.empty();
}
+
+ // TODO:
+ default Optional<Double> getCategoryConfidence() {
+ return Optional.empty();
+ }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index 9ed9130..469858e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -66,7 +66,6 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
private final Set<NCElement> elems;
private final List<NCCustomParser> parsers;
private final Map<String, Set<String>> restrictedCombinations;
- private final NCContextWordCategoriesConfig ctxWordMdlCfg;
private final String origin;
@@ -123,7 +122,6 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
this.intents = convertToList(proxy.getIntents(), null);
this.parsers = convertParsers(proxy.getParsers());
this.restrictedCombinations =
convertRestrictedCombinations(proxy.getRestrictedCombinations());
- this.ctxWordMdlCfg = convert(proxy.getContextWordModelConfigJson());
// NOTE: we can only test/check this at this point. Downstream - this
information is lost.
if (proxy.getIntents() != null && intents.size() !=
proxy.getIntents().length)
@@ -266,32 +264,6 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
/**
*
- * @param js
- * @return
- */
- private static NCContextWordCategoriesConfig
convert(NCContextWordModelConfigJson js) {
- return js != null?
- new NCContextWordCategoriesConfig() {
- @Override
- public List<String> getCorpus() {
- return js.getSamples() != null ?
Arrays.asList(js.getSamples()) : null;
- }
-
- @Override
- public boolean useIntentsSamples() {
- return js.isUseIntentsSamples();
- }
-
- @Override
- public Map<String, Double> getSupportedElements() {
- return js.getSupportedElements();
- }
- }:
- null;
- }
-
- /**
- *
* @param proxy
* @param arr
* @return
@@ -385,6 +357,11 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
return nvlOpt(js.isSparse(), proxy.isSparse());
}
+ @Override
+ public Optional<Double> getCategoryConfidence() {
+ return
Optional.ofNullable(js.getCategoryConfidence());
+ }
+
private<T> Optional<T> nvlOpt(T t, T dflt) {
return Optional.of(t != null ? t : dflt);
}
@@ -580,9 +557,4 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
public Map<String, Set<String>> getRestrictedCombinations() {
return restrictedCombinations;
}
-
- @Override
- public Optional<NCContextWordCategoriesConfig>
getContextWordCategoriesConfig() {
- return Optional.ofNullable(ctxWordMdlCfg);
- }
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index d44a1b4..c82ddd2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -1218,9 +1218,4 @@ public interface NCModelView extends NCMetadata {
default Map<String, Set<String>> getRestrictedCombinations() {
return Collections.emptyMap();
}
-
- // TODO:
- default Optional<NCContextWordCategoriesConfig>
getContextWordCategoriesConfig() {
- return Optional.empty();
- }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
deleted file mode 100644
index 4f7d9a4..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.model.impl.json;
-
-import java.util.Map;
-
-/**
- * TODO:
- */
-public class NCContextWordModelConfigJson {
- private String[] samples = new String[0];
- private boolean useIntentsSamples;
- private Map<String, Double> supportedElements;
-
- public String[] getSamples() {
- return samples;
- }
- public void setSamples(String[] samples) {
- this.samples = samples;
- }
- public boolean isUseIntentsSamples() {
- return useIntentsSamples;
- }
- public void setUseIntentsSamples(boolean useIntentsSamples) {
- this.useIntentsSamples = useIntentsSamples;
- }
- public Map<String, Double> getSupportedElements() {
- return supportedElements;
- }
- public void setSupportedElements(Map<String, Double> supportedElements) {
- this.supportedElements = supportedElements;
- }
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index addca45..8217a6a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -36,6 +36,8 @@ public class NCElementJson {
private Boolean isPermutateSynonyms;
// Can be null.
private Boolean isSparse;
+ // Can be null.
+ private Double categoryConfidence;
public String getParentId() {
return parentId;
@@ -97,4 +99,10 @@ public class NCElementJson {
public void setSparse(Boolean sparse) {
isSparse = sparse;
}
+ public Double getCategoryConfidence() {
+ return categoryConfidence;
+ }
+ public void setCategoryConfidence(Double categoryConfidence) {
+ this.categoryConfidence = categoryConfidence;
+ }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index 1c0152d..d2459d3 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -40,7 +40,6 @@ public class NCModelJson {
private String[] intents;
private String[] parsers;
private Map<String, String[]> restrictedCombinations;
- private NCContextWordModelConfigJson contextWordModelConfigJson;
private int maxUnknownWords = DFLT_MAX_UNKNOWN_WORDS;
private int maxFreeWords = DFLT_MAX_FREE_WORDS;
@@ -272,10 +271,4 @@ public class NCModelJson {
return restrictedCombinations;
}
public void setRestrictedCombinations(Map<String, String[]>
restrictedCombinations) { this.restrictedCombinations = restrictedCombinations;}
- public NCContextWordModelConfigJson getContextWordModelConfigJson() {
- return contextWordModelConfigJson;
- }
- public void setContextWordModelConfigJson(NCContextWordModelConfigJson
contextWordModelConfigJson) {
- this.contextWordModelConfigJson = contextWordModelConfigJson;
- }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index 53da1c4..3d6a72f 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.probe.mgrs.conn
+import scala.compat.java8.OptionConverters._
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.config.NCConfigurable
@@ -217,26 +218,28 @@ object NCConnectionManager extends NCService {
val (
values,
corpus,
- supported
+ categoriesElements
): (
java.util.Map[String, java.util.Map[String,
java.util.Set[String]]],
java.util.Set[String],
java.util.Map[String, lang.Double]
- ) =
- if (mdl.getContextWordCategoriesConfig.isEmpty)
+ ) = {
+ val ctxCatElems =
mdl.getElements.asScala.flatMap(e =>
+ e.getCategoryConfidence.asScala match {
+ case Some(v) => Some(e.getId -> v)
+ case None => None
+ }
+ ).toMap
+
+ if (ctxCatElems.isEmpty)
(Collections.emptyMap(),
Collections.emptySet(), Collections.emptyMap())
else {
- val cfg =
mdl.getContextWordCategoriesConfig.get()
-
- var corpus = if (cfg.getCorpus == null)
Seq.empty else cfg.getCorpus.asScala
-
- if (cfg.useIntentsSamples)
- corpus = corpus ++
wrapper.samples.flatMap(_._2.flatMap(p => p))
+ var corpus =
wrapper.samples.flatMap(_._2.flatMap(p => p))
val values =
mdl.getElements.
asScala.
- filter(p =>
cfg.getSupportedElements.containsKey(p.getId)).
+ filter(p =>
ctxCatElems.contains(p.getId)).
map(e =>
e.getId ->
e.getValues.asScala.map(p =>
p.getName -> {
@@ -248,10 +251,11 @@ object NCConnectionManager extends NCService {
(
values.asJava,
- corpus.toSet.asJava,
- cfg.getSupportedElements
+ corpus.asJava,
+ ctxCatElems.asJava
)
}
+ }
// Model already validated.
@@ -264,7 +268,7 @@ object NCConnectionManager extends NCService {
new
util.HashSet[String](mdl.getEnabledBuiltInTokens),
values,
corpus,
- supported
+ categoriesElements
)
})
), cryptoKey)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 86b0cc4..741ac11 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -421,10 +421,15 @@ object NCDeployManager extends NCService {
s"]")
// Validates context words parameters.
- // TODO:
- val ctxCfgOpt = mdl.getContextWordCategoriesConfig;
+ val elems = mdl.getElements.asScala
+ val ctxCatElems = elems.flatMap(e =>
+ e.getCategoryConfidence.asScala match {
+ case Some(v) => Some(e.getId -> v)
+ case None => None
+ }
+ ).toMap
- if (ctxCfgOpt.isPresent) {
+ if (ctxCatElems.nonEmpty) {
val cnt =
mdl.getElements.asScala.map(_.getValues.asScala.map(_.getSynonyms.size()).sum).sum
if (cnt > MAX_CTXWORD_VALS_CNT)
@@ -437,26 +442,16 @@ object NCDeployManager extends NCService {
s"]"
)
-
- val ctxCfg = ctxCfgOpt.get()
-
- if (ctxCfg.getSupportedElements == null ||
ctxCfg.getSupportedElements.isEmpty)
- // TODO:
- throw new NCE(s"Model doesn't contain supported context word
elements.")
-
- val supportedElems = ctxCfg.getSupportedElements.asScala
- val valsElems = mdl.getElements.asScala.filter(p => p.getValues !=
null && !p.getValues.isEmpty).
+ val valsElems = elems.filter(p => p.getValues != null &&
!p.getValues.isEmpty).
map(p => p.getId -> p.getValues.size()).toMap
- var ids = supportedElems.filter { case (elemId, _) =>
!valsElems.keySet.contains(elemId) }.keys
+ var ids = ctxCatElems.filter { case (elemId, _) =>
!valsElems.keySet.contains(elemId) }.keys
- if (ids.nonEmpty) {
+ if (ids.nonEmpty)
// TODO:
throw new NCE(s"Model doesn't contain values elements with
following identifiers: ${ids.mkString(", ")}")
- }
-
- ids = supportedElems.filter { case (_, conf) => conf < 0 || conf >
1 }.keys
+ ids = ctxCatElems.filter { case (_, conf) => conf < 0 || conf > 1
}.keys
if (ids.nonEmpty)
// TODO:
@@ -556,27 +551,15 @@ object NCDeployManager extends NCService {
val samples = scanSamples(mdl)
- if (ctxCfgOpt.isPresent) {
- if (samples.isEmpty) {
- if (ctxCfgOpt.get.getCorpus.isEmpty)
- // TODO:
- throw new NCE("Model should contains samples for intents
or in context word config.")
-
- }
- else {
- val cnt = samples.size + ctxCfgOpt.get.getCorpus.size()
-
- if (cnt > MAX_CTXWORD_SAMPLES_CNT)
- // TODO: do we need print recommended value.?
- logger.warn(
- s"Too many samples detected for context words elements
[" +
- s"mdlId=$mdlId, " +
- s"cnt=$cnt," +
- s"recommended=$MAX_CTXWORD_SAMPLES_CNT" +
- s"]"
- )
- }
- }
+ if (ctxCatElems.nonEmpty && samples.size > MAX_CTXWORD_SAMPLES_CNT)
+ // TODO: do we need print recommended value.?
+ logger.warn(
+ s"Too many samples detected for context words elements [" +
+ s"mdlId=$mdlId, " +
+ s"cnt=${samples.size}," +
+ s"recommended=$MAX_CTXWORD_SAMPLES_CNT" +
+ s"]"
+ )
NCProbeModel(
model = mdl,
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 72d28eb..a6cbd57 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -630,7 +630,7 @@ object NCProbeManager extends NCService {
enabledBuiltInToks,
values,
corpus,
- supported
+ categoriesElements
) =>
require(mdlId != null)
require(mdlName != null)
@@ -657,7 +657,7 @@ object NCProbeManager extends NCService {
}.toMap
}.toMap,
corpus =
corpus.asScala.toSet,
- elements =
supported.asScala.toMap
+ elements =
categoriesElements.asScala.toMap
)
)
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index cc28e6f..ad52e34 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -17,14 +17,14 @@
package org.apache.nlpcraft.model.ctxword
-import org.apache.nlpcraft.model.{NCContext, NCContextWordCategoriesConfig,
NCElement, NCModel, NCResult, NCValue}
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCIntent,
NCIntentSample, NCModel, NCResult, NCValue}
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Test
-import java.util
import java.util.{Collections, Optional}
+import java.{lang, util}
import scala.collection.mutable.ArrayBuffer
-import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava,
SeqHasAsJava, SetHasAsJava}
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava,
SetHasAsJava}
object NCContextWordSpecModel {
case class Value(name: String, syns: String*) extends NCValue {
@@ -36,6 +36,7 @@ object NCContextWordSpecModel {
override def getId: String = id
override def getValues: util.List[NCValue] = values.asJava
override def getGroups: util.List[String] =
Collections.singletonList("testGroup")
+ override def getCategoryConfidence: Optional[lang.Double] =
Optional.of(level)
}
var expected: String = _
@@ -50,37 +51,29 @@ class NCContextWordSpecModel extends NCModel {
val MDL_LEVEL: java.lang.Double = 0.68
- override def getContextWordCategoriesConfig:
Optional[NCContextWordCategoriesConfig] = {
- Optional.of(
- new NCContextWordCategoriesConfig() {
- override def getSupportedElements: util.Map[String,
java.lang.Double] =
- getElements.asScala.map(e => e.getId ->
MDL_LEVEL).toMap.asJava
-
- override def useIntentsSamples(): Boolean = false
-
- override def getCorpus: util.List[String] =
- Seq(
- "I like drive my new BMW",
- "BMW has the best engine",
- "Luxury cars like Mercedes and BMW are prime targets",
- "BMW will install side air bags up front",
- "I want to change BMW engine",
- "I want to try BMW driver dynamics",
- "BMW has excellent driver protection",
- "BMW pricing are going up",
- "BMW drivers have the highest loyalty",
-
- "A wild cat is very dangerous",
- "A fox eat hens",
- "The fox was already in your chicken house",
-
- "What is the local temperature?",
- "This is the first day of heavy rain",
- "It is the beautiful day, the sun is shining"
- ).asJava
- }
+ @NCIntentSample(
+ Array(
+ "I like drive my new BMW",
+ "BMW has the best engine",
+ "Luxury cars like Mercedes and BMW are prime targets",
+ "BMW will install side air bags up front",
+ "I want to change BMW engine",
+ "I want to try BMW driver dynamics",
+ "BMW has excellent driver protection",
+ "BMW pricing are going up",
+ "BMW drivers have the highest loyalty",
+
+ "A wild cat is very dangerous",
+ "A fox eat hens",
+ "The fox was already in your chicken house",
+
+ "What is the local temperature?",
+ "This is the first day of heavy rain",
+ "It is the beautiful day, the sun is shining"
)
- }
+ )
+ @NCIntent("intent=i term(t)={false}")
+ def x(): NCResult = NCResult.text("OK")
override def getElements: util.Set[NCElement] =
Set(