This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new c020934 WIP.
c020934 is described below
commit c02093405aa3447acc4512bd7ac8dfb36ba1b5ea
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Jun 30 16:27:36 2021 +0300
WIP.
---
.../nlpcraft/model/NCContextWordElementConfig.java | 30 ++++++
.../nlpcraft/model/NCContextWordModelConfig.java | 36 ++++++++
.../scala/org/apache/nlpcraft/model/NCElement.java | 6 --
.../apache/nlpcraft/model/NCModelFileAdapter.java | 77 ++++++++++++++--
.../org/apache/nlpcraft/model/NCModelView.java | 5 +-
.../impl/json/NCContextWordElementScoreJson.java | 42 +++++++++
.../impl/json/NCContextWordModelConfigJson.java | 48 ++++++++++
.../nlpcraft/model/impl/json/NCElementJson.java | 8 --
.../nlpcraft/model/impl/json/NCModelJson.java | 11 +--
.../probe/mgrs/conn/NCConnectionManager.scala | 59 ++++++++----
.../probe/mgrs/deploy/NCDeployManager.scala | 101 ++++++++++++++-------
.../nlpcraft/server/mdo/NCProbeModelMdo.scala | 3 +-
.../enrichers/ctxword/NCContextWordEnricher.scala | 31 ++++++-
.../nlpcraft/server/probe/NCProbeManager.scala | 27 ++++--
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 31 +++++--
15 files changed, 417 insertions(+), 98 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordElementConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordElementConfig.java
new file mode 100644
index 0000000..def58e8
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordElementConfig.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import java.io.Serializable;
+
+/**
+ * Context word settings of a single model element: a score threshold and the policy
+ * that decides how detected scores are compared against that threshold.
+ *
+ * Policies, as applied by the server-side context word enricher:
+ * MAX - the highest detected score must reach the threshold,
+ * MIN - the lowest detected score must reach the threshold,
+ * AVERAGE - the mean of the detected scores must reach the threshold,
+ * ANY - at least one detected score must reach the threshold.
+ *
+ * The score is checked at model deployment and must be within 0..1.
+ */
+public interface NCContextWordElementConfig extends Serializable {
+ enum NCContextWordElementPolicy {
+ MAX, MIN, AVERAGE, ANY
+ }
+
+ NCContextWordElementPolicy getPolicy();
+ double getScore();
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
new file mode 100644
index 0000000..fc5ac8d
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Model-level context word configuration.
+ *
+ * getSamples() - sample sentences configured directly on the model; empty by default.
+ * useIntentsSamples() - whether intent samples are added to the configured ones; true by default.
+ * getSupportedElements() - per-element settings keyed by element ID. Model deployment requires
+ * this map to be non-empty and every key to refer to a model element that has values.
+ */
+public interface NCContextWordModelConfig extends Serializable {
+ default List<String> getSamples() {
+ return Collections.emptyList();
+ }
+
+ default boolean useIntentsSamples() {
+ return true;
+ }
+
+ Map<String, NCContextWordElementConfig> getSupportedElements();
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 774484f..0b7b24d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -382,10 +382,4 @@ public interface NCElement extends NCMetadata,
Serializable {
default Optional<Boolean> isSparse() {
return Optional.empty();
}
-
- // TODO: 0 .. 1
- // Empty - means disabled.
- default Optional<Double> getContextWordStrictLevel() {
- return Optional.empty();
- }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index 3310252..83f0e87 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -66,6 +66,7 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
private final Set<NCElement> elems;
private final List<NCCustomParser> parsers;
private final Map<String, Set<String>> restrictedCombinations;
+ private final NCContextWordModelConfig ctxWordMdlCfg;
private final String origin;
@@ -122,6 +123,7 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
this.intents = convertToList(proxy.getIntents(), null);
this.parsers = convertParsers(proxy.getParsers());
this.restrictedCombinations =
convertRestrictedCombinations(proxy.getRestrictedCombinations());
+ this.ctxWordMdlCfg = convert(proxy.getContextWordModelConfigJson());
// NOTE: we can only test/check this at this point. Downstream - this
information is lost.
if (proxy.getIntents() != null && intents.size() !=
proxy.getIntents().length)
@@ -264,6 +266,69 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
/**
*
+ * @param js
+ * @return
+ */
+    private static NCContextWordElementConfig convert(NCContextWordElementScoreJson js) {
+        // Thin view over the JSON bean: the policy name is parsed on every getPolicy() call,
+        // so an invalid name is reported when the policy is first read, not when this view is created.
+        return new NCContextWordElementConfig() {
+            @Override
+            public NCContextWordElementPolicy getPolicy() {
+                String policy = js.getPolicy();
+
+                if (policy == null) {
+                    // TODO:
+                    throw new NCException("Element score policy cannot be null.");
+                }
+
+                try {
+                    // Parse the same value that was null-checked and that is reported on failure.
+                    return NCContextWordElementPolicy.valueOf(policy);
+                }
+                catch (IllegalArgumentException e) {
+                    // TODO:
+                    throw new NCException("Element score policy invalid value: " + policy, e);
+                }
+            }
+
+            @Override
+            public double getScore() {
+                // TODO: check here ?
+                // No range check at this point: the value is returned exactly as parsed from JSON.
+                return js.getScore();
+            }
+        };
+    }
+
+    /**
+     * Wraps the JSON context word configuration into the public model configuration interface.
+     * Missing samples or supported elements are exposed as empty collections, never as {@code null},
+     * which matches the default of {@link NCContextWordModelConfig#getSamples()}.
+     *
+     * @param js JSON configuration. Can be {@code null}.
+     * @return Configuration view over {@code js} or {@code null} if {@code js} is {@code null}.
+     */
+    private static NCContextWordModelConfig convert(NCContextWordModelConfigJson js) {
+        return js != null ?
+            new NCContextWordModelConfig() {
+                @Override
+                public List<String> getSamples() {
+                    // Callers dereference the result without a null check.
+                    return js.getSamples() != null ?
+                        Arrays.asList(js.getSamples()) :
+                        java.util.Collections.<String>emptyList();
+                }
+
+                @Override
+                public boolean useIntentsSamples() {
+                    return js.isUseIntentsSamples();
+                }
+
+                @Override
+                public Map<String, NCContextWordElementConfig> getSupportedElements() {
+                    Map<String, NCContextWordElementScoreJson> m = js.getSupportedElements();
+
+                    return m != null ?
+                        m.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, p -> convert(p.getValue()))) :
+                        java.util.Collections.<String, NCContextWordElementConfig>emptyMap();
+                }
+            } :
+            null;
+    }
+
+ /**
+ *
* @param proxy
* @param arr
* @return
@@ -357,13 +422,6 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
return nvlOpt(js.isSparse(), proxy.isSparse());
}
- @Override
- public Optional<Double> getContextWordStrictLevel() {
- return js.getContextWordStrictLevel() != null ?
- Optional.of(js.getContextWordStrictLevel()) :
-
Optional.ofNullable(proxy.getContextWordStrictLevel());
- }
-
private<T> Optional<T> nvlOpt(T t, T dflt) {
return Optional.of(t != null ? t : dflt);
}
@@ -559,4 +617,9 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
public Map<String, Set<String>> getRestrictedCombinations() {
return restrictedCombinations;
}
+
+ @Override
+ public Optional<NCContextWordModelConfig> getContextWordModelConfig() {
+ return Optional.ofNullable(ctxWordMdlCfg);
+ }
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index f4ba31f..4cf7046 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -1219,9 +1219,8 @@ public interface NCModelView extends NCMetadata {
return Collections.emptyMap();
}
- // TODO: 0 .. 1
- // Empty - means disabled. default. Can be overridden by each elements.
- default Optional<Double> getContextWordStrictLevel() {
+ // TODO:
+ default Optional<NCContextWordModelConfig> getContextWordModelConfig() {
return Optional.empty();
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordElementScoreJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordElementScoreJson.java
new file mode 100644
index 0000000..df5a8c7
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordElementScoreJson.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.impl.json;
+
+/**
+ * JSON representation of one element's context word settings: the policy name and the score.
+ * The policy is kept as a plain string here; turning it into an enum constant is left to
+ * the code that consumes this bean.
+ */
+public class NCContextWordElementScoreJson {
+ private String policy;
+ private double score;
+
+ public String getPolicy() {
+ return policy;
+ }
+
+ public void setPolicy(String policy) {
+ this.policy = policy;
+ }
+
+ public double getScore() {
+ return score;
+ }
+
+ public void setScore(double score) {
+ this.score = score;
+ }
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
new file mode 100644
index 0000000..2588b87
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.impl.json;
+
+import java.util.Map;
+
+/**
+ * JSON representation of the model-level context word configuration:
+ * explicitly configured samples, the intent samples flag and per-element settings
+ * keyed by element ID.
+ */
+public class NCContextWordModelConfigJson {
+    private String[] samples = new String[0];
+    // Defaults to 'true' so that a configuration omitting this field behaves the same
+    // as the default of NCContextWordModelConfig#useIntentsSamples().
+    private boolean useIntentsSamples = true;
+    private Map<String, NCContextWordElementScoreJson> supportedElements;
+
+    public String[] getSamples() {
+        return samples;
+    }
+    public void setSamples(String[] samples) {
+        this.samples = samples;
+    }
+    public boolean isUseIntentsSamples() {
+        return useIntentsSamples;
+    }
+    public void setUseIntentsSamples(boolean useIntentsSamples) {
+        this.useIntentsSamples = useIntentsSamples;
+    }
+    public Map<String, NCContextWordElementScoreJson> getSupportedElements() {
+        return supportedElements;
+    }
+    public void setSupportedElements(Map<String, NCContextWordElementScoreJson> supportedElements) {
+        this.supportedElements = supportedElements;
+    }
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index 992b1c1..addca45 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -36,8 +36,6 @@ public class NCElementJson {
private Boolean isPermutateSynonyms;
// Can be null.
private Boolean isSparse;
- // Can be null.
- private Double contextWordStrictLevel;
public String getParentId() {
return parentId;
@@ -99,10 +97,4 @@ public class NCElementJson {
public void setSparse(Boolean sparse) {
isSparse = sparse;
}
- public Double getContextWordStrictLevel() {
- return contextWordStrictLevel;
- }
- public void setContextWordStrictLevel(Double contextWordStrictLevel) {
- this.contextWordStrictLevel = contextWordStrictLevel;
- }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index 599c6c4..1c0152d 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -40,8 +40,7 @@ public class NCModelJson {
private String[] intents;
private String[] parsers;
private Map<String, String[]> restrictedCombinations;
- // Can be null.
- private Double contextWordStrictLevel;
+ private NCContextWordModelConfigJson contextWordModelConfigJson;
private int maxUnknownWords = DFLT_MAX_UNKNOWN_WORDS;
private int maxFreeWords = DFLT_MAX_FREE_WORDS;
@@ -273,10 +272,10 @@ public class NCModelJson {
return restrictedCombinations;
}
public void setRestrictedCombinations(Map<String, String[]>
restrictedCombinations) { this.restrictedCombinations = restrictedCombinations;}
- public Double getContextWordStrictLevel() {
- return contextWordStrictLevel;
+ public NCContextWordModelConfigJson getContextWordModelConfigJson() {
+ return contextWordModelConfigJson;
}
- public void setContextWordStrictLevel(Double contextWordStrictLevel) {
- this.contextWordStrictLevel = contextWordStrictLevel;
+ public void setContextWordModelConfigJson(NCContextWordModelConfigJson
contextWordModelConfigJson) {
+ this.contextWordModelConfigJson = contextWordModelConfigJson;
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index df9378c..19a0af3 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -24,17 +24,18 @@ import org.apache.nlpcraft.common.crypto._
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.socket._
import org.apache.nlpcraft.common.version.NCVersion
+import org.apache.nlpcraft.model.NCContextWordElementConfig
import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.probe.mgrs.cmd.NCCommandManager
import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
import java.io.{EOFException, IOException, InterruptedIOException}
import java.net.{InetAddress, NetworkInterface}
-import java.{lang, util}
+import java.util
import java.util.concurrent.CountDownLatch
import java.util.{Collections, Properties, TimeZone}
import scala.collection.mutable
-import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava,
SetHasAsJava, SetHasAsScala}
+import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava,
MapHasAsScala, SetHasAsJava, SetHasAsScala}
/**
* Probe down/up link connection manager.
@@ -214,31 +215,50 @@ object NCConnectionManager extends NCService {
NCModelManager.getAllModels().map(wrapper => {
val mdl = wrapper.model
- val ctxWordElems =
mdl.getElements.asScala.filter(_.getContextWordStrictLevel.isPresent)
-
val (
values,
samples,
- levels
+ policies,
+ scores
): (
java.util.Map[String, java.util.Map[String,
java.util.Set[String]]],
java.util.Set[String],
- java.util.Map[String, lang.Double]
+ java.util.Map[String, String],
+ java.util.Map[String, Double]
) =
- if (ctxWordElems.isEmpty)
- (Collections.emptyMap(),
Collections.emptySet(), Collections.emptyMap())
+ if (mdl.getContextWordModelConfig.isEmpty)
+ (Collections.emptyMap(),
Collections.emptySet(), Collections.emptyMap(), Collections.emptyMap())
else {
+ val cfg =
mdl.getContextWordModelConfig.get()
+
+ var samples = if (cfg.getSamples == null)
Seq.empty else cfg.getSamples.asScala
+
+ if (cfg.useIntentsSamples)
+ samples = samples ++
wrapper.samples.flatMap(_._2.flatMap(p => p))
+
+ val values =
+ mdl.getElements.
+ asScala.
+ filter(p =>
cfg.getSupportedElements.containsKey(p.getId)).
+ map(e =>
+ e.getId ->
+ e.getValues.asScala.map(p =>
p.getName -> {
+ val set: util.Set[String] =
new util.HashSet(p.getSynonyms)
+
+ set
+ }).toMap.asJava
+ ).toMap
+
+ val supported =
cfg.getSupportedElements.asScala
+
+ def getData[T](exract:
NCContextWordElementConfig => T): util.Map[String, T] =
+ supported.map(p => p._1 ->
exract(p._2)).asJava
+
(
- ctxWordElems.map(e =>
- e.getId ->
- e.getValues.asScala.map(p =>
p.getName -> {
- val set: util.Set[String]
= new util.HashSet(p.getSynonyms)
-
- set
- }).toMap.asJava
- ).toMap.asJava,
- wrapper.samples.flatMap(_._2.flatMap(p
=> p)).asJava,
- ctxWordElems.map(e => e.getId ->
e.getContextWordStrictLevel.get()).toMap.asJava
+ values.asJava,
+ samples.toSet.asJava,
+ getData(_.getPolicy.toString),
+ getData(_.getScore)
)
}
@@ -253,7 +273,8 @@ object NCConnectionManager extends NCService {
new
util.HashSet[String](mdl.getEnabledBuiltInTokens),
values,
samples,
- levels
+ policies,
+ scores
)
})
), cryptoKey)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 6cd7cdb..e56f883 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -83,7 +83,8 @@ object NCDeployManager extends NCService {
private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
- private final val MAX_CTXWORD_VALS_CNT = 1000
+ private final val MAX_CTXWORD_VALS_CNT = 10000
+ private final val MAX_CTXWORD_SAMPLES_CNT = 1000
@volatile private var data: mutable.ArrayBuffer[NCProbeModel] = _
@volatile private var mdlFactory: NCModelFactory = _
@@ -420,20 +421,51 @@ object NCDeployManager extends NCService {
s"]")
// Validates context words parameters.
- val ctxWordElems =
mdl.getElements.asScala.filter(_.getContextWordStrictLevel.isPresent)
+ // TODO:
+ val ctxCfgOpt = mdl.getContextWordModelConfig;
- if (ctxWordElems.nonEmpty) {
- val valsSynsCnt =
ctxWordElems.toSeq.map(_.getValues.asScala.map(_.getSynonyms.size()).sum).sum
+ if (ctxCfgOpt.isPresent) {
+ val cnt =
mdl.getElements.asScala.map(_.getValues.asScala.map(_.getSynonyms.size()).sum).sum
- if (valsSynsCnt > MAX_CTXWORD_VALS_CNT) {
+ if (cnt > MAX_CTXWORD_VALS_CNT)
// TODO: do we need print recommended value.?
logger.warn(
s"Too many values synonyms detected for context words
elements [" +
s"mdlId=$mdlId, " +
- s"cnt=$valsSynsCnt," +
+ s"cnt=$cnt," +
s"recommended=$MAX_CTXWORD_VALS_CNT" +
- s"]")
+ s"]"
+ )
+
+
+ val ctxCfg = ctxCfgOpt.get()
+
+ if (ctxCfg.getSupportedElements == null ||
ctxCfg.getSupportedElements.isEmpty)
+ // TODO:
+ throw new NCE(s"Model doesn't contain supported context word
elements.")
+
+ val supportedElems = ctxCfg.getSupportedElements.asScala
+ val valsElems = mdl.getElements.asScala.filter(p => p.getValues !=
null && !p.getValues.isEmpty).
+ map(p => p.getId -> p.getValues.size()).toMap
+
+ var ids = supportedElems.filter { case (elemId, _) =>
!valsElems.keySet.contains(elemId) }.keys
+
+ if (ids.nonEmpty) {
+ // TODO:
+ throw new NCE(s"Model doesn't contain values elements with
following identifiers: ${ids.mkString(", ")}")
}
+
+ ids = supportedElems.filter { case (_, score) => score.getPolicy
== null }.keys
+
+ if (ids.nonEmpty)
+ // TODO:
+ throw new NCE(s"Context word policies are null for elements :
${ids.mkString(", ")}")
+
+ ids = supportedElems.filter { case (_, score) => score.getScore <
0 || score.getScore > 1 }.keys
+
+ if (ids.nonEmpty)
+ // TODO:
+ throw new NCE(s"Context word score are out of range (0..1) for
elements : ${ids.mkString(", ")}")
}
// Discard value loaders.
@@ -527,6 +559,30 @@ object NCDeployManager extends NCService {
val simple = idl(syns.toSet, idl = false)
+        val samples = scanSamples(mdl)
+
+        if (ctxCfgOpt.isPresent) {
+            val cwCfg = ctxCfgOpt.get
+
+            // Validate exactly what the probe later sends to the server: the configured samples plus,
+            // only when enabled, the sentences of the intent samples. Sentences are counted, not sample groups.
+            val cfgSamples = Option(cwCfg.getSamples).map(_.asScala.toSet).getOrElse(Set.empty[String])
+            val intentSamples =
+                if (cwCfg.useIntentsSamples) samples.flatMap(_._2.flatMap(p => p)) else Set.empty[String]
+            val cnt = (cfgSamples ++ intentSamples).size
+
+            if (cnt == 0)
+                // TODO:
+                throw new NCE("Model should contains samples for intents or in context word config.")
+            else if (cnt > MAX_CTXWORD_SAMPLES_CNT)
+                // TODO: do we need print recommended value.?
+                logger.warn(
+                    s"Too many samples detected for context words elements [" +
+                    s"mdlId=$mdlId, " +
+                    s"cnt=$cnt," +
+                    s"recommended=$MAX_CTXWORD_SAMPLES_CNT" +
+                    s"]"
+                )
+        }
+
+
NCProbeModel(
model = mdl,
solver = solver,
@@ -538,7 +594,7 @@ object NCDeployManager extends NCService {
exclStopWordsStems = exclStopWords,
suspWordsStems = suspWords,
elements = mdl.getElements.asScala.map(elm => (elm.getId,
elm)).toMap,
- samples = scanSamples(mdl, hasCtxWordElems = ctxWordElems.nonEmpty)
+ samples = samples
)
}
@@ -679,7 +735,7 @@ object NCDeployManager extends NCService {
val mf = makeModelFactory(mft)
mf.initialize(Config.modelFactoryProps.getOrElse(Map.empty[String,
String]).asJava)
-
+
mf
case None => new NCBasicModelFactory
@@ -868,18 +924,6 @@ object NCDeployManager extends NCService {
s"elmId=$elmId" +
s"]")
- if (elm.getContextWordStrictLevel.isPresent) {
- val level = elm.getContextWordStrictLevel.get()
-
- if (level < 0 || level > 1) {
- // TODO:
- throw new NCE(s"Model element context word strict level is
out of range [" +
- s"mdlId=${mdl.getId}, " +
- s"elmId=$elmId, " +
- s"level=$level" +
- s"]")
- }
- }
}
/**
@@ -1009,7 +1053,7 @@ object NCDeployManager extends NCService {
@throws[NCE]
private def mkChunk(mdl: NCModel, chunk: String): NCProbeSynonymChunk = {
def stripSuffix(fix: String, s: String): String = s.slice(fix.length,
s.length - fix.length)
-
+
val mdlId = mdl.getId
// Regex synonym.
@@ -1524,7 +1568,7 @@ object NCDeployManager extends NCService {
(claxx.getDeclaredMethods ++ claxx.getMethods).toSet
}
-
+
/**
*
* @param mdl
@@ -1624,7 +1668,7 @@ object NCDeployManager extends NCService {
s"mdlOrigin=${mdl.getOrigin}, " +
s"intentIds=${unusedIntents.map(_.id).mkString("(", ", ",
")")}]"
)
-
+
intents.toSet
}
@@ -1632,10 +1676,9 @@ object NCDeployManager extends NCService {
* Scans given model for intent samples.
*
* @param mdl Model to scan.
- * @param hasCtxWordElems Flag.
*/
@throws[NCE]
- private def scanSamples(mdl: NCModel, hasCtxWordElems: Boolean):
Set[Sample] = {
+ private def scanSamples(mdl: NCModel): Set[Sample] = {
val mdlId = mdl.getId
val samples = mutable.Buffer.empty[Sample]
@@ -1707,12 +1750,6 @@ object NCDeployManager extends NCService {
}
}
- if (hasCtxWordElems && samples.isEmpty) {
- // TODO:
- // TODO: we don't check samples count, and their validity
(provided samples can for another elements.)
- throw new NCE(s"Model with context word elements should contains
samples [id=${mdl.getId}]")
- }
-
samples.toSet
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index adecd40..6abce64 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.server.mdo
+import org.apache.nlpcraft.model.NCContextWordElementConfig
import org.apache.nlpcraft.server.mdo.impl._
@@ -26,7 +27,7 @@ case class NCModelMLConfigMdo(
@NCMdoField modelId: String,
@NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String,
/*Synonym*/Set[String]]],
@NCMdoField samples: Set[String],
- @NCMdoField levels: Map[String /*Element ID*/, Double]
+ @NCMdoField elements: Map[String /*Element ID*/,
NCContextWordElementConfig]
)
/**
* Probe model MDO.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
index 5b1b1c7..81633e0 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
@@ -22,6 +22,7 @@ import
org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager.stem
import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank._
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.model.NCContextWordElementConfig
import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
import org.apache.nlpcraft.server.nlp.core.{NCNlpParser, NCNlpServerManager,
NCNlpWord}
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
@@ -315,6 +316,30 @@ object NCContextWordEnricher extends NCServerEnricher {
Map.empty[String, ScoreHolder]
}
+    /**
+      * Checks the given scores against the element's score threshold according to the element's policy.
+      *
+      * @param elemScore Element configuration supplying the policy and the threshold.
+      * @param scores Scores to check. Must not be empty.
+      * @return `true` if the scores satisfy the policy: `MAX` - the largest score reaches the threshold,
+      *     `MIN` - the smallest one does, `AVERAGE` - their mean does, `ANY` - at least one of them does.
+      */
+    private def isMatched(elemScore: NCContextWordElementConfig, scores: Double*): Boolean = {
+        require(scores.nonEmpty)
+
+        import NCContextWordElementConfig.NCContextWordElementPolicy._
+
+        // Accessors are called in this order (policy, then score) on purpose: both may throw.
+        val plc = elemScore.getPolicy
+        val threshold = elemScore.getScore
+
+        def reached(v: Double): Boolean = v >= threshold
+
+        plc match {
+            case MAX => reached(scores.max)
+            case MIN => reached(scores.min)
+            case AVERAGE => reached(scores.sum / scores.size)
+            case ANY => scores.exists(reached)
+
+            case _ => throw new AssertionError(s"Unexpected policy: $plc")
+        }
+    }
+
+
override def enrich(ns: NCNlpSentence, parent: Span): Unit =
ns.mlConfig match {
case Some(cfg) =>
@@ -370,7 +395,7 @@ object NCContextWordEnricher extends NCServerEnricher {
suggs.lemma.getOrElse(nounTok.lemma,
EXCL_MIN_SCORE),
suggs.normal.getOrElse(nounTok.normText,
EXCL_MIN_SCORE)
).max
- if score >= cfg.levels(elemId)
+ if isMatched(cfg.elements(elemId), score)
)
add(nounTok, elemId, score, score)
@@ -399,14 +424,14 @@ object NCContextWordEnricher extends NCServerEnricher {
(sugg, req) <- resps;
senScore = normalizeScore(sugg.score);
(elemId, mdlSamplesSuggs) <- mdlSamples;
- elemScore = cfg.levels(elemId);
+ elemScore = cfg.elements(elemId);
sampleScore =
Seq(
mdlSamplesSuggs.stems.getOrElse(stem(sugg.word), EXCL_MIN_SCORE),
mdlSamplesSuggs.normal.getOrElse(sugg.word.toLowerCase, EXCL_MIN_SCORE),
mdlSamplesSuggs.lemma.getOrElse(getSuggestionLemma(req, sugg), EXCL_MIN_SCORE)
).max
- if sampleScore >= elemScore && senScore >= elemScore
// TODO: logic
+ if isMatched(elemScore, sampleScore, senScore)
)
add(ns.tokens(req.index), elemId, senScore,
sampleScore)
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 8870ffe..3f865ed 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -29,6 +29,8 @@ import org.apache.nlpcraft.common.pool.NCThreadPoolManager
import org.apache.nlpcraft.common.socket.NCSocket
import org.apache.nlpcraft.common.version.NCVersion
import org.apache.nlpcraft.common.{NCService, _}
+import org.apache.nlpcraft.model.NCContextWordElementConfig
+import NCContextWordElementConfig.NCContextWordElementPolicy
import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.server.company.NCCompanyManager
import org.apache.nlpcraft.server.mdo._
@@ -596,7 +598,8 @@ object NCProbeManager extends NCService {
s"probeToken=$probeTkn, " +
s"probeId=$probeId, " +
s"proveGuid=$probeGuid" +
- s"]")
+ s"]"
+ )
if (isMultipleProbeRegistrations(probeKey))
respond("S2P_PROBE_MULTIPLE_INSTANCES")
@@ -616,6 +619,7 @@ object NCProbeManager extends NCService {
java.util.Set[String],
java.util.Map[String, java.util.Map[String,
java.util.Set[String]]],
java.util.Set[String],
+ java.util.Map[String, String],
java.util.Map[String, Double]
)]]("PROBE_MODELS").
map {
@@ -626,16 +630,18 @@ object NCProbeManager extends NCService {
enabledBuiltInToks,
values,
samples,
- levels
+ policies,
+ scores
) =>
require(mdlId != null)
require(mdlName != null)
require(mdlVer != null)
require(enabledBuiltInToks != null)
require(
- values.isEmpty && samples.isEmpty &&
levels.isEmpty ||
- !values.isEmpty && !samples.isEmpty &&
!levels.isEmpty
+ values.isEmpty && samples.isEmpty &&
policies.isEmpty ||
+ !values.isEmpty && !samples.isEmpty &&
!policies.isEmpty
)
+ require(policies.size() == scores.size())
NCProbeModelMdo(
id = mdlId,
@@ -643,7 +649,9 @@ object NCProbeManager extends NCService {
version = mdlVer,
enabledBuiltInTokens =
enabledBuiltInToks.asScala.toSet,
mlConfig =
- if (!values.isEmpty)
+ if (!values.isEmpty) {
+ val scoresMap = scores.asScala
+
Some(
NCModelMLConfigMdo(
probeId = probeId,
@@ -656,9 +664,16 @@ object NCProbeManager extends NCService {
}.toMap
}.toMap,
samples =
samples.asScala.toSet,
- levels.asScala.toMap
+ policies.asScala.map {
case (elemId, policy) =>
+ elemId -> new
NCContextWordElementConfig() {
+ override def
getPolicy: NCContextWordElementPolicy =
+
NCContextWordElementPolicy.valueOf(policy)
+ override def
getScore: Double = scoresMap(elemId)
+ }
+ }.toMap
)
)
+ }
else
None
)
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index cd5dfdc..9f200dd 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -17,17 +17,15 @@
package org.apache.nlpcraft.model.ctxword
-import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch,
NCIntentSample, NCIntentTerm, NCModel, NCResult, NCToken, NCValue}
+import
org.apache.nlpcraft.model.NCContextWordElementConfig.NCContextWordElementPolicy
+import org.apache.nlpcraft.model.{NCContextWordElementConfig,
NCContextWordModelConfig, NCElement, NCIntent, NCIntentMatch, NCIntentSample,
NCIntentTerm, NCModel, NCResult, NCToken, NCValue}
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Test
+import java.util
import java.util.{Collections, Optional}
-import java.{lang, util}
-import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava,
SeqHasAsJava, SetHasAsJava}
-/**
- * Test model.
- */
class NCContextWordSpecModel extends NCModel {
override def getId: String = this.getClass.getSimpleName
override def getName: String = this.getClass.getSimpleName
@@ -40,10 +38,15 @@ class NCContextWordSpecModel extends NCModel {
override def getSynonyms: util.List[String] = (Seq(name) ++
syns).asJava
}
+ case class CtxModelConfig(getSupportedElements: util.Map[String,
NCContextWordElementConfig]) extends NCContextWordModelConfig {
+ override def useIntentsSamples(): Boolean = true
+ }
+
+ case class CtxElementConfig(override val getScore: Double, getPolicy:
NCContextWordElementPolicy) extends NCContextWordElementConfig
+
case class Element(id: String, level: Double, values: NCValue*) extends
NCElement {
override def getId: String = id
override def getValues: util.List[NCValue] = values.asJava
- override def getContextWordStrictLevel: Optional[lang.Double] =
Optional.of(level)
override def getGroups: util.List[String] =
Collections.singletonList("testGroup")
}
@@ -51,6 +54,20 @@ class NCContextWordSpecModel extends NCModel {
def apply(id: String, values: NCValue*): Element = new Element(id,
level, values: _*)
}
+    // Every model element is registered as a context word element with the shared test level and the MIN policy.
+    override def getContextWordModelConfig: Optional[NCContextWordModelConfig] = {
+        val supported: Map[String, NCContextWordElementConfig] =
+            getElements.asScala.map(e => e.getId -> CtxElementConfig(level, NCContextWordElementPolicy.MIN)).toMap
+
+        Optional.of(CtxModelConfig(supported.asJava))
+    }
+
+
override def getElements: util.Set[NCElement] =
Set(
Element("class:cars", Value("BMW")),