This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new 85937db WIP.
85937db is described below
commit 85937dba59aaef68157a7d9ac66c7ed4701bc89f
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Jul 6 16:50:51 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 2 +-
...fig.java => NCContextWordCategoriesConfig.java} | 4 +-
.../nlpcraft/model/NCContextWordElementConfig.java | 30 -
.../apache/nlpcraft/model/NCModelFileAdapter.java | 49 +-
.../org/apache/nlpcraft/model/NCModelView.java | 2 +-
.../apache/nlpcraft/model/impl/NCTokenLogger.scala | 4 +-
.../impl/json/NCContextWordElementConfigJson.java | 42 --
.../impl/json/NCContextWordModelConfigJson.java | 6 +-
.../probe/mgrs/conn/NCConnectionManager.scala | 28 +-
.../probe/mgrs/deploy/NCDeployManager.scala | 11 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 5 +-
.../nlpcraft/server/mdo/NCProbeModelMdo.scala | 3 +-
.../nlp/enrichers/NCServerEnrichmentManager.scala | 8 +-
.../ctxword/NCContextWordCategoriesEnricher.scala | 664 +++++++++++++++++++++
.../enrichers/ctxword/NCContextWordEnricher.scala | 557 -----------------
.../nlpcraft/server/probe/NCProbeManager.scala | 26 +-
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 32 +-
.../model/ctxword/NCContextWordSpec2.scala | 4 +-
18 files changed, 716 insertions(+), 761 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 6b93614..38c6e46 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -53,7 +53,7 @@ class NCNlpSentence(
val text: String,
val enabledBuiltInToks: Set[String],
val ctxWordConfig: Option[NCCtxWordConfigMdo] = None,
- var ctxWordData: Map[Int, Map[String, java.util.List[Double]]] = Map.empty,
+        var ctxWordCategories: Map[/** Token index */ Int, Map[/** Element ID */ String, /** Confidence */ Double]] = Map.empty,
override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new
mutable.ArrayBuffer[NCNlpSentenceToken](32),
var firstProbePhase: Boolean = true,
private val deletedNotes: mutable.HashMap[NCNlpSentenceNote,
Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
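
The key change above: the per-token data moves from a list of raw scores to a single confidence per element ID. A minimal sketch of the populated shape (the token index, element ID and value below are illustrative only, not from this commit):

    // Token at index 2 was classified as element "class:animal" with confidence 0.83.
    val ctxWordCategories: Map[Int, Map[String, Double]] =
        Map(2 -> Map("class:animal" -> 0.83))
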
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordCategoriesConfig.java
similarity index 89%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordCategoriesConfig.java
index 7f856c4..0c3b99b 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordModelConfig.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordCategoriesConfig.java
@@ -23,7 +23,7 @@ import java.util.List;
import java.util.Map;
// TODO:
-public interface NCContextWordModelConfig extends Serializable {
+public interface NCContextWordCategoriesConfig extends Serializable {
default List<String> getCorpus() {
return Collections.emptyList();
}
@@ -32,5 +32,5 @@ public interface NCContextWordModelConfig extends
Serializable {
return true;
}
- Map<String, NCContextWordElementConfig> getSupportedElements();
+ Map<String, Double> getSupportedElements();
}
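
With this rename, the per-element policy (ALL/ANY/AVERAGE/MEDIAN) plus score pair from the deleted NCContextWordElementConfig collapses into a single confidence threshold per element ID, which NCDeployManager below validates to be within [0, 1]. A minimal sketch of implementing the renamed interface from Scala; the corpus strings, element ID and threshold are assumptions for illustration, and only getCorpus() and getSupportedElements() appear in this diff:

    import org.apache.nlpcraft.model.NCContextWordCategoriesConfig
    import scala.jdk.CollectionConverters._

    val cfg: NCContextWordCategoriesConfig = new NCContextWordCategoriesConfig {
        // Sample sentences used to build the model corpus (overrides the default).
        override def getCorpus: java.util.List[String] =
            Seq("I bought a dog.", "I like my cat.").asJava

        // Element ID -> minimum confidence required to mark a token.
        override def getSupportedElements: java.util.Map[String, java.lang.Double] =
            Map("class:animal" -> java.lang.Double.valueOf(0.7)).asJava
    }
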
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordElementConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordElementConfig.java
deleted file mode 100644
index 6e28fd5..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContextWordElementConfig.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.model;
-
-import java.io.Serializable;
-
-// TODO:
-public interface NCContextWordElementConfig extends Serializable {
- enum NCContextWordElementPolicy {
- ALL, ANY, AVERAGE, MEDIAN
- }
-
- NCContextWordElementPolicy getPolicy();
- double getScore();
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index fdf8b48..9ed9130 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -66,7 +66,7 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
private final Set<NCElement> elems;
private final List<NCCustomParser> parsers;
private final Map<String, Set<String>> restrictedCombinations;
- private final NCContextWordModelConfig ctxWordMdlCfg;
+ private final NCContextWordCategoriesConfig ctxWordMdlCfg;
private final String origin;
@@ -269,42 +269,9 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
* @param js
* @return
*/
- private static NCContextWordElementConfig
convert(NCContextWordElementConfigJson js) {
- return new NCContextWordElementConfig() {
- @Override
- public NCContextWordElementPolicy getPolicy() {
- String policy = js.getPolicy();
-
- if (policy == null) {
- // TODO:
- throw new NCException("Element score policy cannot be
null.");
- }
-
- try {
- return NCContextWordElementPolicy.valueOf(js.getPolicy());
- }
- catch (IllegalArgumentException e) {
- // TODO:
- throw new NCException("Element score policy invalid
value:" + policy, e);
- }
- }
-
- @Override
- public double getScore() {
- // TODO: check here ?
- return js.getScore();
- }
- };
- }
-
- /**
- *
- * @param js
- * @return
- */
- private static NCContextWordModelConfig
convert(NCContextWordModelConfigJson js) {
+ private static NCContextWordCategoriesConfig
convert(NCContextWordModelConfigJson js) {
return js != null?
- new NCContextWordModelConfig() {
+ new NCContextWordCategoriesConfig() {
@Override
public List<String> getCorpus() {
return js.getSamples() != null ?
Arrays.asList(js.getSamples()) : null;
@@ -316,12 +283,8 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
}
@Override
- public Map<String, NCContextWordElementConfig>
getSupportedElements() {
- Map<String, NCContextWordElementConfigJson> m =
js.getSupportedElements();
-
- return m != null ?
-
m.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, p ->
convert(p.getValue()))) :
- null;
+ public Map<String, Double> getSupportedElements() {
+ return js.getSupportedElements();
}
}:
null;
@@ -619,7 +582,7 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
}
@Override
- public Optional<NCContextWordModelConfig> getContextWordModelConfig() {
+ public Optional<NCContextWordCategoriesConfig>
getContextWordCategoriesConfig() {
return Optional.ofNullable(ctxWordMdlCfg);
}
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index 4cf7046..d44a1b4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -1220,7 +1220,7 @@ public interface NCModelView extends NCMetadata {
}
// TODO:
- default Optional<NCContextWordModelConfig> getContextWordModelConfig() {
+ default Optional<NCContextWordCategoriesConfig>
getContextWordCategoriesConfig() {
return Optional.empty();
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 0e4c541..f9b07f9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -618,9 +618,9 @@ object NCTokenLogger extends LazyLogging {
if (parts.nonEmpty)
s = s"$s, parts=[$parts]"
-
t.meta(s"${t.getId}:scores").asInstanceOf[java.util.List[Double]] match {
+
t.meta(s"${t.getId}:confidence").asInstanceOf[java.lang.Double] match {
case null => // No-op.
- case scores => s = s"$s,
scores='${scores.asScala.map(FMT_NUM.format).mkString(",")}'"
+ case conf => s = s"$s,
confidence=${FMT_NUM.format(conf)}"
}
s
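
The logger change reflects the new metadata contract: each detected token now carries a single `<elementId>:confidence` value instead of an `<elementId>:scores` list. A hedged sketch of reading it from model code, mirroring the access pattern above (`tok` is a hypothetical detected token):

    val conf = tok.meta(s"${tok.getId}:confidence").asInstanceOf[java.lang.Double]

    if (conf != null)
        println(s"Element '${tok.getId}' detected with confidence $conf.")
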
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordElementConfigJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordElementConfigJson.java
deleted file mode 100644
index cb701ad..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordElementConfigJson.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.model.impl.json;
-
-/**
- * TODO:
- */
-public class NCContextWordElementConfigJson {
- private String policy;
- private double score;
-
- public String getPolicy() {
- return policy;
- }
-
- public void setPolicy(String policy) {
- this.policy = policy;
- }
-
- public double getScore() {
- return score;
- }
-
- public void setScore(double score) {
- this.score = score;
- }
-}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
index 43e846d..4f7d9a4 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCContextWordModelConfigJson.java
@@ -25,7 +25,7 @@ import java.util.Map;
public class NCContextWordModelConfigJson {
private String[] samples = new String[0];
private boolean useIntentsSamples;
- private Map<String, NCContextWordElementConfigJson> supportedElements;
+ private Map<String, Double> supportedElements;
public String[] getSamples() {
return samples;
@@ -39,10 +39,10 @@ public class NCContextWordModelConfigJson {
public void setUseIntentsSamples(boolean useIntentsSamples) {
this.useIntentsSamples = useIntentsSamples;
}
- public Map<String, NCContextWordElementConfigJson> getSupportedElements() {
+ public Map<String, Double> getSupportedElements() {
return supportedElements;
}
- public void setSupportedElements(Map<String,
NCContextWordElementConfigJson> supportedElements) {
+ public void setSupportedElements(Map<String, Double> supportedElements) {
this.supportedElements = supportedElements;
}
}
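
On the JSON side, the bean now maps element IDs straight to numbers. A hypothetical model-file fragment matching the bean's field names (samples, useIntentsSamples, supportedElements); the element IDs and thresholds are made up, and the enclosing section name is not shown in this diff:

    {
      "samples": ["I bought a dog.", "I like my cat."],
      "useIntentsSamples": true,
      "supportedElements": { "class:animal": 0.7, "class:vehicle": 0.8 }
    }
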
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index 9d731db..99d48e2 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -24,18 +24,17 @@ import org.apache.nlpcraft.common.crypto._
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.socket._
import org.apache.nlpcraft.common.version.NCVersion
-import org.apache.nlpcraft.model.NCContextWordElementConfig
import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.probe.mgrs.cmd.NCCommandManager
import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
import java.io.{EOFException, IOException, InterruptedIOException}
import java.net.{InetAddress, NetworkInterface}
-import java.util
+import java.{lang, util}
import java.util.concurrent.CountDownLatch
import java.util.{Collections, Properties, TimeZone}
import scala.collection.mutable
-import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava,
MapHasAsScala, SetHasAsJava, SetHasAsScala}
+import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava,
SetHasAsJava, SetHasAsScala}
/**
* Probe down/up link connection manager.
@@ -218,18 +217,16 @@ object NCConnectionManager extends NCService {
val (
values,
corpus,
- policies,
- scores
+ supported
): (
java.util.Map[String, java.util.Map[String,
java.util.Set[String]]],
java.util.Set[String],
- java.util.Map[String, String],
- java.util.Map[String, Double]
+ java.util.Map[String, lang.Double]
) =
- if (mdl.getContextWordModelConfig.isEmpty)
- (Collections.emptyMap(),
Collections.emptySet(), Collections.emptyMap(), Collections.emptyMap())
+ if (mdl.getContextWordCategoriesConfig.isEmpty)
+ (Collections.emptyMap(),
Collections.emptySet(), Collections.emptyMap())
else {
- val cfg =
mdl.getContextWordModelConfig.get()
+ val cfg =
mdl.getContextWordCategoriesConfig.get()
var corpus = if (cfg.getCorpus == null)
Seq.empty else cfg.getCorpus.asScala
@@ -249,16 +246,10 @@ object NCConnectionManager extends NCService {
}).toMap.asJava
).toMap
- val supported =
cfg.getSupportedElements.asScala
-
- def getData[T](exract:
NCContextWordElementConfig => T): util.Map[String, T] =
- supported.map(p => p._1 ->
exract(p._2)).asJava
-
(
values.asJava,
corpus.toSet.asJava,
- getData(_.getPolicy.toString),
- getData(_.getScore)
+ cfg.getSupportedElements
)
}
@@ -273,8 +264,7 @@ object NCConnectionManager extends NCService {
new
util.HashSet[String](mdl.getEnabledBuiltInTokens),
values,
corpus,
- policies,
- scores
+ supported
)
})
), cryptoKey)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 39ea521..d1ece20 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -422,7 +422,7 @@ object NCDeployManager extends NCService {
// Validates context words parameters.
// TODO:
- val ctxCfgOpt = mdl.getContextWordModelConfig;
+ val ctxCfgOpt = mdl.getContextWordCategoriesConfig;
if (ctxCfgOpt.isPresent) {
val cnt =
mdl.getElements.asScala.map(_.getValues.asScala.map(_.getSynonyms.size()).sum).sum
@@ -455,17 +455,12 @@ object NCDeployManager extends NCService {
throw new NCE(s"Model doesn't contain values elements with
following identifiers: ${ids.mkString(", ")}")
}
- ids = supportedElems.filter { case (_, score) => score.getPolicy
== null }.keys
- if (ids.nonEmpty)
- // TODO:
- throw new NCE(s"Context word policies are null for elements :
${ids.mkString(", ")}")
-
- ids = supportedElems.filter { case (_, score) => score.getScore <
0 || score.getScore > 1 }.keys
+ ids = supportedElems.filter { case (_, conf) => conf < 0 || conf >
1 }.keys
if (ids.nonEmpty)
// TODO:
- throw new NCE(s"Context word score are out of range (0..1) for
elements : ${ids.mkString(", ")}")
+                    throw new NCE(s"Context word confidences are out of range (0..1) for elements: ${ids.mkString(", ")}")
}
// Discard value loaders.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 9e1ffdb..efce5fc 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -29,6 +29,7 @@ import
org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants,
NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
+import java.lang
import java.util.{List => JList}
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
@@ -448,7 +449,7 @@ object NCModelEnricher extends NCProbeEnricher {
"enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" ->
mdl.model.getId, "txt" -> ns.text
) { span =>
if (ns.firstProbePhase)
- for ((tokIdx, map) <- ns.ctxWordData; (elemId, score) <- map)
+ for ((tokIdx, map) <- ns.ctxWordCategories; (elemId, conf) <-
map)
mark(
ns = ns,
elem =
@@ -456,7 +457,7 @@ object NCModelEnricher extends NCProbeEnricher {
getOrElse(throw new NCE(s"Element not found:
$elemId"))._2,
toks = Seq(ns.tokens(tokIdx)),
direct = true,
- metaOpt = Some(Map("scores" -> score))
+ metaOpt = Some(Map("confidence" ->
lang.Double.valueOf(conf)))
)
val req = NCRequestImpl(senMeta, ns.srvReqId)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index 5ed0ae4..93825f8 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -17,7 +17,6 @@
package org.apache.nlpcraft.server.mdo
-import org.apache.nlpcraft.model.NCContextWordElementConfig
import org.apache.nlpcraft.server.mdo.impl._
@@ -27,7 +26,7 @@ case class NCCtxWordConfigMdo(
@NCMdoField modelId: String,
@NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String,
/*Synonym*/Set[String]]],
@NCMdoField corpus: Set[String],
- @NCMdoField elements: Map[String /*Element ID*/,
NCContextWordElementConfig]
+ @NCMdoField supportedElements: Map[String /*Element ID*/, /*Confidence*/
Double]
)
/**
* Probe model MDO.
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 022fad9..2eb600e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -30,7 +30,7 @@ import org.apache.nlpcraft.server.mdo.NCCtxWordConfigMdo
import org.apache.nlpcraft.server.nlp.core.{NCNlpNerEnricher,
NCNlpServerManager}
import org.apache.nlpcraft.server.nlp.enrichers.basenlp.NCBaseNlpEnricher
import
org.apache.nlpcraft.server.nlp.enrichers.coordinate.NCCoordinatesEnricher
-import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordEnricher
+import
org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordCategoriesEnricher
import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateEnricher
import org.apache.nlpcraft.server.nlp.enrichers.geo.NCGeoEnricher
import org.apache.nlpcraft.server.nlp.enrichers.numeric.NCNumericEnricher
@@ -126,7 +126,7 @@ object NCServerEnrichmentManager extends NCService with
NCIgniteInstance {
NCCoordinatesEnricher.enrich(s, span)
}
- NCContextWordEnricher.enrich(s, span)
+ NCContextWordCategoriesEnricher.enrich(s, span)
ner(s, enabledBuiltInToks)
@@ -285,7 +285,7 @@ object NCServerEnrichmentManager extends NCService with
NCIgniteInstance {
() => NCNumericEnricher.start(span),
() => NCGeoEnricher.start(span),
() => NCCoordinatesEnricher.start(span),
- () => NCContextWordEnricher.start(span)
+ () => NCContextWordCategoriesEnricher.start(span)
)
}
@@ -303,7 +303,7 @@ object NCServerEnrichmentManager extends NCService with
NCIgniteInstance {
ackStopping()
if (Config.isBuiltInEnrichers) {
- NCContextWordEnricher.stop(span)
+ NCContextWordCategoriesEnricher.stop(span)
NCCoordinatesEnricher.stop(span)
NCGeoEnricher.stop(span)
NCNumericEnricher.stop(span)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
new file mode 100644
index 0000000..bb8a418
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
@@ -0,0 +1,664 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.nlp.enrichers.ctxword
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.ascii.NCAsciiTable
+import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager.stem
+import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank._
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.server.mdo.NCCtxWordConfigMdo
+import org.apache.nlpcraft.server.nlp.core.{NCNlpParser, NCNlpServerManager,
NCNlpWord}
+import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
+import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager,
NCSuggestionRequest, NCWordSuggestion}
+import org.jibx.schema.codegen.extend.DefaultNameConverter
+
+import java.text.DecimalFormat
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
+import scala.concurrent.Await
+import scala.concurrent.duration.Duration
+
+/**
+ * ContextWord enricher.
+ * TODO: check plurals
+ * TODO: check empty lemma
+ */
+object NCContextWordCategoriesEnricher extends NCServerEnricher {
+ private final val MAX_CTXWORD_SCORE = 2
+ private final val INCL_MAX_CONFIDENCE = 1.0
+
+ private final val DEBUG_MODE = true
+
+ private final val CONVERTER = new DefaultNameConverter
+ private final val FMT = new DecimalFormat("#0.00000")
+
+ private case class Reason(word: String, suggestionConfidence: Double,
corpusConfidence: Double) {
+ override def toString: String =
+ s"Word: $word, suggestionConf=${FMT.format(suggestionConfidence)},
corpusConf=${FMT.format(corpusConfidence)}"
+ }
+
+ private case class Confidence(value: Double, reason: Option[Reason] =
None) {
+ override def toString: String = {
+ val s =
+ reason match {
+ case Some(r) => s"via:'$r'"
+ case None => "direct"
+ }
+
+            s"${FMT.format(value)}($s)"
+ }
+ }
+
+ private case class ModelProbeKey(probeId: String, modelId: String)
+
+ private case class ElementConfidence(elementId: String, confidence:
Confidence) {
+        override def toString: String = s"Element [id=$elementId, confidence=$confidence]"
+ }
+
+ case class ValuesHolder(normal: Map[String, Set[String]], stems:
Map[String, Set[String]]) {
+ private def map2Str(m: Map[String, Set[String]]): String =
+ m.toSeq.flatMap(p => p._2.toSeq.map(x => x -> p._1)).
+ groupBy(_._1).map(p => p._1 -> p._2.map(_._2).
+ mkString("{ ", ", ", " }")).mkString(", ")
+
+ override def toString: String = s"Values [normal=${map2Str(normal)},
stems=${map2Str(stems)}]"
+ }
+
+ case class ElementData(normals: Map[String, Double], stems: Map[String,
Double], lemmas: Map[String, Double]) {
+ def get(norm: String, stem: String, lemma: String): Option[Double] =
+ normals.get(norm) match {
+ case Some(v) => Some(v)
+ case None =>
+ stems.get(stem) match {
+ case Some(v) => Some(v)
+ case None => lemmas.get(lemma)
+ }
+ }
+ }
+
+    // Service responsible for all confidence calculations.
+ object ConfMath {
+ /**
+ *
+ * @param confs
+ * @return
+ */
+ def calculate(confs: Seq[Double]): Option[Double] =
+ // Drops if there is not enough data.
+ if (confs.length < 3)
+ None
+ else {
+ def avg(seq: Seq[Double]): Double = seq.sum / seq.length
+
+                // Takes the 50% most important confidences (at least the first 2) and calculates their average.
+ val n = Math.max((confs.length * 0.5).intValue(), 2)
+
+ Some(avg(confs.sortBy(-_).take(n)))
+ }
+
+ private def calcWeightedGeoMean(vals2Weights: Map[Double, Double]):
Double =
+ Math.pow(
+ vals2Weights.map { case (value, weight) => Math.pow(value,
weight) }.product, 1.0 / vals2Weights.values.sum
+ )
+
+ /**
+ *
+ * @param suggConf
+ * @param corpusConf
+ * @return
+ */
+ def calculate(suggConf: Double, corpusConf: Double): Double =
+            // Corpus data is more important. 1:5 is an empirical factor.
+ calcWeightedGeoMean(Map(suggConf -> 1, corpusConf -> 5))
+ }
+
+ @volatile private var valuesStems: mutable.HashMap[ModelProbeKey,
ValuesHolder] = _
+ @volatile private var elemsCorpuses: mutable.HashMap[ModelProbeKey,
Map[String, ElementData]] = _
+ @volatile private var parser: NCNlpParser = _
+
+ override def start(parent: Span = null): NCService =
startScopedSpan("start", parent) { _ =>
+ ackStarting()
+
+ valuesStems = mutable.HashMap.empty
+ elemsCorpuses = mutable.HashMap.empty
+ parser = NCNlpServerManager.getParser
+
+ ackStarted()
+ }
+
+ override def stop(parent: Span = null): Unit =
+ startScopedSpan("stop", parent) { _ =>
+ ackStopping()
+
+ parser = null
+ elemsCorpuses = null
+ valuesStems = null
+
+ ackStopped()
+ }
+
+ /**
+ *
+ * @param s
+ * @return
+ */
+ private def normCase(s: String): String = s.toLowerCase
+
+ /**
+ *
+ * @param awaitable
+ * @tparam T
+ * @return
+ */
+ private def syncExec[T](awaitable : scala.concurrent.Awaitable[T]): T =
Await.result(awaitable, Duration.Inf)
+
+ /**
+ *
+ * @param nlpWords
+ * @param corpusWords
+ * @param corpusWordsStems
+ * @param corpusWordsNorm
+ * @param elemValsSyns
+ * @param elemValuesSynsStems
+ * @param elemValuesSynsNorm
+ * @return
+ */
+ private def mkRequests(
+ nlpWords: Seq[Seq[NCNlpWord]],
+ corpusWords: Seq[Seq[String]],
+ corpusWordsStems: Seq[Seq[String]],
+ corpusWordsNorm: Seq[Seq[String]],
+ elemValsSyns: Set[String],
+ elemValuesSynsStems: Set[String],
+ elemValuesSynsNorm: Set[String]
+ ): Iterable[NCSuggestionRequest] = {
+ require(nlpWords.size == corpusWords.size)
+ require(corpusWords.size == corpusWordsStems.size)
+ require(corpusWords.size == corpusWordsNorm.size)
+ require(elemValsSyns.size == elemValuesSynsStems.size)
+ require(elemValsSyns.size == elemValuesSynsNorm.size)
+
+ corpusWordsStems.
+ zip(corpusWords).
+ zip(corpusWordsNorm).
+ zip(nlpWords).
+ flatMap {
+ case (((corpusWordsStem, corpusWords), corpusWordsNorm),
nlpWords) =>
+ def getIndexes(elemValuesData: Set[String], corpusData:
Seq[String]): Set[Int] =
+ elemValuesData.flatMap(vd => {
+ val i = corpusData.indexOf(vd)
+
+ if (i >= 0) Some(i) else None
+ })
+
+ val idxs =
+ getIndexes(elemValuesSynsStems, corpusWordsStem) ++
getIndexes(elemValuesSynsNorm, corpusWordsNorm)
+
+ def mkRequest(idx: Int, syn: String): NCSuggestionRequest = {
+ var newSen = substitute(corpusWords, syn, idx)
+
+ val nlpWordsNew = parser.parse(newSen.mkString(" "))
+
+ require(nlpWords.size == nlpWordsNew.size)
+
+ val pos = nlpWords(idx).pos
+ val posNew = nlpWordsNew(idx).pos
+
+ if (NOUNS_POS_SINGULAR.contains(pos) &&
NOUNS_POS_PLURALS.contains(posNew)) {
+ println(s"newSen1=$newSen")
+
+ newSen = substitute(corpusWords,
CONVERTER.depluralize(syn), idx)
+
+ println(s"newSen2=$newSen")
+ }
+ else if (NOUNS_POS_PLURALS.contains(pos) &&
NOUNS_POS_SINGULAR.contains(posNew)) {
+ println(s"newSen1=$newSen")
+
+ newSen = substitute(corpusWords,
CONVERTER.pluralize(syn), idx)
+
+ println(s"newSen3=$newSen")
+ }
+
+ NCSuggestionRequest(newSen, idx)
+ }
+
+ for (idx <- idxs; syn <- elemValsSyns)
+ yield mkRequest(idx, syn)
+ }
+ }
+
+ /**
+ *
+ * @param confValue
+ * @return
+ */
+ private def normalizeConfidence(confValue: Double): Double = confValue /
MAX_CTXWORD_SCORE
+
+ /**
+ *
+ * @param cfg
+ * @param key
+ * @return
+ */
+ private def getCorpusData(cfg: NCCtxWordConfigMdo, key: ModelProbeKey,
parent: Span = null):
+ Map[/** Element ID */String, ElementData] =
+ elemsCorpuses.synchronized { elemsCorpuses.get(key) } match {
+ case Some(cache) => cache
+ case None =>
+ val res = askSamples(cfg, parent)
+
+ elemsCorpuses.synchronized { elemsCorpuses += key -> res }
+
+ res
+ }
+
+ /**
+ *
+ * @param cfg
+ * @param key
+ * @return
+ */
+ private def getValuesData(cfg: NCCtxWordConfigMdo, key: ModelProbeKey):
ValuesHolder =
+ valuesStems.synchronized { valuesStems.get(key) } match {
+ case Some(cache) => cache
+ case None =>
+ def mkMap(convert: String => String): Map[String, Set[String]]
=
+ cfg.values.
+ flatMap { case (elemId, vals) => vals.map { case (_,
vals) => vals.map(convert(_) -> elemId) } }.
+ flatten.
+ groupBy { case (converted, _) => converted }.
+ map { case (converted, map) => converted -> map.map
{case (_, elemId) => elemId }.toSet }
+
+ val normsMap = mkMap(normCase)
+ val stemsMap = mkMap(stem)
+
+ val h = ValuesHolder(normal = normsMap, stems =
stemsMap.filter(p => !normsMap.keySet.contains(p._1)))
+
+ valuesStems.synchronized { valuesStems += key -> h }
+
+ h
+ }
+
+ /**
+ *
+ * @param words
+ * @param word
+ * @param index
+ * @return
+ */
+ private def substitute(words: Seq[String], word: String, index: Int):
Seq[String] = {
+ require(index < words.length)
+
+ words.zipWithIndex.map { case (w, i) => if (i != index) w else word }
+ }
+
+ /**
+ *
+ * @param req
+ * @param sugg
+ * @return
+ */
+ private def getLemma(req: NCSuggestionRequest, sugg: NCWordSuggestion):
String =
+ parser.parse(substitute(req.words, sugg.word, req.index).mkString("
"))(req.index).lemma
+
+ /**
+ *
+ * @param cfg
+ * @return
+ */
+ @throws[NCE]
+ private def askSamples(cfg: NCCtxWordConfigMdo, parent: Span = null):
Map[/** Element ID */String, ElementData] = {
+ val corpusSeq = cfg.corpus.toSeq
+ val corpusWords = corpusSeq.map(parser.parse(_).map(_.word))
+ val nlpWords = corpusSeq.map(s => parser.parse(s))
+
+ val corpusWordsStems = corpusWords.map(_.map(stem))
+ val corpusWordsNorm = corpusWords.map(_.map(normCase))
+
+ val recs: Map[String, Seq[NCSuggestionRequest]] =
+ (
+ for (
+ (elemId, elemValues) <- cfg.values.toSeq;
+ // Uses single words synonyms only.
+ elemValuesSyns =
elemValues.flatMap(_._2).toSet.filter(!_.contains(' '));
+ suggReq <- mkRequests(
+ nlpWords = nlpWords,
+ corpusWords = corpusWords,
+ corpusWordsStems = corpusWordsStems,
+ corpusWordsNorm = corpusWordsNorm,
+ elemValsSyns = elemValuesSyns,
+ elemValuesSynsStems = elemValuesSyns.map(stem),
+ elemValuesSynsNorm = elemValuesSyns.map(normCase)
+ )
+ )
+ yield (elemId, suggReq)
+ ).
+ groupBy { case (elemId, _) => elemId }.
+ map { case (elemId, m) => elemId -> m.map(_._2) }
+
+ if (recs.nonEmpty) {
+ val resps =
syncExec(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2).toSeq, parent
= parent))
+
+ if (DEBUG_MODE) {
+ val t = NCAsciiTable()
+
+ t #= ("Request", "Responses")
+
+ for ((req, resp) <- resps) {
+ t += (
+ req,
+ s"${resp.map(p =>
s"${p.word}=${FMT.format(normalizeConfidence(p.score))}").mkString(", ")}"
+ )
+ }
+
+ t.info(logger, Some("Corpus requests:"))
+ }
+
+ val req2Elem = recs.flatMap { case (elemId, recs) => recs.map(p =>
p -> elemId) }
+ val respsSeq: Seq[(NCSuggestionRequest, Seq[NCWordSuggestion])] =
resps.toSeq
+
+ def mkMap(convert: (NCSuggestionRequest, NCWordSuggestion) =>
String):
+ Map[/** Element ID*/ String, /** Word key*/ Map[String, /**
Confidences*/ Seq[Double]]] = {
+ val seq: Seq[(String, Map[String, Double])] =
+ respsSeq.
+ map { case (req, suggs) =>
+ (
+ req2Elem(req),
+ suggs.groupBy(sygg => convert(req, sygg)).
+ // If different word forms have different
confidence (`Abc`- 0.9, `abc`- 0.7),
+ // we use maximum (0.9).
+ map { case (key, suggs) => key ->
suggs.map(p => normalizeConfidence(p.score)).max }
+ )
+ }
+ seq.
+ groupBy { case (elemId, _) => elemId }.
+ map { case (elemId, data) =>
+ elemId ->
+ data.flatMap(_._2).
+ groupBy { case (word, _) => word }.
+ map { case (word, data) => word -> data.map {
case (_, confs) => confs } }
+ }
+ }
+
+ val normals = mkMap { (_, sugg ) => normCase(sugg.word) }
+ val stems = mkMap { (_, sugg ) => stem(sugg.word) }
+ val lemmas = mkMap { (req, sugg ) => getLemma(req, sugg) }
+
+ def mkTable(): NCAsciiTable =
+ if (DEBUG_MODE) {
+ val t = NCAsciiTable()
+
+ t #= ("Element", "Confidences")
+
+ t
+ }
+ else
+ null
+
+ val (tabAll, tabNorm) = (mkTable(), mkTable())
+
+ val res =
+ (normals.keySet ++ stems.keySet ++ lemmas.keySet).map(elemId =>
+ elemId -> {
+ def get[T, K](m: Map[String, Map[T, K]]): Map[T, K] =
m.getOrElse(elemId, Map.empty)
+
+ (get(normals), get(stems), get(lemmas))
+ }
+ ).
+ toMap.
+ map { case (elemId, (normals, stems, lemmas)) =>
+ val normalsAll = normals
+ val stemsAll = stems -- normals.keySet
+ val lemmasAll = lemmas -- normals.keySet -- stems.keySet
+
+ if (DEBUG_MODE)
+ tabAll += (
+ elemId,
+ normalsAll.toSeq.
+ sortBy(p => (-p._2.max, -p._2.size)).map(
+ { case (k, confs) =>
+ s"$k=${confs.sortBy(-_).map(p =>
FMT.format(p)).mkString("{ ", ", ", " }")}" }
+ ).mkString("{ ", ", ", " }"))
+
+ def squeeze(map: Map[String, Seq[Double]]): Map[String,
Double] =
+ map.flatMap { case (wordKey, confs) =>
+ ConfMath.calculate(confs) match {
+ case Some(conf) => Some(wordKey -> conf)
+ case None => None
+ }
+ }
+
+ val normalsNorm = squeeze(normalsAll)
+ val stemsNorm = squeeze(stemsAll)
+ val lemmasNorm = squeeze(lemmasAll)
+
+ if (DEBUG_MODE)
+ tabNorm += (
+ elemId,
+ normalsNorm.toSeq.sortBy(-_._2).
+ map({ case (k, factor) =>
s"$k=${FMT.format(factor)}" }).mkString("{ ", ", ", " }")
+ )
+
+ elemId -> ElementData(normalsNorm, stemsNorm, lemmasNorm)
+ }
+
+ if (DEBUG_MODE) {
+ tabAll.info(logger, Some("Model corpus all confidences"))
+ tabNorm.info(logger, Some("Model corpus normalized
confidences"))
+ }
+
+ res
+ }
+ else
+ Map.empty[String, ElementData]
+ }
+
+ override def enrich(ns: NCNlpSentence, parent: Span): Unit =
+        startScopedSpan("enrich", parent) { _ =>
+ ns.ctxWordConfig match {
+ case Some(cfg) =>
+ val detected = mutable.HashMap.empty[NCNlpSentenceToken,
mutable.HashSet[ElementConfidence]]
+
+ def add(nounTok: NCNlpSentenceToken, elemId: String, conf:
Confidence): Unit = {
+ val tokElems = detected.getOrElseUpdate(nounTok,
mutable.HashSet.empty[ElementConfidence])
+
+ tokElems.find(_.elementId == elemId) match {
+ case Some(exConf) =>
+ if (conf.value > exConf.confidence.value) {
+ tokElems += ElementConfidence(elemId, conf)
+ tokElems -= exConf
+ }
+ case None =>
+ tokElems += ElementConfidence(elemId, conf)
+ }
+ }
+
+ val nouns = ns.tokens.filter(t =>
NOUNS_POS.contains(t.pos))
+
+ if (nouns.nonEmpty) {
+ val key = ModelProbeKey(cfg.probeId, cfg.modelId)
+
+ // 1. Values. Direct.
+ val vd = getValuesData(cfg, key)
+
+ val (vNorms, vStems) = (vd.normal, vd.stems)
+
+ if (DEBUG_MODE)
+ logger.info(
+ s"Model loaded [" +
+ s"key=$key, elements: " +
+                                s"${cfg.supportedElements.mkString(", ")}, " +
+ s"values data=$vd]"
+ )
+
+ def get(m: Map[String, Set[String]], key: String):
Set[String] = m.getOrElse(key, Set.empty)
+
+ for (
+ n <- nouns;
+ elemId <- get(vNorms, n.normText) ++ get(vNorms,
normCase(n.lemma)) ++ get(vStems, n.stem)
+ )
+ add(n, elemId, Confidence(INCL_MAX_CONFIDENCE))
+
+ // 2. Via examples.
+ val mdlCorpusData: Map[String, ElementData] =
getCorpusData(cfg, key, parent)
+
+ for (
+ nounTok <- nouns;
+ (elemId, elemData) <- mdlCorpusData;
+ confOpt = elemData.get(nounTok.normText,
nounTok.stem, nounTok.lemma)
+ if confOpt.isDefined && confOpt.get >=
cfg.supportedElements(elemId)
+ )
+ add(nounTok, elemId, Confidence(confOpt.get))
+
+ // 3. Ask for sentence.
+                        val idxs = ns.tokens.flatMap(p => if (p.pos.startsWith("N")) Some(p.index) else None).toSeq
+ val reqs = idxs.map(idx =>
NCSuggestionRequest(ns.tokens.map(_.origText).toSeq, idx))
+
+ val resps: Map[NCWordSuggestion, NCSuggestionRequest] =
+ syncExec(
+ NCSuggestSynonymManager.suggestWords(reqs,
parent = parent)).
+ flatMap { case (req, suggs) => suggs.map(_ ->
req)
+ }
+
+ if (DEBUG_MODE) {
+ val t = NCAsciiTable()
+
+ t #= ("Request", "Responses")
+
+ resps.toSeq.groupBy(_._2.index).foreach { case (_,
seq) =>
+ val sorted = seq.sortBy(-_._1.score)
+
+ t += (
+ sorted.head._2,
+ s"${sorted.map(_._1).
+ map(p =>
s"${p.word}=${FMT.format(normalizeConfidence(p.score))}").
+ mkString(", ")}"
+ )
+ }
+
+ t.info(logger, Some(s"Sentence requests processing
[key=$key, sentence=${ns.text}]"))
+ }
+
+ case class Key(elementId: String, token:
NCNlpSentenceToken)
+
+ val miss = if (DEBUG_MODE) mutable.HashMap.empty[Key,
ArrayBuffer[Confidence]] else null
+
+ for (
+                            // Token index (tokIdx) should be correct because the request is created from the original words,
+                            // separated by spaces, and the Suggestion Manager uses a space tokenizer.
+ (sugg, req) <- resps.toSeq.sortBy(_._2.index);
+ suggConf = normalizeConfidence(sugg.score);
+ (elemId, elemData) <- mdlCorpusData;
+ elemConf = cfg.supportedElements(elemId);
+ corpConfOpt = elemData.get(normCase(sugg.word),
stem(sugg.word), getLemma(req, sugg))
+ if corpConfOpt.isDefined;
+ corpConf = corpConfOpt.get;
+ normConf = ConfMath.calculate(suggConf, corpConf)
+ ) {
+ def mkConf(): Confidence = Confidence(normConf,
Some(Reason(sugg.word, suggConf, corpConf)))
+ def getToken: NCNlpSentenceToken =
ns.tokens(req.index)
+
+ if (normConf >= elemConf)
+ add(getToken, elemId, mkConf())
+ else if (DEBUG_MODE)
+ miss.getOrElseUpdate(Key(elemId, getToken),
mutable.ArrayBuffer.empty[Confidence]) +=
+ mkConf()
+ }
+
+ ns.ctxWordCategories = detected.map {
+ case (tok, confs) => tok.index -> confs.map(p =>
p.elementId -> p.confidence.value).toMap
+ }.toMap
+
+ if (DEBUG_MODE) {
+ require(miss != null)
+
+                        miss.filter { case (key, _) =>
+                            !detected.exists {
+                                case (tok, confs) => confs.exists(conf => Key(conf.elementId, tok) == key)
+                            }
+                        }.toSeq.sortBy(p => (p._1.token.index, p._1.elementId)).
+ foreach { case (key, confs) =>
+ logger.info(
+ s"Unsuccessful attempt [" +
+ s"elementId=${key.elementId}, " +
+
s"tokenWordIndexes=${key.token.wordIndexes.mkString(",")}, " +
+
s"confidences=${confs.sortBy(-_.value).mkString(", ")}" +
+ s"]"
+ )
+ }
+
+ logger.info("Sentence detected elements:")
+
+ for ((tok, elems) <- detected)
+ logger.info(s"${tok.origText}:
${elems.mkString(", ")}")
+ }
+ }
+
+ case None => // No-op.
+ }
+ }
+
+ /**
+ *
+ * @param probeId
+ * @param parent
+ */
+ def onDisconnectProbe(probeId: String, parent: Span = null): Unit =
+ startScopedSpan("onDisconnectProbe", parent) { _ =>
+ valuesStems.synchronized { valuesStems --=
valuesStems.keySet.filter(_.probeId == probeId) }
+ elemsCorpuses.synchronized { elemsCorpuses --=
elemsCorpuses.keySet.filter(_.probeId == probeId) }
+ }
+}
+//
+//object x extends App {
+// /**
+// *
+// * @param vals2Weights
+// * @return
+// */
+// private def calcWeightedGeoMean(vals2Weights: Map[Double, Double]):
Double =
+// Math.pow(
+// vals2Weights.map { case (value, weight) => Math.pow(value,
weight) }.product, 1.0 / vals2Weights.values.sum
+// )
+//
+// lazy val V1 = 1
+// lazy val V2 = 3
+//
+// Seq(
+// 1.0->0.2,
+// 0.4->0.8
+//// 0.29057 -> 0.82184,
+//// 0.18316 -> 0.71606,
+//// 0.23394 -> 0.48252,
+//// 0.29362 -> 0.32973,
+//// 0.23451 -> 0.65216,
+//// 0.63658 -> 0.21005,
+//// 0.25097 -> 0.36217,
+//// 0.51310 -> 0.37854,
+//// 0.40631 -> 0.81649,
+//// 0.21673 -> 0.25714,
+//// 1.0 -> 0.37183,
+//// 0.52308 -> 0.35263,
+//// 0.35516 -> 0.26770,
+// )
+// .foreach { case (v1, v2) => println(calcWeightedGeoMean(Map(v1 ->
V1, v2 -> V2)))}
+//
+//
+//}
\ No newline at end of file
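
The confidence math added in NCContextWordCategoriesEnricher above is compact enough to restate standalone: corpus confidences per word form are squeezed by averaging the strongest 50% of samples (at least 2, requiring 3 or more), and a sentence suggestion is fused with the corpus value via a weighted geometric mean that weights the corpus 5:1. A runnable sketch, reimplemented outside the private ConfMath object with made-up inputs:

    object ConfMathSketch extends App {
        // Average of the strongest half of the samples (at least 2); None if fewer than 3.
        def squeeze(confs: Seq[Double]): Option[Double] =
            if (confs.length < 3) None
            else {
                val top = confs.sortBy(-_).take(math.max((confs.length * 0.5).toInt, 2))
                Some(top.sum / top.length)
            }

        // Weighted geometric mean: corpus confidence carries 5x the weight.
        def fuse(suggConf: Double, corpusConf: Double): Double =
            math.pow(math.pow(suggConf, 1) * math.pow(corpusConf, 5), 1.0 / 6)

        println(squeeze(Seq(0.9, 0.8, 0.4, 0.2))) // ~Some(0.85): average of the top 2 of 4.
        println(fuse(0.6, 0.9))                   // ~0.84: pulled toward the corpus value.
    }
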
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
deleted file mode 100644
index f19a342..0000000
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.server.nlp.enrichers.ctxword
-
-import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.ascii.NCAsciiTable
-import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager.stem
-import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceToken}
-import org.apache.nlpcraft.common.{NCE, NCService}
-import org.apache.nlpcraft.model.NCContextWordElementConfig
-import org.apache.nlpcraft.server.mdo.NCCtxWordConfigMdo
-import org.apache.nlpcraft.server.nlp.core.{NCNlpParser, NCNlpServerManager,
NCNlpWord}
-import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
-import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager,
NCSuggestionRequest, NCWordSuggestion}
-import org.jibx.schema.codegen.extend.DefaultNameConverter
-
-import java.text.DecimalFormat
-import scala.collection.mutable
-import scala.concurrent.Await
-import scala.concurrent.duration.Duration
-import scala.jdk.CollectionConverters.SeqHasAsJava
-
-/**
- * ContextWord enricher.
- * TODO: check plurals
- * TODO: check empty lemma
- */
-object NCContextWordEnricher extends NCServerEnricher {
- private final val MAX_CTXWORD_SCORE = 2
- private final val INCL_MAX_SCORE = 1.0
-
- private final val DEBUG_MODE = true
-
- private final val CONVERTER = new DefaultNameConverter
- private final val FMT = new DecimalFormat("#0.00000")
-
- private case class Score(score: Double, reason: Option[String] = None) {
- override def toString: String = {
- val s =
- reason match {
- case Some(v) => s"via:'$v'"
- case None => "direct"
- }
-
- s"${FMT.format(score)}($s)}"
- }
- }
- private case class ModelProbeKey(probeId: String, modelId: String)
- private case class ElementScore(elementId: String, scores: Score*) {
- override def toString: String =
- s"Element [id=$elementId, scores=${scores.sortBy(p =>
-p.score).mkString("{ ", ", ", " }")}]"
- }
-
- // Key - word form (origin, stem). Value - Element IDs set.
- type ElementsByKey = Map[/** Key */ String, /** Element ID */ Set[String]]
-
- object ValuesHolder {
- def apply(normal: ElementsByKey, stems: ElementsByKey): ValuesHolder =
new ValuesHolder(
- normal, stems.filter(p => !normal.keySet.contains(p._1))
- )
- }
-
- class ValuesHolder(val normal: ElementsByKey, val stems: ElementsByKey) {
- private def map2Str(m: Map[String, Set[String]]): String =
- m.toSeq.flatMap(p => p._2.toSeq.map(x => x -> p._1)).
- groupBy(_._1).map(p => p._1 -> p._2.map(_._2).
- mkString("{ ", ", ", " }")).mkString(", ")
-
- override def toString: String = s"Values [normal=${map2Str(normal)},
stems=${map2Str(stems)}]"
- }
-
- // Key - word form (origin, stem, lemma).
- // Scores list which extracted from suggestions for each example (direct
or artificial)
- type ScoreFactors = Map[String, Seq[Double]]
-
- object ScoreHolder {
- def apply(normals: ScoreFactors, stems: ScoreFactors, lemmas:
ScoreFactors): ScoreHolder =
- new ScoreHolder(normals, stems -- normals.keySet, lemmas --
normals.keySet -- stems.keySet)
- }
-
- class ScoreHolder(normals: ScoreFactors, stems: ScoreFactors, lemmas:
ScoreFactors) {
- def get(m: ScoreFactors, key: String): Seq[Double] = m.getOrElse(key,
Seq.empty)
-
- def get(norm: String, stem: String, lemma: String): Seq[Double] =
- get(normals, norm) ++ get(stems, stem) ++ get(lemmas, lemma)
-
- private def sort(m: ScoreFactors): String =
- m.toSeq.
- sortBy(p => (-p._2.max, -p._2.size)).map(
- { case (k, factors) => s"$k=${factors.sortBy(-_).map(p =>
FMT.format(p)).mkString("{ ", ", ", " }")}" }
- ).mkString("{ ", ", ", " }")
-
- override def toString: String = s"Score: ${sort(normals)}"
- }
-
- @volatile private var valuesStems: mutable.HashMap[ModelProbeKey,
ValuesHolder] = _
- @volatile private var corpuses: mutable.HashMap[ModelProbeKey, Map[/**
Element ID */String, ScoreHolder]] = _
-
- @volatile private var parser: NCNlpParser = _
-
- override def start(parent: Span = null): NCService =
startScopedSpan("start", parent) { _ =>
- ackStarting()
-
- valuesStems = mutable.HashMap.empty
- corpuses = mutable.HashMap.empty
- parser = NCNlpServerManager.getParser
-
- ackStarted()
- }
-
- override def stop(parent: Span = null): Unit =
- startScopedSpan("stop", parent) { _ =>
- ackStopping()
-
- parser = null
- corpuses = null
- valuesStems = null
-
- ackStopped()
- }
-
- /**
- *
- * @param awaitable
- * @tparam T
- * @return
- */
- private def syncExec[T](awaitable : scala.concurrent.Awaitable[T]): T =
Await.result(awaitable, Duration.Inf)
-
- /**
- *
- * @param nlpWords
- * @param corpusWords
- * @param corpusWordsStems
- * @param corpusWordsNorm
- * @param elemValuesSyns
- * @param elemValuesSynsStems
- * @param elemValuesSynsNorm
- * @return
- */
- private def mkRequests(
- nlpWords: Seq[Seq[NCNlpWord]],
- corpusWords: Seq[Seq[String]],
- corpusWordsStems: Seq[Seq[String]],
- corpusWordsNorm: Seq[Seq[String]],
- elemValuesSyns: Set[String],
- elemValuesSynsStems: Set[String],
- elemValuesSynsNorm: Set[String]
- ): Iterable[NCSuggestionRequest] = {
- require(nlpWords.size == corpusWords.size)
- require(corpusWords.size == corpusWordsStems.size)
- require(corpusWords.size == corpusWordsNorm.size)
- require(elemValuesSyns.size == elemValuesSynsStems.size)
- require(elemValuesSyns.size == elemValuesSynsNorm.size)
-
- corpusWordsStems.
- zip(corpusWords).
- zip(corpusWordsNorm).
- zip(nlpWords).
- flatMap {
- case (((corpusWordsStem, corpusWords), corpusWordsNorm),
nlpWords) =>
- def getIndexes(elemValuesData: Set[String], corpusData:
Seq[String]): Set[Int] =
- elemValuesData.flatMap(vd => {
- val i = corpusData.indexOf(vd)
-
- if (i >= 0) Some(i) else None
- })
-
- val idxs =
- getIndexes(elemValuesSynsStems, corpusWordsStem) ++
getIndexes(elemValuesSynsNorm, corpusWordsNorm)
-
- def mkRequest(idx: Int, syn: String): NCSuggestionRequest = {
- var newSen = substitute(corpusWords, syn, idx)
-
- val nlpWordsNew = parser.parse(newSen.mkString(" "))
-
- require(nlpWords.size == nlpWordsNew.size)
-
- val pos = nlpWords(idx).pos
- val posNew = nlpWordsNew(idx).pos
-
- if (NOUNS_POS_SINGULAR.contains(pos) &&
NOUNS_POS_PLURALS.contains(posNew)) {
- println(s"newSen1=$newSen")
-
- newSen = substitute(corpusWords,
CONVERTER.depluralize(syn), idx)
-
- println(s"newSen2=$newSen")
- }
- else if (NOUNS_POS_PLURALS.contains(pos) &&
NOUNS_POS_SINGULAR.contains(posNew)) {
- println(s"newSen1=$newSen")
-
- newSen = substitute(corpusWords,
CONVERTER.pluralize(syn), idx)
-
- println(s"newSen3=$newSen")
- }
-
- NCSuggestionRequest(newSen, idx)
- }
-
- for (idx <- idxs; syn <- elemValuesSyns)
- yield mkRequest(idx, syn)
- }
- }
-
- /**
- *
- * @param score
- * @return
- */
- private def normalize(score: Double): Double = score / MAX_CTXWORD_SCORE
-
- /**
- *
- * @param cfg
- * @param key
- * @return
- */
- private def getCorpusData(cfg: NCCtxWordConfigMdo, key: ModelProbeKey,
parent: Span = null):
- Map[/** Element ID */String, ScoreHolder] =
- corpuses.synchronized { corpuses.get(key) } match {
- case Some(cache) => cache
- case None =>
- val res = askSamples(cfg, parent)
-
- corpuses.synchronized { corpuses += key -> res }
-
- res
- }
-
- /**
- *
- * @param cfg
- * @param key
- * @return
- */
- private def getValuesData(cfg: NCCtxWordConfigMdo, key: ModelProbeKey):
ValuesHolder =
- valuesStems.synchronized { valuesStems.get(key) } match {
- case Some(cache) => cache
- case None =>
- def mkMap(convert: String => String): Map[String, Set[String]]
=
- cfg.values.
- flatMap { case (elemId, vals) => vals.map { case (_,
vals) => vals.map(convert(_) -> elemId) } }.
- flatten.
- groupBy { case (converted, _) => converted }.
- map { case (converted, map) => converted -> map.map
{case (_, elemId) => elemId }.toSet }
-
- val res = ValuesHolder(normal = mkMap(_.toLowerCase), stems =
mkMap(stem))
-
- valuesStems.synchronized { valuesStems += key -> res }
-
- res
- }
-
- /**
- *
- * @param words
- * @param word
- * @param index
- * @return
- */
- private def substitute(words: Seq[String], word: String, index: Int):
Seq[String] = {
- require(index < words.length)
-
- words.zipWithIndex.map { case (w, i) => if (i != index) w else word }
- }
-
- /**
- *
- * @param req
- * @param sugg
- * @return
- */
- private def getSuggestionLemma(req: NCSuggestionRequest, sugg:
NCWordSuggestion): String =
- parser.parse(substitute(req.words, sugg.word, req.index).mkString("
"))(req.index).lemma
-
- /**
- *
- * @param cfg
- * @return
- */
- @throws[NCE]
- private def askSamples(cfg: NCCtxWordConfigMdo, parent: Span = null):
Map[/** Element ID */String, ScoreHolder] = {
- val corpusSeq = cfg.corpus.toSeq
- val corpusWords = corpusSeq.map(parser.parse(_).map(_.word))
- val nlpWords = corpusSeq.map(s => parser.parse(s))
-
- val corpusWordsStems = corpusWords.map(_.map(stem))
- val corpusWordsNorm = corpusWords.map(_.map(_.toLowerCase))
-
- val recs: Map[String, Seq[NCSuggestionRequest]] =
- (
- for (
- (elemId, elemValues) <- cfg.values.toSeq;
- // Uses single words synonyms only.
- elemValuesSyns =
elemValues.flatMap(_._2).toSet.filter(!_.contains(' '));
- suggReq <- mkRequests(
- nlpWords = nlpWords,
- corpusWords = corpusWords,
- corpusWordsStems = corpusWordsStems,
- corpusWordsNorm = corpusWordsNorm,
- elemValuesSyns = elemValuesSyns,
- elemValuesSynsStems = elemValuesSyns.map(stem),
- elemValuesSynsNorm = elemValuesSyns.map(_.toLowerCase)
- )
- )
- yield (elemId, suggReq)
- ).
- groupBy { case (elemId, _) => elemId }.
- map { case (elemId, m) => elemId -> m.map(_._2) }
-
- if (recs.nonEmpty) {
- val resps =
syncExec(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2).toSeq, parent
= parent))
-
- if (DEBUG_MODE) {
- val t = NCAsciiTable()
-
- t #= ("Request", "Responses")
-
- for ((req, resp) <- resps) {
- t += (
- req,
- s"${resp.map(p =>
s"${p.word}=${FMT.format(normalize(p.score))}").mkString(", ")}"
- )
- }
-
- t.info(logger, Some("Corpus requests:"))
- }
-
- val respsSeq = resps.toSeq
-
- val req2Elem = recs.flatMap { case (elemId, recs) => recs.map(p =>
p -> elemId) }
-
- def mkMap(convert: (NCSuggestionRequest, NCWordSuggestion) =>
String) = {
- val seq: Seq[(String, Map[String, Seq[Double]])] = respsSeq.
- map { case (req, suggs) =>
- (
- req2Elem(req),
- suggs.groupBy(sygg => convert(req, sygg)).
- map { case (key, suggs) => key -> suggs.map(p
=> normalize(p.score)) }
- )
- }
-
- seq.
- groupBy { case (elemId, _) => elemId }.
- map { case (elemId, data) => elemId -> {
- val factors: Seq[(String, Seq[Double])] =
data.flatMap(_._2)
-
- factors.
- groupBy{ case (word, _) => word }.
- map { case (word, factors) => word ->
factors.flatMap { case (_, factor) => factor } }
- } }
- }
-
- val normalMap: Map[String, Map[String, Seq[Double]]] = mkMap { (_,
sugg ) => sugg.word.toLowerCase }
- val stemMap = mkMap { (_, sugg ) => stem(sugg.word) }
- val lemmaMap = mkMap { (req, sugg ) => getSuggestionLemma(req,
sugg) }
-
- (normalMap.keySet ++ stemMap.keySet ++ lemmaMap.keySet).map(elemId
=>
- elemId ->
- ScoreHolder(
- normals = normalMap.getOrElse(elemId, Map.empty),
- stems = stemMap.getOrElse(elemId, Map.empty),
- lemmas = lemmaMap.getOrElse(elemId, Map.empty)
- )
- ).toMap
- }
- else
- Map.empty[String, ScoreHolder]
- }
-
- /**
- *
- * @param elemScore
- * @param scores
- * @return
- */
- private def isMatched(elemScore: NCContextWordElementConfig, scores:
Double*): Boolean =
- if (scores.nonEmpty) {
- import NCContextWordElementConfig.NCContextWordElementPolicy._
-
- val policy = elemScore.getPolicy
- val elemScoreVal = elemScore.getScore
-
- policy match {
- case MEDIAN =>
- val sorted = scores.sorted
- val mid = sorted.length / 2
- val median = if (sorted.length % 2 == 0) (sorted(mid) +
sorted(mid - 1)) / 2
- else sorted(mid)
-
- median >= elemScoreVal
- case ALL => scores.forall(_ >= elemScoreVal)
- case AVERAGE => scores.sum / scores.size >= elemScoreVal
- case ANY => scores.exists(_ >= elemScoreVal)
-
- case _ => throw new AssertionError(s"Unexpected policy:
$policy")
- }
- }
- else
- false
-
- override def enrich(ns: NCNlpSentence, parent: Span): Unit =
- startScopedSpan("stop", parent) { _ =>
- ns.ctxWordConfig match {
- case Some(cfg) =>
- val detected = mutable.HashMap.empty[NCNlpSentenceToken,
mutable.HashSet[ElementScore]]
-
- def add(
- nounTok: NCNlpSentenceToken, elemId: String, scores:
Score*
- ): Unit = {
- val tokElems = detected.getOrElseUpdate(nounTok,
mutable.HashSet.empty[ElementScore])
-
- tokElems.find(_.elementId == elemId) match {
- case Some(ex) =>
- tokElems += ElementScore(elemId, scores ++
ex.scores:_*)
- tokElems -= ex
- case None =>
- tokElems += ElementScore(elemId, scores:_*)
- }
- }
-
- val nounToks = ns.tokens.filter(t =>
NOUNS_POS.contains(t.pos))
-
- if (nounToks.nonEmpty) {
- val key = ModelProbeKey(cfg.probeId, cfg.modelId)
-
- // 1. Values. Direct.
- val valsData = getValuesData(cfg, key)
-
- if (DEBUG_MODE)
- logger.info(s"Values loaded [key=$key,
data=$valsData]")
-
- def get(m: Map[String, Set[String]], key: String):
Set[String] = m.getOrElse(key, Set.empty)
-
- for (
- nounTok <- nounToks;
- elemId <-
- get(valsData.normal, nounTok.normText) ++
- get(valsData.normal,
nounTok.lemma.toLowerCase) ++
- get(valsData.stems, nounTok.stem)
- )
- add(nounTok, elemId, Score(INCL_MAX_SCORE))
-
- // 2. Via examples.
- val mdlCorpusData: Map[String, ScoreHolder] =
getCorpusData(cfg, key, parent)
-
- if (DEBUG_MODE) {
- val t = NCAsciiTable()
-
- t #= ("Element", "Detailed")
-
- for ((elemId, sh) <- mdlCorpusData)
- t += (elemId, sh)
-
- t.info(logger, Some(s"Model corpus processed
[key=$key]"))
- }
-
- for (
- nounTok <- nounToks;
- (elemId, suggs) <- mdlCorpusData;
- scores = suggs.get(nounTok.normText, nounTok.stem,
nounTok.lemma)
- if isMatched(cfg.elements(elemId), scores :_*);
- score <- scores
- )
- add(nounTok, elemId, Score(score))
-
- // 3. Ask for sentence.
- val idxs = ns.tokens.flatMap(p => if
(p.pos.startsWith("N")) Some(p.index)else None).toSeq
- val reqs = idxs.map(idx =>
NCSuggestionRequest(ns.tokens.map(_.origText).toSeq, idx))
-
- val resps: Map[NCWordSuggestion, NCSuggestionRequest] =
- syncExec(
- NCSuggestSynonymManager.suggestWords(reqs,
parent = parent)).
- flatMap { case (req, suggs) => suggs.map(_ ->
req)
- }
-
- if (DEBUG_MODE) {
- val t = NCAsciiTable()
-
- t #= ("Request", "Responses")
-
- resps.toSeq.groupBy(_._2.index).foreach { case (_,
seq) =>
- val sorted = seq.sortBy(-_._1.score)
-
- t += (
- sorted.head._2,
- s"${sorted.map(_._1).
- map(p =>
s"${p.word}=${FMT.format(normalize(p.score))}").
- mkString(", ")}"
- )
- }
-
- t.info(logger, Some(s"Sentence requests processing
[key=$key, sentence=${ns.text}]"))
- }
-
- for (
- // Token index (tokIdx) should be correct because
request created from original words,
- // separated by space, and Suggestion Manager uses
space tokenizer.
- (sugg, req) <- resps;
- senScore = normalize(sugg.score);
- (elemId, mdlCorpusSuggs) <- mdlCorpusData;
- elemCfg = cfg.elements(elemId);
- corpusScores =
- mdlCorpusSuggs.get(
- sugg.word.toLowerCase, stem(sugg.word), getSuggestionLemma(req, sugg)
- )
- // TODO:
- if isMatched(elemCfg, senScore) && isMatched(elemCfg, corpusScores :_*)
- ) {
- add(ns.tokens(req.index), elemId, Score(senScore, Some(sugg.word)))
-//
-// for (corpusScore <- corpusScores)
-// add(ns.tokens(req.index), elemId, Score(corpusScore, Some(sugg.word)))
- }
- }
-
- ns.ctxWordData = detected.map {
- case (tok, scores) => tok.index -> scores.map(p => p.elementId -> p.scores.map(_.score).asJava).toMap
- }.toMap
-
- if (DEBUG_MODE) {
- logger.info("Sentence detected elements:")
-
- for ((tok, elems) <- detected)
- logger.info(s"${tok.origText}:
${elems.sortBy(-_.scores.map(_.score).max).mkString(", ")}")
- }
- case None => // No-op.
- }
- }
-
- /**
- * Clears this enricher's caches for the disconnected probe.
- *
- * @param probeId Disconnected probe ID.
- * @param parent Optional parent span for tracing.
- */
- def onDisconnectProbe(probeId: String, parent: Span = null): Unit =
- startScopedSpan("onDisconnectProbe", parent) { _ =>
- valuesStems.synchronized { valuesStems --= valuesStems.keySet.filter(_.probeId == probeId) }
- corpuses.synchronized { corpuses --= corpuses.keySet.filter(_.probeId == probeId) }
- }
-}
\ No newline at end of file
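
The block deleted above is the heart of the old per-element matching: each element carried a
policy (MEDIAN, ALL, AVERAGE or ANY) plus a score threshold. For reference, here is a minimal,
self-contained Scala sketch of those semantics; the Policy ADT and the matches helper are
illustrative stand-ins, not NLPCraft API:

object PolicyMatchSketch extends App {
    sealed trait Policy
    case object MEDIAN extends Policy
    case object ALL extends Policy
    case object AVERAGE extends Policy
    case object ANY extends Policy

    // Mirrors the removed isMatched: a set of confidence scores is compared
    // against a per-element threshold under the element's policy.
    def matches(policy: Policy, threshold: Double, scores: Seq[Double]): Boolean =
        scores.nonEmpty && (policy match {
            case MEDIAN =>
                val sorted = scores.sorted
                val mid = sorted.length / 2
                val median =
                    if (sorted.length % 2 == 0) (sorted(mid) + sorted(mid - 1)) / 2
                    else sorted(mid)
                median >= threshold
            case ALL => scores.forall(_ >= threshold)
            case AVERAGE => scores.sum / scores.size >= threshold
            case ANY => scores.exists(_ >= threshold)
        })

    val scores = Seq(0.2, 0.6, 0.9)

    assert(matches(ANY, 0.8, scores))      // Max score 0.9 passes.
    assert(!matches(ALL, 0.8, scores))     // 0.2 fails the threshold.
    assert(matches(MEDIAN, 0.5, scores))   // Median 0.6 passes.
    assert(!matches(AVERAGE, 0.8, scores)) // Average ~0.57 fails.
}

After this commit the policy enum is gone entirely; each element keeps a single confidence
value, as the NCProbeManager and test changes below show.
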
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 7551f14..265e398 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -29,13 +29,11 @@ import org.apache.nlpcraft.common.pool.NCThreadPoolManager
import org.apache.nlpcraft.common.socket.NCSocket
import org.apache.nlpcraft.common.version.NCVersion
import org.apache.nlpcraft.common.{NCService, _}
-import org.apache.nlpcraft.model.NCContextWordElementConfig
-import NCContextWordElementConfig.NCContextWordElementPolicy
import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.server.company.NCCompanyManager
import org.apache.nlpcraft.server.mdo._
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnrichmentManager
-import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordEnricher
+import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordCategoriesEnricher
import org.apache.nlpcraft.server.proclog.NCProcessLogManager
import org.apache.nlpcraft.server.query.NCQueryManager
import org.apache.nlpcraft.server.sql.NCSql
@@ -261,7 +259,7 @@ object NCProbeManager extends NCService {
mdls --= mdls.keys.filter(id => !probes.exists { case (_, p) => p.probe.models.exists(_.id == id) })
// TODO: add new interface for server enrichers? (services)
- NCContextWordEnricher.onDisconnectProbe(probeKey.probeId)
+ NCContextWordCategoriesEnricher.onDisconnectProbe(probeKey.probeId)
}
case Some(hld) =>
@@ -622,7 +620,6 @@ object NCProbeManager extends NCService {
java.util.Set[String],
java.util.Map[String, java.util.Map[String, java.util.Set[String]]],
java.util.Set[String],
- java.util.Map[String, String],
java.util.Map[String, Double]
)]]("PROBE_MODELS").
map {
@@ -633,18 +630,13 @@ object NCProbeManager extends NCService {
enabledBuiltInToks,
values,
corpus,
- policies,
- scores
+ supported
) =>
require(mdlId != null)
require(mdlName != null)
require(mdlVer != null)
require(enabledBuiltInToks != null)
- require(
- values.isEmpty && corpus.isEmpty && policies.isEmpty ||
- !values.isEmpty && !corpus.isEmpty && !policies.isEmpty
- )
- require(policies.size() == scores.size())
+ require(values.isEmpty && corpus.isEmpty || !values.isEmpty && !corpus.isEmpty)
NCProbeModelMdo(
id = mdlId,
@@ -653,8 +645,6 @@ object NCProbeManager extends NCService {
enabledBuiltInTokens = enabledBuiltInToks.asScala.toSet,
ctxWordConfig =
if (!values.isEmpty) {
- val scoresMap = scores.asScala
-
Some(
NCCtxWordConfigMdo(
probeId = probeId,
@@ -667,13 +657,7 @@ object NCProbeManager extends NCService {
}.toMap
}.toMap,
corpus = corpus.asScala.toSet,
- policies.asScala.map { case (elemId, policy) =>
- elemId -> new NCContextWordElementConfig() {
- override def getPolicy: NCContextWordElementPolicy =
- NCContextWordElementPolicy.valueOf(policy)
- override def getScore: Double = scoresMap(elemId)
- }
- }.toMap
+ supportedElements = supported.asScala.toMap
)
)
}
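
The handshake simplification above replaces two parallel per-element maps (policy names and
scores, which had to be kept in sync and size-checked) with one map from element ID to
confidence. A small sketch of the resulting server-side conversion, assuming only the
java.util.Map[String, Double] slot shown in the PROBE_MODELS tuple; the element IDs and
values below are illustrative:

import java.{lang => jl, util => ju}
import scala.collection.JavaConverters._

object SupportedElementsSketch extends App {
    // Illustrative stand-in for the `supported` slot of the PROBE_MODELS tuple.
    val supported: ju.Map[String, jl.Double] = Map(
        "class:animal" -> jl.Double.valueOf(0.7),
        "class:cars" -> jl.Double.valueOf(0.8)
    ).asJava

    // Mirrors `supportedElements = supported.asScala.toMap` in the MDO construction.
    val supportedElements: Map[String, Double] =
        supported.asScala.map { case (id, conf) => id -> conf.doubleValue }.toMap

    // The old invariant `require(policies.size() == scores.size())` is now
    // unrepresentable by construction: each element carries exactly one value.
    assert(supportedElements("class:cars") == 0.8)
}
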
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index 3dfeee3..1fb072f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -17,9 +17,7 @@
package org.apache.nlpcraft.model.ctxword
-import org.apache.nlpcraft.model.NCContextWordElementConfig.NCContextWordElementPolicy
-import org.apache.nlpcraft.model.NCContextWordElementConfig.NCContextWordElementPolicy._
-import org.apache.nlpcraft.model.{NCContext, NCContextWordElementConfig, NCContextWordModelConfig, NCElement, NCModel, NCResult, NCValue}
+import org.apache.nlpcraft.model.{NCContext, NCContextWordCategoriesConfig, NCElement, NCModel, NCResult, NCValue}
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Test
@@ -50,20 +48,13 @@ class NCContextWordSpecModel extends NCModel {
override def getName: String = this.getClass.getSimpleName
override def getVersion: String = "1.0.0"
- val MDL_LEVEL = 0.8
- val MDL_POLICY = AVERAGE
+ val MDL_LEVEL: java.lang.Double = 0.7
- override def getContextWordModelConfig: Optional[NCContextWordModelConfig] = {
+ override def getContextWordCategoriesConfig: Optional[NCContextWordCategoriesConfig] = {
Optional.of(
- new NCContextWordModelConfig() {
- override def getSupportedElements: util.Map[String, NCContextWordElementConfig] =
- getElements.asScala.map(e =>
- e.getId ->
- new NCContextWordElementConfig() {
- override def getPolicy: NCContextWordElementPolicy = MDL_POLICY
- override def getScore: Double = MDL_LEVEL
- }
- ).toMap.asJava
+ new NCContextWordCategoriesConfig() {
+ override def getSupportedElements: util.Map[String, java.lang.Double] =
+ getElements.asScala.map(e => e.getId -> MDL_LEVEL).toMap.asJava
override def useIntentsSamples(): Boolean = false
@@ -149,12 +140,11 @@ class NCContextWordSpec extends NCTestContext {
//
// check("I want to have a dog and fox", "class:animal", "dog", "fox")
// check("I fed your fish", "class:animal", "fish")
-
- // check("I like to drive my Porsche and Volkswagen", "class:cars",
"Porsche", "Volkswagen")
-// check("Peugeot added motorcycles to its range in 1901",
"class:cars", "Peugeot", "motorcycles")
//
- //check("The frost is possible today", "class:weather", "frost")
- check("Is vehicle a reliable car ?", "class:weather", "frost")
- //check("There's a very strong wind from the east now",
"class:weather", "wind")
+// check("I like to drive my Porsche and Volkswagen", "class:cars",
"Porsche", "Volkswagen")
+ check("Peugeot added motorcycles to its range in 1901", "class:cars",
"Peugeot")
+
+// check("The frost is possible today", "class:weather", "frost")
+// check("There's a very strong wind from the east now",
"class:weather", "wind")
}
}
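
The test model above shows the whole migration in miniature: instead of building an
NCContextWordElementConfig with a policy and a score per element, a model now maps each
element ID to a single java.lang.Double confidence. A minimal standalone sketch of that
wiring; the adapter arguments and the class:animal element are made up for illustration,
while the getter name and map shape follow the diff:

import java.util
import java.util.Optional
import scala.collection.JavaConverters._
import org.apache.nlpcraft.model.{NCContextWordCategoriesConfig, NCElement, NCModelAdapter}

class CategoriesModelSketch extends NCModelAdapter("sketch.model.id", "Sketch Model", "1.0") {
    private val THRESHOLD: java.lang.Double = 0.7

    override def getElements: util.Set[NCElement] =
        Set[NCElement](new NCElement {
            override def getId: String = "class:animal"
        }).asJava

    // One shared threshold here; per-element values work the same way.
    override def getContextWordCategoriesConfig: Optional[NCContextWordCategoriesConfig] =
        Optional.of(
            new NCContextWordCategoriesConfig() {
                override def getSupportedElements: util.Map[String, java.lang.Double] =
                    getElements.asScala.map(e => e.getId -> THRESHOLD).toMap.asJava
            }
        )
}
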
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
index e99e326..9a6577f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
@@ -17,7 +17,6 @@
package org.apache.nlpcraft.model.ctxword
-import org.apache.nlpcraft.model.NCContextWordElementConfig.NCContextWordElementPolicy.ANY
import org.apache.nlpcraft.model.{NCContext, NCResult}
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Test
@@ -26,8 +25,7 @@ import org.junit.jupiter.api.Test
* Test model.
*/
class NCContextWordSpecModel2 extends NCContextWordSpecModel {
- override val MDL_LEVEL: Double = 0
- override val MDL_POLICY = ANY
+ override val MDL_LEVEL: java.lang.Double = 0.0
override def onContext(ctx: NCContext): NCResult = NCResult.text("OK")
}