This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-369 by this push:
new 58d85dc WIP on NLPCRAFT-369.
58d85dc is described below
commit 58d85dc1a29da9a8da09587059acdfabca06a822
Author: Aaron Radzinski <[email protected]>
AuthorDate: Thu Jul 22 23:22:31 2021 -0700
WIP on NLPCRAFT-369.
---
.../alarm/src/main/resources/alarm_samples.txt | 2 +-
.../nlpcraft/model/intent/NCIdlIntentOptions.scala | 38 ++++++--------
.../model/intent/compiler/NCIdlCompiler.scala | 12 +++--
.../model/intent/solver/NCIntentSolverEngine.scala | 58 +++++++++++++++-------
.../intent/idl/compiler/NCIdlCompilerSpec.scala | 7 ++-
5 files changed, 67 insertions(+), 50 deletions(-)
diff --git a/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
b/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
index 599387c..188d1d6 100644
--- a/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
+++ b/nlpcraft-examples/alarm/src/main/resources/alarm_samples.txt
@@ -19,7 +19,7 @@
# Set of samples (corpus) for automatic unit and regression testing.
#
-Ping me in 3 minutes
+Ping me in 3 minutes tomorrow
Buzz me in an hour and 15mins
Set my alarm for 30s
Please, wake me up in twenty five minutes!
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
index 262fa1d..4640657 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
@@ -21,28 +21,20 @@ package org.apache.nlpcraft.model.intent
* Intent options container.
*/
class NCIdlIntentOptions {
- /**
- * Whether to ignore unused free words for intent match.
- */
- var ignoreUnusedFreeWords: Boolean = true
-
- /**
- * Whether to ignore unused system tokens for intent match.
- */
- var ignoreUnusedSystemTokens: Boolean = true
-
- /**
- * Whether to ignore unused user tokens for intent match.
- */
- var ignoreUnusedUserTokens: Boolean = false
-
- /**
- * Whether or not to allow intent to match if all matching tokens came
from STM only.
- */
- var allowStmTokenOnly: Boolean = false
+ var ignoreUnusedFreeWords: Boolean = true // Whether to ignore unused free
words for intent match.
+ var ignoreUnusedSystemTokens: Boolean = true // Whether to ignore unused
system tokens for intent match.
+ var ignoreUnusedUserTokens: Boolean = false // Whether to ignore unused
user tokens for intent match.
+ var allowStmTokenOnly: Boolean = false // Whether or not to allow intent
to match if all matching tokens came from STM only.
+ var ordered: Boolean = false // Whether or not the order of terms is
important for intent match.
+}
- /**
- * Whether or not the order of term is important for intent match.
- */
- var ordered: Boolean = false
+object NCIdlIntentOptions {
+ /*
+ * JSON field names.
+ */
+ final val JSON_UNUSED_FREE_WORDS = "unused_free_words"
+ final val JSON_UNUSED_SYS_TOKS = "unused_sys_toks"
+ final val JSON_UNUSED_USER_TOKS = "unused_user_toks"
+ final val JSON_ALLOW_STM_ONLY = "allow_stm_only"
+ final val JSON_ORDERED = "ordered"
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index 2066f18..0324f57 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -138,16 +138,18 @@ object NCIdlCompiler extends LazyLogging {
case _ => throw newSyntaxError(s"Expecting boolean value
for intent option: $k")(ctx)
}
+ import NCIdlIntentOptions._
+
for ((k, v) <- json) {
- if (k == "ordered")
+ if (k == JSON_ORDERED)
opts.ordered = boolVal(k, v)
- else if (k == "unused_free_words")
+ else if (k == JSON_UNUSED_FREE_WORDS)
opts.ignoreUnusedFreeWords = boolVal(k, v)
- else if (k == "unused_sys_toks")
+ else if (k == JSON_UNUSED_SYS_TOKS)
opts.ignoreUnusedSystemTokens = boolVal(k, v)
- else if (k == "unused_user_toks")
+ else if (k == JSON_UNUSED_USER_TOKS)
opts.ignoreUnusedUserTokens = boolVal(k, v)
- else if (k == "allow_stm_only")
+ else if (k == JSON_ALLOW_STM_ONLY)
opts.allowStmTokenOnly = boolVal(k, v)
else
throw newSyntaxError(s"Unknown intent option: $k")(ctx)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 8612482..c939a05 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -24,13 +24,12 @@ import org.apache.nlpcraft.common.debug.{NCLogGroupToken,
NCLogHolder}
import org.apache.nlpcraft.common.opencensus.NCOpenCensusTrace
import org.apache.nlpcraft.model.impl.NCTokenLogger
import org.apache.nlpcraft.model.impl.NCTokenPimp._
-import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction,
NCIdlIntent, NCIdlTerm, NCIdlStackItem => Z}
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction,
NCIdlIntent, NCIdlIntentOptions, NCIdlTerm, NCIdlStackItem => Z}
import org.apache.nlpcraft.model.{NCContext, NCDialogFlowItem, NCIntentMatch,
NCResult, NCToken}
import org.apache.nlpcraft.probe.mgrs.dialogflow.NCDialogFlowManager
import java.util.function.Function
import java.util.{List => JList}
-
import scala.collection.mutable
import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsScala,
SeqHasAsJava}
@@ -548,28 +547,49 @@ object NCIntentSolverEngine extends LazyLogging with
NCOpenCensusTrace {
var res: Option[IntentMatch] = None
+ import NCIdlIntentOptions._
+
if (!opts.allowStmTokenOnly && usedSenToks.isEmpty &&
usedConvToks.nonEmpty)
- logger.info(s"Intent '$intentId' ${bo(r("did not match"))}
because all its matched tokens came from STM $varStr.")
+ logger.info(
+ s"Intent '$intentId' ${bo(r("did not match"))} because
all its matched tokens came from STM $varStr. " +
+ s"See intent '${c(JSON_ALLOW_STM_ONLY)}' option."
+ )
else if (!opts.ignoreUnusedFreeWords &&
unusedSenToks.exists(_.token.isFreeWord))
- logger.info(s"Intent '$intentId' ${bo(r("did not match"))}
because of unused free words $varStr.")
+ NCTokenLogger.prepareTable(
+ unusedSenToks.filter(_.token.isFreeWord).map(_.token)
+ ).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))}
because of unused free words $varStr. " +
+ s"See intent '${c(JSON_UNUSED_FREE_WORDS)}'
option. " +
+ s"Unused free words:"
+ )
+ )
else if (!opts.ignoreUnusedUserTokens &&
unusedSenToks.exists(_.token.isUserDefined))
-
NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)).
- info(
- logger,
- Some(
- s"Intent '$intentId' ${bo(r("did not match"))}
because of unused user tokens $varStr." +
- s"\nUnused user tokens for intent '$intentId'
$varStr:"
- )
+ NCTokenLogger.prepareTable(
+
unusedSenToks.filter(_.token.isUserDefined).map(_.token)
+ ).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))}
because of unused user tokens $varStr. " +
+ s"See intent '${c(JSON_UNUSED_USER_TOKS)}' option.
" +
+ s"Unused user tokens:"
)
- else if (!opts.ignoreUnusedSystemTokens &&
unusedSenToks.exists(_.token.isSystemDefined))
-
NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)).
- info(
- logger,
- Some(
- s"Intent '$intentId' ${bo(r("did not match"))}
because of unused system tokens $varStr." +
- s"\nUnused system tokens for intent
'$intentId' $varStr:"
- )
+ )
+ else if (!opts.ignoreUnusedSystemTokens &&
unusedSenToks.exists(tok => !tok.token.isFreeWord && tok.token.isSystemDefined))
+ NCTokenLogger.prepareTable(
+ unusedSenToks.filter(tok => !tok.token.isFreeWord &&
tok.token.isSystemDefined).map(_.token)
+ ).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))}
because of unused system tokens $varStr. " +
+ s"See intent '${c(JSON_UNUSED_SYS_TOKS)}' option.
" +
+ s"Unused system tokens:"
)
+ )
else {
if (usedSenToks.isEmpty && usedConvToks.isEmpty)
logger.warn(s"Intent '$intentId' ${bo(y("matched"))}
but no tokens were used $varStr.")
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index d1a624f..3106911 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -102,8 +102,11 @@ class NCIdlCompilerSpec {
| */
|intent=i1
| options={
- | 'ordered': true,
- | 'unused_free_words': false
+ | 'ordered': false,
+ | 'unused_free_words': true,
+ | 'unused_sys_toks': true,
+ | 'unused_user_toks': false,
+ | 'allow_stm_only': false
| }
| flow="a[^0-9]b" // Flow comment.
| term(t1)={has(json("{'a': true, 'b\'2': {'arr': [1, 2,
3]}}"), list("موسكو\"", 'v1\'v1', "k2", "v2"))}