This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 4418fa774e166f98e19142ed605a608ba723f5f2 Author: Aaron Radzinski <[email protected]> AuthorDate: Thu Jul 22 22:39:12 2021 -0700 WIP on NLPCRAFT-369. --- .../scala/org/apache/nlpcraft/model/NCToken.java | 9 ++++ .../nlpcraft/model/intent/NCIdlIntentOptions.scala | 5 +++ .../model/intent/compiler/NCIdlCompiler.scala | 23 ++++++++-- .../model/intent/solver/NCIntentSolverEngine.scala | 50 +++++++++++++--------- .../intent/idl/compiler/NCIdlCompilerSpec.scala | 20 +++++++++ 5 files changed, 83 insertions(+), 24 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java index f6d8937..374e667 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java @@ -488,6 +488,15 @@ public interface NCToken extends NCMetadata { } /** + * Tests whether or not this token is not for a user-defined model element. + * + * @return {@code True} if this token is not defined by the user model element, {@code false} otherwise. + */ + default boolean isSystemDefined() { + return !isUserDefined(); + } + + /** * Whether or not this token is abstract. * <p> * An abstract token is only detected when it is either a constituent part of some other non-abstract token diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala index 3249483..262fa1d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala @@ -37,6 +37,11 @@ class NCIdlIntentOptions { var ignoreUnusedUserTokens: Boolean = false /** + * Whether or not to allow intent to match if all matching tokens came from STM only. 
+ */ + var allowStmTokenOnly: Boolean = false + + /** * Whether or not the order of term is important for intent match. */ var ordered: Boolean = false diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala index e5d2653..2066f18 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala @@ -98,6 +98,19 @@ object NCIdlCompiler extends LazyLogging { */ def getCompiledSynonym: NCIdlSynonym = synonym + /** + * + * @param json + * @param ctx + * @return + */ + private def json2Obj(json: String)(ctx: ParserRuleContext): Map[String, Object] = + try + U.jsonToScalaMap(json) + catch { + case e: Exception => throw newSyntaxError(s"Invalid JSON (${e.getMessage})")(ctx) + } + /* * Shared/common implementation. */ @@ -110,9 +123,9 @@ object NCIdlCompiler extends LazyLogging { override def exitCallExpr(ctx: IDP.CallExprContext): Unit = expr += parseCallExpr(ctx.FUN_NAME())(ctx) override def exitAtom(ctx: IDP.AtomContext): Unit = expr += parseAtom(ctx.getText)(ctx) override def exitTermEq(ctx: IDP.TermEqContext): Unit = termConv = ctx.TILDA() != null - override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = U.jsonToScalaMap(ctx.jsonObj().getText) - override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = U.jsonToScalaMap(ctx.jsonObj().getText) - override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(U.jsonToScalaMap(ctx.jsonObj().getText))(ctx) + override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = json2Obj(ctx.jsonObj().getText)(ctx) + override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = json2Obj(ctx.jsonObj().getText)(ctx) + override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = 
convertToOptions(json2Obj(ctx.jsonObj().getText)(ctx))(ctx) override def exitIntentId(ctx: IDP.IntentIdContext): Unit = intentId = ctx.id().getText override def exitAlias(ctx: IDP.AliasContext): Unit = alias = ctx.id().getText @@ -122,7 +135,7 @@ object NCIdlCompiler extends LazyLogging { def boolVal(k: String, v: Object): Boolean = v match { case b: java.lang.Boolean if b != null => b - case _ => throw newSyntaxError(s"Invalid intent option value: $k")(ctx) + case _ => throw newSyntaxError(s"Expecting boolean value for intent option: $k")(ctx) } for ((k, v) <- json) { @@ -134,6 +147,8 @@ object NCIdlCompiler extends LazyLogging { opts.ignoreUnusedSystemTokens = boolVal(k, v) else if (k == "unused_user_toks") opts.ignoreUnusedUserTokens = boolVal(k, v) + else if (k == "allow_stm_only") + opts.allowStmTokenOnly = boolVal(k, v) else throw newSyntaxError(s"Unknown intent option: $k")(ctx) } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala index 65e5280..8612482 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala @@ -129,7 +129,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { * @param used * @param token */ - private case class UsedToken( + private case class UseToken( var used: Boolean, var conv: Boolean, token: NCToken @@ -142,7 +142,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { */ private case class TermMatch( termId: Option[String], - usedTokens: List[UsedToken], + usedTokens: List[UseToken], weight: Weight ) { lazy val maxIndex: Int = usedTokens.maxBy(_.token.index).token.index @@ -155,7 +155,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { */ private case class TermTokensGroup( term: 
NCIdlTerm, - usedTokens: List[UsedToken] + usedTokens: List[UseToken] ) /** @@ -210,13 +210,13 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { val callback = pair._2 // Isolated sentence tokens. - val senToks = Seq.empty[UsedToken] ++ availToks.map(UsedToken(false, false, _)) + val senToks = Seq.empty[UseToken] ++ availToks.map(UseToken(false, false, _)) val senTokGroups = availToks.map(t => if (t.getGroups != null) t.getGroups.asScala.sorted else Seq.empty) // Isolated conversation tokens. val convToks = if (intent.terms.exists(_.conv)) - Seq.empty[UsedToken] ++ + Seq.empty[UseToken] ++ // We shouldn't mix tokens with same group from conversation // history and processed sentence. ctx.getConversation.getTokens.asScala. @@ -225,9 +225,9 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { !senTokGroups.exists(convTokGroups.containsSlice) }). - map(UsedToken(used = false, conv = true, _)) + map(UseToken(used = false, conv = true, _)) else - Seq.empty[UsedToken] + Seq.empty[UseToken] // Solve intent in isolation. 
solveIntent(ctx, intent, senToks, convToks, vrnIdx) match { @@ -408,15 +408,15 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { * @param convToks * @return */ - //noinspection DuplicatedCode private def solveIntent( ctx: NCContext, intent: NCIdlIntent, - senToks: Seq[UsedToken], - convToks: Seq[UsedToken], + senToks: Seq[UseToken], + convToks: Seq[UseToken], varIdx: Int ): Option[IntentMatch] = { val intentId = intent.id + val opts = intent.options val flow = NCDialogFlowManager.getDialogFlow(ctx.getRequest.getUser.getId, ctx.getModel.getId) val varStr = s"(variant #${varIdx + 1})" val flowRegex = intent.flowRegex @@ -479,7 +479,6 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { val intentW = new Weight() val intentGrps = mutable.ListBuffer.empty[TermTokensGroup] var abort = false - val opts = intent.options var lastTermMatch: TermMatch = null // Conversation metadata (shared across all terms). @@ -549,17 +548,28 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { var res: Option[IntentMatch] = None - if (usedSenToks.isEmpty && usedConvToks.nonEmpty) + if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty) logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr.") - else if (unusedSenToks.exists(_.token.isUserDefined)) + else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord)) + logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr.") + else if (!opts.ignoreUnusedUserTokens && unusedSenToks.exists(_.token.isUserDefined)) NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)). info( logger, Some( - s"Intent '$intentId' ${bo(r("did not match"))} because of remaining unused user tokens $varStr." + + s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr." 
+ s"\nUnused user tokens for intent '$intentId' $varStr:" ) ) + else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(_.token.isSystemDefined)) + NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)). + info( + logger, + Some( + s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr." + + s"\nUnused system tokens for intent '$intentId' $varStr:" + ) + ) else { if (usedSenToks.isEmpty && usedConvToks.isEmpty) logger.warn(s"Intent '$intentId' ${bo(y("matched"))} but no tokens were used $varStr.") @@ -598,8 +608,8 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { private def solveTerm( term: NCIdlTerm, ctx: NCIdlContext, - senToks: Seq[UsedToken], - convToks: Seq[UsedToken] + senToks: Seq[UseToken], + convToks: Seq[UseToken] ): Option[TermMatch] = { if (senToks.isEmpty && convToks.isEmpty) logger.warn(s"No tokens available to match on for term '${term.toAnsiString}'.") @@ -648,14 +658,14 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace { ctx: NCIdlContext, min: Int, max: Int, - senToks: Seq[UsedToken], - convToks: Seq[UsedToken] - ): Option[(List[UsedToken], Weight)] = { + senToks: Seq[UseToken], + convToks: Seq[UseToken] + ): Option[(List[UseToken], Weight)] = { // Algorithm is "hungry", i.e. it will fetch all tokens satisfying item's predicate // in entire sentence even if these tokens are separated by other already used tokens // and conversation will be used only to get to the 'max' number of the item. 
- var usedToks = List.empty[UsedToken] + var usedToks = List.empty[UseToken] var matches = 0 var tokUses = 0 diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala index adb717c..d1a624f 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala @@ -178,6 +178,26 @@ class NCIdlCompilerSpec { checkCompileError( """ |intent=i1 + | options={'ordered': null} + | flow="a[^0-9]b" + | meta={'a': true, 'b': {'Москва': [1, 2, 3]}} + | term(t1)={2 == 2 && size(tok_id()) != -25} + |""".stripMargin + ) + + checkCompileError( + """ + |intent=i1 + | options={'ordered': false, 'ordered': true} + | flow="a[^0-9]b" + | meta={'a': true, 'b': {'Москва': [1, 2, 3]}} + | term(t1)={2 == 2 && size(tok_id()) != -25} + |""".stripMargin + ) + + checkCompileError( + """ + |intent=i1 |/* | * +=====================+ | * | block comments......|
