This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-385
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-385 by this push:
new 9c863a2 WIP on NLPCRAFT-385
9c863a2 is described below
commit 9c863a251d135c31caa92e9d1d6512c0b9ecb503
Author: Aaron Radzinski <[email protected]>
AuthorDate: Sat Jul 31 22:58:01 2021 -0700
WIP on NLPCRAFT-385
---
.../nlpcraft/model/intent/NCIdlContext.scala | 16 +--
.../model/intent/compiler/NCIdlCompilerBase.scala | 109 ++++++++++++++-------
.../model/intent/solver/NCIntentSolverEngine.scala | 25 ++---
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 1 +
4 files changed, 98 insertions(+), 53 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlContext.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlContext.scala
index ca38140..435bbf3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlContext.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlContext.scala
@@ -18,23 +18,25 @@
package org.apache.nlpcraft.model.intent
import org.apache.nlpcraft.common.ScalaMeta
-import org.apache.nlpcraft.model.NCRequest
+import org.apache.nlpcraft.model.{NCRequest, NCToken}
import scala.collection.mutable
/**
*
+ * @param toks User input tokens.
* @param intentMeta Intent metadata.
* @param convMeta Conversation metadata.
- * @param fragMeta Optional fragment (argument) metadata passed during intent fragment reference.
+ * @param fragMeta Fragment (argument) metadata passed during intent fragment reference.
* @param req Server request holder.
- * @param vars Variable storage.
+ * @param vars Intent variable storage.
*/
case class NCIdlContext(
- intentMeta: ScalaMeta = Map.empty[String, Object],
- convMeta: ScalaMeta = Map.empty[String, Object],
- fragMeta: ScalaMeta = Map.empty[String, Object],
+ toks: Seq[NCToken] = Seq.empty,
+ intentMeta: ScalaMeta = Map.empty,
+ convMeta: ScalaMeta = Map.empty,
+ fragMeta: ScalaMeta = Map.empty,
req: NCRequest,
- vars: mutable.Map[String, NCIdlFunction] = mutable.HashMap.empty[String,
NCIdlFunction]
+ vars: mutable.Map[String, NCIdlFunction] = mutable.HashMap.empty
)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompilerBase.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompilerBase.scala
index 367e338..81ac827 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompilerBase.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompilerBase.scala
@@ -161,6 +161,8 @@ trait NCIdlCompilerBase {
newRuntimeError(s"Expected uniform list type for IDL function
'$fun()', found polymorphic list.", cause)
def rtFunError(fun: String, cause: Exception)(implicit ctx: PRC): NCE =
newRuntimeError(s"Runtime error in IDL function: $fun()", cause)
+ def rtUnavailFunError(fun: String)(implicit ctx: PRC): NCE =
+ newRuntimeError(s"IDL function is unavailable in this context: $fun()")
/**
*
@@ -519,7 +521,7 @@ trait NCIdlCompilerBase {
* @param id
* @return
*/
- def parseCallExpr(id: TN)(implicit ctx: PRC): SI = (tok, stack: S,
termCtx) => {
+ def parseCallExpr(id: TN)(implicit ctx: PRC): SI = (tok, stack: S, idlCtx)
=> {
implicit val evidence: S = stack
val fun = id.getText
@@ -992,13 +994,13 @@ trait NCIdlCompilerBase {
}
})
}
-
+
def doOrElse(): Unit = {
val (x1, x2) = arg2()
-
+
stack.push(() => {
val Z(v1, n1) = x1()
-
+
if (v1 != null)
Z(v1, n1)
else
@@ -1039,10 +1041,10 @@ trait NCIdlCompilerBase {
Z(box(findPart(toToken(tok), toStr(aliasId))), n)
})
}
-
+
def doPartMeta(): Unit = {
val (x1, x2) = arg2()
-
+
stack.push(() => {
val (aliasId, key, n) = extract2(x1, x2)
@@ -1060,17 +1062,41 @@ trait NCIdlCompilerBase {
Z(toToken(t).findPartTokens(toStr(a)), n)
})
}
-
+
def doHasPart(): Unit = {
val (x1, x2) = arg2()
-
+
stack.push(() => {
val (t, a, n) = extract2(x1, x2)
-
+
Z(toToken(t).findPartTokens(toStr(a)).size() == 1, n)
})
}
+ def doIsBefore(f: (NCToken, String) => Boolean): Unit = {
+ val (x1, x2) = arg2()
+
+ stack.push(() => {
+ val (t, a, n) = extract2(x1, x2)
+
+ val tok = toToken(t)
+
+ Z(idlCtx.toks.exists(t => t.getIndex > tok.getIndex && f(t,
toStr(a))), n)
+ })
+ }
+
+ def doIsAfter(f: (NCToken, String) => Boolean): Unit = {
+ val (x1, x2) = arg2()
+
+ stack.push(() => {
+ val (t, a, n) = extract2(x1, x2)
+
+ val tok = toToken(t)
+
+ Z(idlCtx.toks.exists(t => t.getIndex < tok.getIndex && f(t,
toStr(a))), n)
+ })
+ }
+
def doLength(): Unit = {
val x = arg1()
@@ -1108,18 +1134,22 @@ trait NCIdlCompilerBase {
def z[Y](args: () => Y, body: Y => Z): Unit = { val x = args();
stack.push(() => body(x)) }
def z0(body: () => Z): Unit = { popMarker(0); stack.push(() => body())
}
+ def checkAvail(): Unit =
+ if (idlCtx.toks.isEmpty)
+ throw rtUnavailFunError(fun)
+
try
fun match {
// Metadata access.
case "meta_part" => doPartMeta()
case "meta_tok" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(tok.meta[Object](toStr(v))), 1) })
case "meta_model" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(tok.getModel.meta[Object](toStr(v))), 0) })
- case "meta_req" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(termCtx.req.getRequestData.get(toStr(v))), 0) })
- case "meta_user" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(termCtx.req.getUser.meta(toStr(v))), 0) })
- case "meta_company" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(termCtx.req.getCompany.meta(toStr(v))), 0) })
- case "meta_intent" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(termCtx.intentMeta.get(toStr(v)).orNull), 0) })
- case "meta_conv" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(termCtx.convMeta.get(toStr(v)).orNull), 0) })
- case "meta_frag" => z[ST](arg1, { x => val Z(v, f) = x();
Z(box(termCtx.fragMeta.get(toStr(v)).orNull), f) })
+ case "meta_req" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(idlCtx.req.getRequestData.get(toStr(v))), 0) })
+ case "meta_user" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(idlCtx.req.getUser.meta(toStr(v))), 0) })
+ case "meta_company" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(idlCtx.req.getCompany.meta(toStr(v))), 0) })
+ case "meta_intent" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(idlCtx.intentMeta.get(toStr(v)).orNull), 0) })
+ case "meta_conv" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(idlCtx.convMeta.get(toStr(v)).orNull), 0) })
+ case "meta_frag" => z[ST](arg1, { x => val Z(v, f) = x();
Z(box(idlCtx.fragMeta.get(toStr(v)).orNull), f) })
case "meta_sys" => z[ST](arg1, { x => val Z(v, _) = x();
Z(box(U.sysEnv(toStr(v)).orNull), 0) })
// Converts JSON to map.
@@ -1137,6 +1167,17 @@ trait NCIdlCompilerBase {
case "tok_pos" => arg1Tok() match { case x => stack.push(() => {
Z(toToken(x().value).getPos, 1) }) }
case "tok_sparsity" => arg1Tok() match { case x => stack.push(()
=> { Z(toToken(x().value).getSparsity, 1) }) }
case "tok_unid" => arg1Tok() match { case x => stack.push(() => {
Z(toToken(x().value).getUnid, 1) }) }
+
+ case "tok_index" => checkAvail(); arg1Tok() match { case x =>
stack.push(() => { Z(toToken(x().value).getIndex, 1) }) }
+ case "tok_is_last" => checkAvail(); arg1Tok() match { case x =>
stack.push(() => { Z(toToken(x().value).getIndex == idlCtx.toks.size - 1, 1) })
}
+ case "tok_is_first" => checkAvail(); arg1Tok() match { case x =>
stack.push(() => { Z(toToken(x().value).getIndex == 0, 1) }) }
+ case "tok_is_before_id" => checkAvail(); doIsBefore((tok, id) =>
tok.getId == id)
+ case "tok_is_before_group" => checkAvail(); doIsBefore((tok,
grpId) => tok.getGroups.contains(grpId))
+ case "tok_is_before_parent" => checkAvail(); doIsBefore((tok, id)
=> tok.getParentId == id)
+ case "tok_is_after_id" => checkAvail(); doIsAfter((tok, id) =>
tok.getId == id)
+ case "tok_is_after_group" => checkAvail(); doIsAfter((tok, grpId)
=> tok.getGroups.contains(grpId))
+ case "tok_is_after_parent" => checkAvail(); doIsAfter((tok, id) =>
tok.getParentId == id)
+
case "tok_is_abstract" => arg1Tok() match { case x =>
stack.push(() => { Z(toToken(x().value).isAbstract, 1) }) }
case "tok_is_bracketed" => arg1Tok() match { case x =>
stack.push(() => { Z(toToken(x().value).isBracketed, 1) }) }
case "tok_is_direct" => arg1Tok() match { case x => stack.push(()
=> { Z(toToken(x().value).isDirect, 1) }) }
@@ -1161,29 +1202,29 @@ trait NCIdlCompilerBase {
case "tok_find_parts" => doFindParts()
// Request data.
- case "req_id" => z0(() => Z(termCtx.req.getServerRequestId, 0))
- case "req_normtext" => z0(() => Z(termCtx.req.getNormalizedText,
0))
- case "req_tstamp" => z0(() => Z(termCtx.req.getReceiveTimestamp,
0))
- case "req_addr" => z0(() =>
Z(termCtx.req.getRemoteAddress.orElse(null), 0))
- case "req_agent" => z0(() =>
Z(termCtx.req.getClientAgent.orElse(null), 0))
+ case "req_id" => z0(() => Z(idlCtx.req.getServerRequestId, 0))
+ case "req_normtext" => z0(() => Z(idlCtx.req.getNormalizedText, 0))
+ case "req_tstamp" => z0(() => Z(idlCtx.req.getReceiveTimestamp, 0))
+ case "req_addr" => z0(() =>
Z(idlCtx.req.getRemoteAddress.orElse(null), 0))
+ case "req_agent" => z0(() =>
Z(idlCtx.req.getClientAgent.orElse(null), 0))
// User data.
- case "user_id" => z0(() => Z(termCtx.req.getUser.getId, 0))
- case "user_fname" => z0(() =>
Z(termCtx.req.getUser.getFirstName.orElse(null), 0))
- case "user_lname" => z0(() =>
Z(termCtx.req.getUser.getLastName.orElse(null), 0))
- case "user_email" => z0(() =>
Z(termCtx.req.getUser.getEmail.orElse(null), 0))
- case "user_admin" => z0(() => Z(termCtx.req.getUser.isAdmin, 0))
- case "user_signup_tstamp" => z0(() =>
Z(termCtx.req.getUser.getSignupTimestamp, 0))
+ case "user_id" => z0(() => Z(idlCtx.req.getUser.getId, 0))
+ case "user_fname" => z0(() =>
Z(idlCtx.req.getUser.getFirstName.orElse(null), 0))
+ case "user_lname" => z0(() =>
Z(idlCtx.req.getUser.getLastName.orElse(null), 0))
+ case "user_email" => z0(() =>
Z(idlCtx.req.getUser.getEmail.orElse(null), 0))
+ case "user_admin" => z0(() => Z(idlCtx.req.getUser.isAdmin, 0))
+ case "user_signup_tstamp" => z0(() =>
Z(idlCtx.req.getUser.getSignupTimestamp, 0))
// Company data.
- case "comp_id" => z0(() => Z(termCtx.req.getCompany.getId, 0))
- case "comp_name" => z0(() => Z(termCtx.req.getCompany.getName, 0))
- case "comp_website" => z0(() =>
Z(termCtx.req.getCompany.getWebsite.orElse(null), 0))
- case "comp_country" => z0(() =>
Z(termCtx.req.getCompany.getCountry.orElse(null), 0))
- case "comp_region" => z0(() =>
Z(termCtx.req.getCompany.getRegion.orElse(null), 0))
- case "comp_city" => z0(() =>
Z(termCtx.req.getCompany.getCity.orElse(null), 0))
- case "comp_addr" => z0(() =>
Z(termCtx.req.getCompany.getAddress.orElse(null), 0))
- case "comp_postcode" => z0(() =>
Z(termCtx.req.getCompany.getPostalCode.orElse(null), 0))
+ case "comp_id" => z0(() => Z(idlCtx.req.getCompany.getId, 0))
+ case "comp_name" => z0(() => Z(idlCtx.req.getCompany.getName, 0))
+ case "comp_website" => z0(() =>
Z(idlCtx.req.getCompany.getWebsite.orElse(null), 0))
+ case "comp_country" => z0(() =>
Z(idlCtx.req.getCompany.getCountry.orElse(null), 0))
+ case "comp_region" => z0(() =>
Z(idlCtx.req.getCompany.getRegion.orElse(null), 0))
+ case "comp_city" => z0(() =>
Z(idlCtx.req.getCompany.getCity.orElse(null), 0))
+ case "comp_addr" => z0(() =>
Z(idlCtx.req.getCompany.getAddress.orElse(null), 0))
+ case "comp_postcode" => z0(() =>
Z(idlCtx.req.getCompany.getPostalCode.orElse(null), 0))
// String functions.
case "trim" | "strip" => z[ST](arg1, { x => val Z(v, f) = x();
Z(toStr(v).trim, f) })
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 86390ef..d8a3207 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -484,14 +484,15 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
var abort = false
var lastTermMatch: TermMatch = null
- // Conversation metadata (shared across all terms).
- val x = ctx.getConversation.getMetadata
+ val x = ctx.getConversation.getMetadata // Conversation metadata
(shared across all terms).
val convMeta = if (x.isEmpty) Map.empty[String, Object] else
x.asScala.toMap[String, Object]
+ val toks = senToks.map(_.token)
// Check terms.
for (term <- intent.terms if !abort) {
// Fresh context for each term.
- val termCtx = NCIdlContext(
+ val idlCtx = NCIdlContext(
+ toks,
intentMeta = intent.meta,
convMeta = convMeta,
req = ctx.getRequest,
@@ -500,7 +501,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
solveTerm(
term,
- termCtx,
+ idlCtx,
senToks,
if (term.conv) convToks else Seq.empty
) match {
@@ -623,7 +624,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
* Solves term.
*
* @param term
- * @param ctx
+ * @param idlCtx
* @param convToks
* @param senToks
* @return
@@ -631,7 +632,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
@throws[NCE]
private def solveTerm(
term: NCIdlTerm,
- ctx: NCIdlContext,
+ idlCtx: NCIdlContext,
senToks: Seq[IntentToken],
convToks: Seq[IntentToken]
): Option[TermMatch] = {
@@ -639,7 +640,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
logger.warn(s"No tokens available to match on for term
'${term.toAnsiString}'.")
try
- solvePredicate(term.pred, ctx, term.min, term.max, senToks,
convToks) match {
+ solvePredicate(term.pred, idlCtx, term.min, term.max, senToks,
convToks) match {
case Some((usedToks, predWeight)) => Some(
TermMatch(
term.id,
@@ -669,7 +670,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
* Solves term's predicate.
*
* @param pred
- * @param ctx
+ * @param idlCtx
* @param min
* @param max
* @param senToks
@@ -679,7 +680,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
@throws[NCE]
private def solvePredicate(
pred: (NCToken, NCIdlContext) => Z,
- ctx: NCIdlContext,
+ idlCtx: NCIdlContext,
min: Int,
max: Int,
senToks: Seq[IntentToken],
@@ -693,12 +694,12 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
var matches = 0
var tokUses = 0
-
+
val allToks = Seq(senToks, convToks)
-
+
// Collect to the 'max' from sentence & conversation, if possible.
for (col <- allToks; tok <- col.filter(!_.used) if
usedToks.lengthCompare(max) < 0) {
- val Z(res, uses) = pred.apply(tok.token, ctx)
+ val Z(res, uses) = pred.apply(tok.token, idlCtx)
res match {
case b: java.lang.Boolean =>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 504db2f..c370738 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -154,6 +154,7 @@ class NCProbeSynonym(
chunk.kind match {
case TEXT => chunk.wordStem == get0(_.stem, _.stem)
+
case REGEX =>
val r = chunk.regex