This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch web-site
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft-website.git
The following commit(s) were added to refs/heads/web-site by this push:
new c2829fc 1.0.0. fixes.
c2829fc is described below
commit c2829fc5413524d8ca3a4252034cf1cb2e2030a2
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Mar 2 09:51:09 2023 +0400
1.0.0. fixes.
---
data-model.html | 426 --------------------------------------------------------
docs.html | 2 +-
feed.xml | 2 +-
3 files changed, 2 insertions(+), 428 deletions(-)
diff --git a/data-model.html b/data-model.html
deleted file mode 100644
index 2f46923..0000000
--- a/data-model.html
+++ /dev/null
@@ -1,426 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!doctype html><html lang="en"> <script async
src="https://www.googletagmanager.com/gtag/js?id=UA-180663034-1"></script>
<script> window.dataLayer = window.dataLayer || []; function
gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config',
'UA-180663034-1'); </script><meta charset="utf-8"><meta
http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport"
content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta
name="description" content="An open sourc [...]
-package org.apache.nlpcraft.examples.lightswitch
-
-import org.apache.nlpcraft.model.{NCIntentTerm, _}
-
-class LightSwitchModel extends NCModelFileAdapter("lightswitch_model.yaml") {
- @NCIntentRef("ls")
- @NCIntentSample(Array(
- "Turn the lights off in the entire house.",
- "Switch on the illumination in the master bedroom closet.",
- "Get the lights on.",
- "Lights up in the kitchen.",
- "Please, put the light out in the upstairs bedroom.",
- "Set the lights on in the entire house.",
- "Turn the lights off in the guest bedroom.",
- "Could you please switch off all the lights?",
- "Dial off illumination on the 2nd floor.",
- "Please, no lights!",
- "Kill off all the lights now!",
- "No lights in the bedroom, please.",
- "Light up the garage, please!"
- ))
- def onMatch(
- @NCIntentTerm("act") actTok: NCToken,
- @NCIntentTerm("loc") locToks: List[NCToken]
- ): NCResult = {
- val status = if (actTok.getId == "ls:on") "on" else "off"
- val locations =
- if (locToks.isEmpty)
- "entire house"
- else
-
locToks.map(_.meta[String]("nlpcraft:nlp:origtext")).mkString(", ")
-
- // Add HomeKit, Arduino or other integration here.
-
- // By default - return a descriptive action string.
- NCResult.text(s"Lights are [$status] in [${locations.toLowerCase}].")
- }
-}
- </pre></div><div class="tab-pane fade show"
id="lightswitch_yaml_model" role="tabpanel"><pre class="brush: js">
-id: "nlpcraft.lightswitch.ex"
-name: "Light Switch Example Model"
-version: "1.0"
-description: "NLI-powered light switch example model."
-macros:
- - name: "<ACTION>"
- macro: "{turn|switch|dial|let|set|get|put}"
- - name: "<KILL>"
- macro: "{shut|kill|stop|eliminate}"
- - name: "<ENTIRE_OPT>"
- macro: "{entire|full|whole|total|_}"
- - name: "<FLOOR_OPT>"
- macro:
"{upstairs|downstairs|{1st|first|2nd|second|3rd|third|4th|fourth|5th|fifth|top|ground}
floor|_}"
- - name: "<TYPE>"
- macro: "{room|closet|attic|loft|{store|storage} {room|_}}"
- - name: "<LIGHT>"
- macro: "{all|_} {it|them|light|illumination|lamp|lamplight}"
-enabledBuiltInTokens: [] # This example doesn't use any built-in tokens.
-
-#
-# Allows for multi-word synonyms in this entire model
-# to be sparse and permutate them for better detection.
-# These two properties generally enable a free-form
-# natural language comprehension.
-#
-permutateSynonyms: true
-sparse: true
-
-elements:
- - id: "ls:loc"
- description: "Location of lights."
- synonyms:
- - "<ENTIRE_OPT> <FLOOR_OPT>
{kitchen|library|closet|garage|office|playroom|{dinning|laundry|play}
<TYPE>}"
- - "<ENTIRE_OPT> <FLOOR_OPT>
{master|kid|children|child|guest|_} {bedroom|bathroom|washroom|storage}
{<TYPE>|_}"
- - "<ENTIRE_OPT> {house|home|building|{1st|first}
floor|{2nd|second} floor}"
-
- - id: "ls:on"
- groups:
- - "act"
- description: "Light switch ON action."
- synonyms:
- - "<ACTION> {on|up|_} <LIGHT> {on|up|_}"
- - "<LIGHT> {on|up}"
-
- - id: "ls:off"
- groups:
- - "act"
- description: "Light switch OFF action."
- synonyms:
- - "<ACTION> <LIGHT> {off|out|down}"
- - "{<ACTION>|<KILL>} {off|out|down} <LIGHT>"
- - "<KILL> <LIGHT>"
- - "<LIGHT> <KILL>"
- - "{out|no|off|down} <LIGHT>"
- - "<LIGHT> {out|off|down}"
-
-intents:
- - "intent=ls term(act)={has(tok_groups, 'act')} term(loc)={# == 'ls:loc'}*"
- </pre></div></div></div><div class="tab-pane fade
show" id="alarm" role="tabpanel"><nav><div class="nav nav-tabs" role="tablist">
<a class="nav-item nav-link active" data-toggle="tab" href="#alarm_java_model"
role="tab"><code>AlarmModel.java</code></a> <a class="nav-item nav-link"
data-toggle="tab" href="#alarm_intents_idl"
role="tab"><code>intents.idl</code></a> <a class="nav-item nav-link"
data-toggle="tab" href="#alarm_json_model" role="tab"><code>alarm_model.j [...]
-package org.apache.nlpcraft.examples.alarm;
-
-import org.apache.nlpcraft.model.*;
-
-import java.time.*;
-import java.util.*;
-
-import static java.time.temporal.ChronoUnit.MILLIS;
-
-public class AlarmModel extends NCModelFileAdapter {
- private static final DateTimeFormatter FMT =
- DateTimeFormatter.ofPattern("HH'h' mm'm'
ss's'").withZone(ZoneId.systemDefault());
-
- private final Timer timer = new Timer();
-
- public AlarmModel() {
- // Loading the model from the file.
- super("alarm_model.json");
- }
-
- @NCIntentRef("alarm") // Intent is defined in JSON model file
(alarm_model.json and intents.idl).
- @NCIntentSampleRef("alarm_samples.txt") // Samples supplied in an external
file.
- NCResult onMatch(
- NCIntentMatch ctx,
- @NCIntentTerm("nums") List<NCToken> numToks
- ) {
- long ms = calculateTime(numToks);
-
- assert ms >= 0;
-
- timer.schedule(
- new TimerTask() {
- @Override
- public void run() {
- System.out.println(
- "BEEP BEEP BEEP for: " +
ctx.getContext().getRequest().getNormalizedText() + ""
- );
- }
- },
- ms
- );
-
- return NCResult.text("Timer set for: " +
FMT.format(LocalDateTime.now().plus(ms, MILLIS)));
- }
-
- @Override
- public void onDiscard() {
- // Clean up when model gets discarded (e.g. during testing).
- timer.cancel();
- }
-
- public static long calculateTime(List<NCToken> numToks) {
- LocalDateTime now = LocalDateTime.now();
- LocalDateTime dt = now;
-
- for (NCToken num : numToks) {
- String unit = num.meta("nlpcraft:num:unit");
-
- // Skip possible fractional to simplify.
- long v = ((Double)num.meta("nlpcraft:num:from")).longValue();
-
- if (v <= 0)
- throw new NCRejection("Value must be positive: " + unit);
-
- switch (unit) {
- case "second": { dt = dt.plusSeconds(v); break; }
- case "minute": { dt = dt.plusMinutes(v); break; }
- case "hour": { dt = dt.plusHours(v); break; }
- case "day": { dt = dt.plusDays(v); break; }
- case "week": { dt = dt.plusWeeks(v); break; }
- case "month": { dt = dt.plusMonths(v); break; }
- case "year": { dt = dt.plusYears(v); break; }
-
- default:
- // It shouldn't be an assertion, because 'datetime' unit
can be extended outside.
- throw new NCRejection("Unsupported time unit: " + unit);
- }
- }
-
- return now.until(dt, MILLIS);
- }
-}
- </pre></div><div class="tab-pane fade show"
id="alarm_intents_idl" role="tabpanel"><pre class="brush: idl">
-// Fragments (mostly for demo purposes here).
-fragment=buzz term~{# == 'x:alarm'}
-fragment=when
- term(nums)~{
- // Demonstrating term variables.
- @type = meta_tok('nlpcraft:num:unittype')
- @iseq = meta_tok('nlpcraft:num:isequalcondition') // Excludes
conditional statements.
-
- # == 'nlpcraft:num' && @type == 'datetime' && @iseq == true
- }[1,7]
-
-// Intents (using fragments).
-intent=alarm
- fragment(buzz)
- fragment(when)
- </pre></div><div class="tab-pane fade show"
id="alarm_json_model" role="tabpanel"><pre class="brush: js">
-{
- "id": "nlpcraft.alarm.ex",
- "name": "Alarm Example Model",
- "version": "1.0",
- "description": "Alarm example model.",
- "enabledBuiltInTokens": [
- "nlpcraft:num"
- ],
- "elements": [
- {
- "id": "x:alarm",
- "description": "Alarm token indicator.",
- "synonyms": [
- "{ping|buzz|wake|call|hit} {me|up|me up|_}",
- "{set|_} {my|_} {wake|wake up|_}
{alarm|timer|clock|buzzer|call} {clock|_} {up|_}"
- ]
- }
- ],
- "intents": [
- "import('intents.idl')" // Import intents from external file.
- ]
-}
- </pre></div></div></div></div><p> Further sub-sections
will provide details on model's static configuration and dynamic programmable
logic implementation.</p></section><section id="dataflow"><h2
class="section-title">Model Dataflow <a href="#"><i class="top-link fas fa-fw
fa-angle-double-up"></i></a></h2><figure> <img alt="data model dataflow"
class="img-fluid" src="/images/homepage-fig1.1.png"><figcaption><b>Fig 1.</b>
NLPCraft Architecture</figcaption></figure>< [...]
-{
- "id": "user.defined.id",
- "name": "User Defined Name",
- "version": "1.0",
- "description": "Short model description.",
- "enabledBuiltInTokens": ["google:person", "google:location"],
- "macros": [],
- "metadata": {},
- "elements": [
- {
- "id": "x:id",
- "description": "",
- "groups": [],
- "parentId": "",
- "synonyms": [],
- "metadata": {},
- "values": []
- }
- ],
- ...
- "intents": []
-}
- </pre></div><div class="tab-pane fade show" id="model-yaml"
role="tabpanel"><pre class="brush: js">
-id: "user.defined.id"
-name: "User Defined Name"
-version: "1.0"
-description: "Short model description."
-macros:
-enabledBuiltInTokens:
-elements:
- - id: "x:id"
- description: ""
- synonyms:
- groups:
- values:
- parentId:
- metadata:
-...
-intents:
- </pre></div></div><div class="bq success"><div
class="bq-idea-container"><div><div>💡</div></div><div> Note that using
JSON/YAML-based configuration is a <b>canonical way</b> for creating data
models in NLPCraft as it allows you to cleanly separate static configuration from
model's programmable logic.</div></div></div></section><section id="ne"><h2
class="section-title">Named Entities <a href="#"><i class="top-link fas fa-fw
fa-angle-double-up"></i></a></h2><p> Named entity, a [...]
- ...
- "elements": [
- {
- "id": "transport.vehicle",
- "description": "Transportation vehicle",
- "synonyms": [
- "car",
- "truck",
- "light duty truck",
- "heavy duty truck",
- "sedan",
- "coupe"
- ]
- }
- ]
- ...
- </pre><p> While adding multi-word synonyms looks somewhat trivial - in
real models, the naive approach can lead to thousands and even tens of
thousands of possible synonyms due to words, grammar, and linguistic
permutations - which quickly becomes untenable if performed manually.</p><p>
NLPCraft provides an effective tool for a compact synonyms representation.
Instead of listing all possible multi-word synonyms one by one you can use
a combination of the following techniques:</p><ul><l [...]
- ...
- "macros": [
- {
- "name": "<TRUCK_TYPE>",
- "macro": "{light duty|heavy duty|half ton|1/2 ton|3/4
ton|one ton|super duty}"
- }
- ],
- "elements": [
- {
- "id": "transport.vehicle",
- "description": "Transportation vehicle",
- "synonyms": [
- "car",
- "{<TRUCK_TYPE>|_} {pickup|_} truck",
- "sedan",
- "coupe"
- ],
- "values": [
- {
- "value": "mercedes",
- "synonyms": ["mercedes-ben{z|s}", "mb", "ben{z|s}"]
- },
- {
- "value": "bmw",
- "synonyms": ["{bimmer|bimer|beemer}", "bayerische
motoren werke"]
- },
- {
- "value": "chevrolet",
- "synonyms": ["chevy"]
- }
- ]
- }
- ]
- ...
- </pre><p> With that setup <code>transport.vehicle</code> element will
be recognized by any of the following input
strings:</p><ul><li><code>car</code><li><code>benz</code> (with value
<code>mercedes</code>)<li><code>3/4 ton pickup truck</code><li><code>light duty
truck</code><li><code>chevy</code> (with value
<code>chevrolet</code>)<li><code>bimmer</code> (with value
<code>bmw</code>)<li><code>transport.vehicle</code></ul><span id="groups"
class="section-sub-title">Element Groups [...]
-+-- vehicle
-| +--truck
-| | |-- light.duty.truck
-| | |-- heavy.duty.truck
-| | +-- medium.duty.truck
-| +--car
-| | |-- coupe
-| | |-- sedan
-| | |-- hatchback
-| | +-- wagon
- </pre><p> Then in our intent, for example, we could look for any token
with root parent ID <code>vehicle</code> or immediate parent ID
<code>truck</code> or <code>car</code> without a need to match on all current
and future individual sub-IDs. For example:</p><pre class="brush: idl">
- intent=vehicle.intent term~{has(tok_ancestors, 'vehicle')}
- intent=truck.intent term~{tok_parent == 'truck'}
- intent=car.intent term~{tok_parent == 'car'}
- </pre></section><section id="syns-tools"> <span id="macros"
class="section-sub-title">Macros <a href="#"><i class="top-link fas fa-fw
fa-angle-double-up"></i></a></span><p> Listing all possible multi-word synonyms
for a given element can be a time-consuming task. Macros together with option
groups allow for significant simplification of this task. Macros allow you to
give a name to an often used set of words or option groups and reuse it without
repeating those words or option gr [...]
- "macros": [
- {
- "name": "<A>",
- "macro": "aaa"
- },
- {
- "name": "<B>",
- "macro": "<A> bbb"
- },
- {
- "name": "<C>",
- "macro": "<A> bbb {z|w}"
- }
- ]
- </pre><span id="option-groups" class="section-sub-title">Option Groups
<a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></span><p>
Option groups are similar to wildcard patterns that operate on a single word
base. One line of option group expands into one or more individual synonyms.
Option groups are the key mechanism for shortened synonym notation. The
following examples demonstrate how to use option groups.</p><p> Consider the
following macros defined belo [...]
- ...
- "macros": [
- {
- "name": "<TRUCK_TYPE>",
- "macro": "{ {light|super|heavy|medium} duty|half ton|1/2
ton|3/4 ton|one ton}"
- }
- ],
- "elements": [
- {
- "id": "transport.vehicle",
- "description": "Transportation vehicle",
- "synonyms": [
- "car",
- "{<TRUCK_TYPE>|_} {pickup|_} truck",
- "sedan",
- "coupe"
- ]
- }
- ]
- ...
- </pre><span id="regex" class="section-sub-title">Regular Expressions
<a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></span><p>
Any individual synonym word that starts and ends with <code>//</code> (two
forward slashes) is considered to be Java regular expression as defined in
<code>java.util.regex.Pattern</code>. Note that regular expression can only
span a single word, i.e. only individual words from the user input will be
matched against given regular expr [...]
- "synonyms": [
- "{foo|//bar.+//}"
- ]
- </pre><p> will match word <code>foo</code> or any other strings that
start with <code>bar</code> as long as this string doesn't contain
whitespaces.</p><div class="bq info"> <b>Regular Expressions Performance</b><p>
It's important to note that regular expressions can significantly affect the
performance of the NLPCraft processing if used uncontrolled. Use it with
caution and test the performance of your model to ensure it meets your
requirements.</p></div><h2 id="dsl" class="sect [...]
- ...
- "elements": [
- {
- "id": "transport.vehicle",
- "description": "Transportation vehicle",
- "synonyms": [
- "car",
- "truck",
- "{light|heavy|super|medium} duty {pickup|_} truck",
- "sedan",
- "coupe"
- ]
- },
- {
- "id": "race.vehicle",
- "description": "Race vehicle",
- "synonyms": [
- "{race|speed|track} ^^{# == 'transport.vehicle'}^^"
- ]
- }
-
- ]
- ...
- </pre><div class="bq warn"><p> <b>Greedy NERs <span
class="amp">&</span> Synonyms Conflicts</b></p><p> Note that in the above
example you need to ensure that words <code>race</code>, <code>speed</code> or
<code>track</code> are not part of the <code>transport.vehicle</code> token. It
is particularly important for the 3rd party NERs where specific rules about what
words can or cannot be part of the token are unclear or undefined. In such
cases the only remedy is to extensively test [...]
- ...
- "elements": [
- {
- "id": "google.loc.wrap",
- "description": "Wrapper for google location",
- "groups": ["my_group"],
- "synonyms": [
- "^^{# == 'google:location'}^^"
- ]
- }
- ]
- ...
- </pre><b>IDL Expression Syntax</b><p> IDL expressions are a subset of
overall <a href="/intent-matching.html#idl">IDL syntax</a>. You can review
formal <a target="github"
href="https://github.com/apache/incubator-nlpcraft/blob/master/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/antlr4/NCIdl.g4">ANTLR4
grammar</a> but basically an IDL expression for synonym is a term expression
with the optional alias at the beginning. Here's an example of IDL expression
defin [...]
- "synonyms": [
- "population {of|for} ^^[city]{# == 'nlpcraft:city' &&
lowercase(meta_tok('city:country')) == 'france'}^^"
- ]
- </pre><b>NOTES:</b><ul><li>Optional alias <code>city</code> can be
used to access a constituent part token (with ID
<code>nlpcraft:city</code>).<li> The expression between <code>{</code> and
<code>}</code> brackets is a standard IDL term expression.</ul><h2
id="custom_ners" class="section-sub-title">Custom NERs <a href="#"><i
class="top-link fas fa-fw fa-angle-double-up"></i></a></h2><p> By default, the
data model detects its elements by their synonyms, regexp or IDL expressions.
[...]
diff --git a/docs.html b/docs.html
index 100ea75..cb0d108 100644
--- a/docs.html
+++ b/docs.html
@@ -15,4 +15,4 @@
limitations under the License.
-->
-<!doctype html><html lang="en"> <script async
src="https://www.googletagmanager.com/gtag/js?id=UA-180663034-1"></script>
<script> window.dataLayer = window.dataLayer || []; function
gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config',
'UA-180663034-1'); </script><meta charset="utf-8"><meta
http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport"
content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta
name="description" content="An open sourc [...]
+<!doctype html><html lang="en"> <script async
src="https://www.googletagmanager.com/gtag/js?id=UA-180663034-1"></script>
<script> window.dataLayer = window.dataLayer || []; function
gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config',
'UA-180663034-1'); </script><meta charset="utf-8"><meta
http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport"
content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta
name="description" content="An open sourc [...]
diff --git a/feed.xml b/feed.xml
index c8dbccc..bf3b113 100644
--- a/feed.xml
+++ b/feed.xml
@@ -1 +1 @@
-<?xml version="1.0" encoding="utf-8"?><feed
xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/"
version="4.2.2">Jekyll</generator><link href="http://localhost:4000/feed.xml"
rel="self" type="application/atom+xml" /><link href="http://localhost:4000/"
rel="alternate" type="text/html"
/><updated>2023-03-01T10:11:31+04:00</updated><id>http://localhost:4000/feed.xml</id><title
type="html">Apache NLPCraft</title></feed>
\ No newline at end of file
+<?xml version="1.0" encoding="utf-8"?><feed
xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/"
version="4.2.2">Jekyll</generator><link href="http://localhost:4000/feed.xml"
rel="self" type="application/atom+xml" /><link href="http://localhost:4000/"
rel="alternate" type="text/html"
/><updated>2023-03-02T09:48:25+04:00</updated><id>http://localhost:4000/feed.xml</id><title
type="html">Apache NLPCraft</title></feed>
\ No newline at end of file