This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-468 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 9a21402f1597a3e4ff8acef7cc7bcd4a0035ba11 Author: Sergey Kamov <[email protected]> AuthorDate: Mon Oct 11 13:30:59 2021 +0300 WIP. --- .../main/scala/org/apache/nlpcraft/NCNlpcraft.java | 49 ++ .../NCNlpRichWord.java => NCNlpcraftBuilder.java} | 27 +- .../nlpcraft/common/ansi/NCAnsiProgressBar.scala | 7 +- .../nlpcraft/common/ansi/NCAnsiSpinner.scala | 5 +- .../nlpcraft/common/ascii/NCAsciiTable.scala | 6 +- .../nlpcraft/common/makro/NCMacroCompiler.scala | 2 +- .../common/makro/antlr4/NCMacroDslLexer.java | 15 +- .../common/makro/antlr4/NCMacroDslParser.java | 25 +- .../scala/org/apache/nlpcraft/model/NCContext.java | 66 ++ .../org/apache/nlpcraft/model/NCConversation.java | 95 +++ .../apache/nlpcraft/model/NCDialogFlowItem.java | 149 ++++ .../scala/org/apache/nlpcraft/model/NCIntent.java | 11 +- .../org/apache/nlpcraft/model/NCIntentMatch.java | 108 +++ .../org/apache/nlpcraft/model/NCIntentRef.java | 6 +- .../org/apache/nlpcraft/model/NCIntentSample.java | 7 +- .../org/apache/nlpcraft/model/NCIntentSkip.java | 2 +- .../org/apache/nlpcraft/model/NCIntentTerm.java | 8 +- .../apache/nlpcraft/model/NCMacroProcessor.java | 1 + .../org/apache/nlpcraft/model/NCMetadata.java | 6 +- .../scala/org/apache/nlpcraft/model/NCModel.java | 7 +- .../apache/nlpcraft/model/NCModelBehaviour.java | 153 ++++ .../org/apache/nlpcraft/model/NCModelView.java | 800 +++++++++++++++++++++ .../org/apache/nlpcraft/model/NCRejection.java | 2 +- .../scala/org/apache/nlpcraft/model/NCRequest.java | 85 +++ .../scala/org/apache/nlpcraft/model/NCResult.java | 3 +- .../scala/org/apache/nlpcraft/model/NCToken.java | 12 +- .../scala/org/apache/nlpcraft/model/NCValue.java | 5 +- .../scala/org/apache/nlpcraft/model/NCVariant.java | 86 +++ .../nlpcraft/model/builders/NCModelBuilder.java | 157 ++++ .../NCResultBuilder.java} | 27 +- .../model/impl/ner/NCDefaultNerElement.java | 337 +++++++++ .../model/impl/ner/NCDefaultNerParser.java | 222 ++++++ .../ner/NCDefaultNervalueLoader.java} | 10 +- 
.../ner/builders/NCDefaultNerElementBuilder.java | 58 ++ .../ner/builders/NCDefaultNerParserBuilder.java | 52 ++ .../opennlp/NCOpenNlpNerParser.java} | 24 +- .../opennlp/NCOpenNlpWordsParser.java} | 23 +- .../apache/nlpcraft/model/nlp/NCNlpNerParser.java | 65 ++ .../apache/nlpcraft/model/nlp/NCNlpNerToken.java | 23 +- .../apache/nlpcraft/model/nlp/NCNlpRichWord.java | 6 - .../apache/nlpcraft/model/nlp/NCNlpTextParser.java | 17 + .../org/apache/nlpcraft/model/nlp/NCNlpWord.java | 17 + .../src/test/java/org/apache/nlpcraft/NCSpec.java | 124 ++++ 43 files changed, 2799 insertions(+), 111 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCNlpcraft.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCNlpcraft.java new file mode 100644 index 0000000..b27b22e --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCNlpcraft.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft; + +import org.apache.nlpcraft.model.NCResult; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +public interface NCNlpcraft { + String ask(String txt, Map<String, Object> data, boolean enableLog, String userId); + String ask(String txt, String userId); + String ask(String txt); + + NCResult askSync(String txt, Map<String, Object> data, boolean enableLog, String userId); + NCResult askSync(String txt, String userId); + NCResult askSync(String txt); + + List<NCResult> check(Set<String> srvReqIds, int maxRows); + List<NCResult> check(String userId, int maxRows); + NCResult check(String srvReqId); + + void cancel(Set<String> srvReqIds); + void cancel(String srvReqId); + void cancelAll(String userId); + void cancelAll(); + + void clearConversation(String userId); + void clearConversation(); + + void clearDialog(String userId); + void clearDialog(); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCNlpcraftBuilder.java similarity index 68% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCNlpcraftBuilder.java index 1ccd742..31c9231 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCNlpcraftBuilder.java @@ -15,22 +15,21 @@ * limitations under the License. */ -package org.apache.nlpcraft.model.nlp; +package org.apache.nlpcraft; -import org.apache.nlpcraft.model.nlp.NCNlpWord; +import org.apache.nlpcraft.model.NCModel; -/** - * Extended word data, enriched by NLP. - * It is argument for NCNlpNerTokensParser. 
- */ -public interface NCNlpRichWord extends NCNlpWord { - boolean isStopWord(); - - boolean isBracketed(); - - boolean isQuoted(); +public class NCNlpcraftBuilder { + public NCNlpcraftBuilder withModel(NCModel mdl) { + return null; + } - boolean isKnownWord(); + // TODO: add deserialization component. + //public NCNlpcraftBuilder withUrl(String url) { + // return null; + //} - boolean isSwearWord(); + public NCNlpcraft getNCNlpcraft() { + return null; + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiProgressBar.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiProgressBar.scala index 64b5705..c8df0c7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiProgressBar.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiProgressBar.scala @@ -17,12 +17,13 @@ package org.apache.nlpcraft.common.ansi -import java.io.PrintWriter -import org.apache.nlpcraft.common.* -import NCAnsi.* import org.apache.commons.lang3.StringUtils +import org.apache.nlpcraft.common.* +import org.apache.nlpcraft.common.ansi.NCAnsi.* import org.apache.nlpcraft.common.ansi.NCAnsiProgressBar.* +import java.io.PrintWriter + /** * Forward-only, bound ANSI-based progress bar. * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiSpinner.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiSpinner.scala index 1a4bb4a..2b266a7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiSpinner.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ansi/NCAnsiSpinner.scala @@ -17,12 +17,13 @@ package org.apache.nlpcraft.common.ansi -import java.io.PrintWriter -import NCAnsi.* import org.apache.nlpcraft.common.* +import org.apache.nlpcraft.common.ansi.NCAnsi.* import org.apache.nlpcraft.common.ansi.NCAnsiSpinner.* import org.apache.nlpcraft.common.util.NCUtils +import java.io.PrintWriter + /** * ANSI-based hourglass spinner. 
* diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ascii/NCAsciiTable.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ascii/NCAsciiTable.scala index 067093f..35349ec 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ascii/NCAsciiTable.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/ascii/NCAsciiTable.scala @@ -17,14 +17,14 @@ package org.apache.nlpcraft.common.ascii -import java.io.{IOException, PrintStream} -import java.util.List as JList import com.typesafe.scalalogging.Logger import org.apache.nlpcraft.common.* -import org.apache.nlpcraft.common.ascii.NCAsciiTable.* import org.apache.nlpcraft.common.ansi.NCAnsi.* +import org.apache.nlpcraft.common.ascii.NCAsciiTable.* import org.apache.nlpcraft.common.util.NCUtils +import java.io.{IOException, PrintStream} +import java.util.List as JList import scala.collection.mutable import scala.jdk.CollectionConverters.CollectionHasAsScala import scala.util.Using diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala index 3616e32..b632804 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala @@ -18,8 +18,8 @@ package org.apache.nlpcraft.common.makro import com.typesafe.scalalogging.LazyLogging -import org.antlr.v4.runtime.tree.ParseTreeWalker import org.antlr.v4.runtime.* +import org.antlr.v4.runtime.tree.ParseTreeWalker import org.apache.nlpcraft.common.* import org.apache.nlpcraft.common.ansi.NCAnsi.* import org.apache.nlpcraft.common.antlr4.* diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java index 31e2d67..bb5b3a7 100644 --- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java @@ -1,13 +1,16 @@ // Generated from C:/Users/Nikita Ivanov/Documents/GitHub/incubator-nlpcraft/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4\NCMacroDsl.g4 by ANTLR 4.9.1 package org.apache.nlpcraft.common.makro.antlr4; -import org.antlr.v4.runtime.Lexer; + import org.antlr.v4.runtime.CharStream; -import org.antlr.v4.runtime.Token; -import org.antlr.v4.runtime.TokenStream; -import org.antlr.v4.runtime.*; -import org.antlr.v4.runtime.atn.*; +import org.antlr.v4.runtime.Lexer; +import org.antlr.v4.runtime.RuntimeMetaData; +import org.antlr.v4.runtime.Vocabulary; +import org.antlr.v4.runtime.VocabularyImpl; +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNDeserializer; +import org.antlr.v4.runtime.atn.LexerATNSimulator; +import org.antlr.v4.runtime.atn.PredictionContextCache; import org.antlr.v4.runtime.dfa.DFA; -import org.antlr.v4.runtime.misc.*; @SuppressWarnings({"all", "warnings", "unchecked", "unused", "cast"}) public class NCMacroDslLexer extends Lexer { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java index eef5f94..2c5aff3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java @@ -1,13 +1,24 @@ // Generated from C:/Users/Nikita Ivanov/Documents/GitHub/incubator-nlpcraft/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4\NCMacroDsl.g4 by ANTLR 4.9.1 package org.apache.nlpcraft.common.makro.antlr4; -import org.antlr.v4.runtime.atn.*; + +import org.antlr.v4.runtime.FailedPredicateException; +import org.antlr.v4.runtime.NoViableAltException; +import 
org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.RuleContext; +import org.antlr.v4.runtime.RuntimeMetaData; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.Vocabulary; +import org.antlr.v4.runtime.VocabularyImpl; +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNDeserializer; +import org.antlr.v4.runtime.atn.ParserATNSimulator; +import org.antlr.v4.runtime.atn.PredictionContextCache; import org.antlr.v4.runtime.dfa.DFA; -import org.antlr.v4.runtime.*; -import org.antlr.v4.runtime.misc.*; -import org.antlr.v4.runtime.tree.*; -import java.util.List; -import java.util.Iterator; -import java.util.ArrayList; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.TerminalNode; @SuppressWarnings({"all", "warnings", "unchecked", "unused", "cast"}) public class NCMacroDslParser extends Parser { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContext.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContext.java new file mode 100644 index 0000000..f4acced --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCContext.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.io.Serializable; +import java.util.Collection; + +/** + * Data model query context. This context defines fully processed user input and its associated data that + * the model's intents need to process and return the result. + * + * @see NCIntentMatch#getContext() + */ +public interface NCContext extends NCMetadata, Serializable { + /** + * Tests if given token is part of the query this context is associated with. + * + * @param tok Token to check. + * @return {@code true} if given token is from the sentence associated with this context, {@code false} otherwise. + */ + boolean isOwnerOf(NCToken tok); + + /** + * Gets collection of all parsing variants for this query. Each parsing variant is a list of detected tokens. + * Note that a given user input can have one or more possible different parsing variants. + * + * @return All parsing variants of this query. Always contains at least one variant. + */ + Collection<? extends NCVariant> getVariants(); + + /** + * Gets model instance for this query. + * + * @return Model. + */ + NCModelView getModel(); + + /** + * Gets supplemental information about user request. + * + * @return Supplemental information about user request. + */ + NCRequest getRequest(); + + /** + * Gets current conversation for the query's user and data model. + * + * @return Current conversation. 
+ */ + NCConversation getConversation(); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCConversation.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCConversation.java new file mode 100644 index 0000000..c44ac45 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCConversation.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; + +import java.util.List; +import java.util.function.Predicate; + +/** + * Conversation container for unique combination of user and data model. + * <p> + * Conversation management is based on idea of a short-term-memory (STM). STM can be viewed as a condensed + * short-term history of the input for a given user and data model. Every submitted user request that was + * successfully answered is added to the conversation STM as a list of {@link NCToken tokens}. Existing STM tokens belonging to + * the same {@link NCDefaultNerElement#getGroups() group} will be overridden by the more recent tokens from the same group. + * Note also that tokens in STM automatically expire (i.e. 
context is "forgotten") after a certain period of time and/or + * based on the depth of the conversation since the last mention. + * <p> + * You can also maintain user state between requests using metadata. Conversation's metadata is a + * mutable thread-safe container that can hold any arbitrary user data while supporting the same + * expiration logic as the rest of the conversation elements (i.e. tokens and previously matched intent IDs). + * <p> + * You can also access dialog flow history as a chronologically ordered list of previously matched intents sorted + * from oldest to newest for the current user and data model. + * + * @see NCContext#getConversation() + * @see NCModelView#getConversationDepth() + * @see NCModelView#getConversationTimeout() + */ +public interface NCConversation extends NCMetadata { + /** + * Gets an ordered list of tokens stored in the conversation STM for the current + * user and data model. Tokens in the returned list are ordered by their conversational depth, i.e. + * the tokens from more recent requests appear before tokens from older requests. + * <p> + * Note that this list excludes free words and stopwords. Note also that specific rules + * by which STM operates are undefined for the purpose of this function (i.e. callers should not rely on + * any observed behavior of how STM stores and evicts its content). + * + * @return List of tokens for this conversation's STM. The list can be empty which indicates that + * conversation is brand new (or timed out). + */ + List<NCToken> getTokens(); + + /** + * Gets the chronologically ordered list of previously matched intents sorted from oldest to newest for the current + * user and data model. + * + * @return List of chronologically ordered previously matched intents. + */ + List<NCDialogFlowItem> getDialogFlow(); + + /** + * Removes all tokens satisfying given token predicate from the conversation STM. 
+ * <p> + * This is particularly useful when the logic processing the user input makes an implicit + * assumption not present in the user input itself. Such an assumption may alter the conversation (without + * having an explicit token responsible for it) and therefore this method can be used to remove "stale" tokens + * from conversation STM. For example, in some cases the intent logic can assume the user's current location as + * an implicit geolocation and therefore all existing <code>nlpcraft:geo</code> tokens should be removed from the + * conversation STM to maintain correct context. + * + * @param filter Token remove filter. + */ + void clearStm(Predicate<NCToken> filter); + + /** + * Removes all previously matched intents using given intent predicate. + * <p> + * History of matched intents (i.e. the dialog flow) can be used in intent definition as part of its + * matching template. NLPCraft maintains the window of previously matched intents based on time, i.e. + * after a certain period of time the oldest previously matched intents are forgotten and removed from + * dialog flow. This method allows explicitly clearing previously matched intents from the + * dialog flow based on user logic other than time window. + * + * @param filter Dialog flow filter based on IDs of previously matched intents. + */ + void clearDialog(Predicate<String/* Intent ID. */> filter); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCDialogFlowItem.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCDialogFlowItem.java new file mode 100644 index 0000000..59f44a3 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCDialogFlowItem.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * An item of the dialog flow. Dialog flow is a chronologically ordered list of dialog flow + * items. Each item represents a snapshot of winning intent's match and its associated data. List of instances + * of this interface is passed into a custom user-defined dialog flow match method. + * <p> + * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/intent-matching.html">Intent Matching</a> section and review + * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. + */ +public interface NCDialogFlowItem extends NCMetadata { + /** + * Gets ID of the matched intent. + * + * @return ID of the matched intent. + */ + String getIntentId(); + + /** + * Gets the winning intent's callback result. + * + * @return Winning intent's callback result. + */ + NCResult getResult(); + + /** + * Gets a subset of tokens representing matched intent. This subset is grouped by the matched terms + * where a {@code null} sub-list defines an optional term. Order and index of sub-lists corresponds + * to the order and index of terms in the matching intent. Number of sub-lists will always be the same + * as the number of terms in the matched intent. 
+ * <p> + * Note that unlike {@link #getVariant()} method + * this method returns only subset of the tokens that were part of the matched intent. Specifically, it will + * not return tokens for free words, stopwords or unmatched ("dangling") tokens. + * + * @return List of list of tokens representing matched intent. + * @see #getVariant() + */ + List<List<NCToken>> getIntentTokens(); + + /** + * Gets tokens for given term. This is a companion method for {@link #getIntentTokens()}. + * + * @param idx Index of the term (starting from <code>0</code>). + * @return List of tokens, potentially {@code null}, for given term. + * @see NCIntentTerm + * @see #getTermTokens(String) + */ + List<NCToken> getTermTokens(int idx); + + /** + * Gets tokens for given term. This is a companion method for {@link #getIntentTokens()}. + * + * @param termId ID of the term for which to get tokens. + * @return List of tokens, potentially {@code null}, for given term. + * @see NCIntentTerm + * @see #getTermTokens(int) + */ + List<NCToken> getTermTokens(String termId); + + /** + * Gets sentence parsing variant that produced the matching for the winning intent. Returned variant is one of the + * variants provided by {@link NCContext#getVariants()} methods. Note that tokens returned by this method are + * a superset of the tokens returned by {@link #getIntentTokens()} method, i.e. not all tokens + * from this variant may have been used in matching of the winning intent. + * + * @return Sentence parsing variant that produced the matching for the winning intent. + * @see #getIntentTokens() + */ + NCVariant getVariant(); + + /** + * Gets descriptor of the user on behalf of which the input request was submitted. + * + * @return User descriptor. + * TODO: + */ + String getUserId(); + + /** + * Gets globally unique server ID of the input request. + * <p> + * Server request is defined as a processing of a one user input request. 
+ * Note that the model can be accessed multiple times during processing of a single user request + * and therefore multiple instances of this interface can return the same server + * request ID. In fact, users of this interface can use this fact by using this ID, + * for example, as a map key for a session scoped storage. + * + * @return Server request ID. + */ + String getServerRequestId(); + + /** + * Gets normalized text of the user input. + * + * @return Normalized text of the user input. + */ + String getNormalizedText(); + + /** + * Gets UTC/GMT timestamp in milliseconds when user input was received. + * + * @return UTC/GMT timestamp in milliseconds when user input was received. + */ + long getReceiveTimestamp(); + + /** + * Gets optional address of the remote client that made the initial REST request. + * + * @return Optional address of the remote client. + */ + Optional<String> getRemoteAddress(); + + /** + * Gets string representation of the user client agent that made the initial REST + * request. + * + * @return User agent string from user client (web browser, REST client, etc.). + */ + Optional<String> getClientAgent(); + + /** + * Gets optional JSON data passed in with the user request. + * + * @return Optional JSON data, can be empty but never {@code null}. 
+ */ + Map<String, Object> getRequestData(); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java index c8963e1..9f4d3ba 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java @@ -17,10 +17,15 @@ package org.apache.nlpcraft.model; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Repeatable; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; -import static java.lang.annotation.ElementType.*; -import static java.lang.annotation.RetentionPolicy.*; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.TYPE; +import static java.lang.annotation.RetentionPolicy.RUNTIME; /** * Annotation to bind an intent with the method serving as its callback. This annotation takes a string value diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java new file mode 100644 index 0000000..f2219a2 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.io.Serializable; +import java.util.List; + +/** + * An intent match context that can be passed into the callback of the matched intent. + * Note that when using {@link NCIntent} and {@link NCIntentTerm} + * annotations the callback method can be defined parameter-less (and this context won't be passed in). + * If used, intent context must be the 1st formal parameter in intent callback. + * <p> + * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/intent-matching.html">Intent Matching</a> section and review + * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. + * + * @see NCIntent + * @see NCIntentTerm + * @see NCIntentSkip + * @see NCModelAddClasses + * @see NCModelAddPackage + * @see NCIntentSample + * @see NCIntentSampleRef + * @see NCIntentRef + */ +public interface NCIntentMatch extends NCMetadata, Serializable { + /** + * Gets ID of the matched intent. + * + * @return ID of the matched intent. + */ + String getIntentId(); + + /** + * Gets context of the user input query. + * + * @return Original query context. + */ + NCContext getContext(); + + /** + * Gets a subset of tokens representing matched intent. This subset is grouped by the matched terms + * where a {@code null} sub-list defines an optional term. Order and index of sub-lists corresponds + * to the order and index of terms in the matching intent. Number of sub-lists will always be the same + * as the number of terms in the matched intent. 
+ * <p> + * Note that unlike {@link #getVariant()} method + * this method returns only subset of the tokens that were part of the matched intent. Specifically, it will + * not return tokens for free words, stopwords or unmatched ("dangling") tokens. + * <p> + * Consider using {@link NCIntentTerm} annotation instead for simpler access to intent tokens. + * + * @return List of list of tokens representing matched intent. + * @see #getVariant() + * @see NCIntentTerm + */ + List<List<NCToken>> getIntentTokens(); + + /** + * Gets tokens for given term. This is a companion method for {@link #getIntentTokens()}. + * <p> + * Consider using {@link NCIntentTerm} annotation instead for simpler access to intent tokens. + * + * @param idx Index of the term (starting from <code>0</code>). + * @return List of tokens, potentially {@code null}, for given term. + * @see NCIntentTerm + * @see #getTermTokens(String) + */ + List<NCToken> getTermTokens(int idx); + + /** + * Gets tokens for given term. This is a companion method for {@link #getIntentTokens()}. + * <p> + * Consider using {@link NCIntentTerm} annotation instead for simpler access to intent tokens. + * + * @param termId ID of the term for which to get tokens. + * @return List of tokens, potentially {@code null}, for given term. + * @see NCIntentTerm + * @see #getTermTokens(int) + */ + List<NCToken> getTermTokens(String termId); + + /** + * Gets sentence parsing variant that produced the matching for this intent. Returned variant is one of the + * variants provided by {@link NCContext#getVariants()} methods. Note that tokens returned by this method are + * a superset of the tokens returned by {@link #getIntentTokens()} method, i.e. not all tokens + * from this variant may have been used in matching of the winning intent. + * + * @return Sentence parsing variant that produced the matching for this intent. 
+ * @see #getIntentTokens() + */ + NCVariant getVariant(); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java index 3348ffe..1b0ec24 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java @@ -17,7 +17,11 @@ package org.apache.nlpcraft.model; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Repeatable; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; import static java.lang.annotation.ElementType.METHOD; import static java.lang.annotation.RetentionPolicy.RUNTIME; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSample.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSample.java index b18f44a..30e3b7d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSample.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSample.java @@ -17,7 +17,12 @@ package org.apache.nlpcraft.model; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Repeatable; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + import static java.lang.annotation.ElementType.METHOD; import static java.lang.annotation.RetentionPolicy.RUNTIME; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java index a04ccd1..94d0a53 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java @@ -17,7 +17,7 @@ package org.apache.nlpcraft.model; -import org.apache.nlpcraft.common.*; +import 
org.apache.nlpcraft.common.NCException; /** * Control flow exception to skip current intent. This exception can be thrown by the intent diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java index d8302ac..62e7f8a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java @@ -17,10 +17,12 @@ package org.apache.nlpcraft.model; -import java.lang.annotation.*; +import java.lang.annotation.Documented; +import java.lang.annotation.Retention; +import java.lang.annotation.Target; -import static java.lang.annotation.ElementType.*; -import static java.lang.annotation.RetentionPolicy.*; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.RetentionPolicy.RUNTIME; /** * Annotation to mark callback parameter to receive intent term's tokens. This is a companion annotation diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMacroProcessor.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMacroProcessor.java index e053152..b0b6037 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMacroProcessor.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMacroProcessor.java @@ -22,6 +22,7 @@ import org.apache.nlpcraft.common.makro.NCMacroJavaParserTrait; import java.util.Set; +// TODO: move it from public API. /** * Standalone synonym macro DSL processor. 
* <p> diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java index 91c10f4..801edaf 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java @@ -17,8 +17,10 @@ package org.apache.nlpcraft.model; -import org.apache.nlpcraft.common.*; -import java.util.*; +import org.apache.nlpcraft.common.NCException; + +import java.util.Map; +import java.util.Optional; /** * Provides support for mutable runtime-only metadata. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java index 45a0a06..f48e9d8 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java @@ -20,6 +20,9 @@ package org.apache.nlpcraft.model; /** * */ -public interface NCModel { - // TODO +public interface NCModel extends NCModelView { + NCModelBehaviour getModelBehaviour(); + // TODO: do we need it? + void start(); + void stop(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelBehaviour.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelBehaviour.java new file mode 100644 index 0000000..11f931b --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelBehaviour.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +/** + * + */ +public interface NCModelBehaviour { + /** + * A callback to accept or reject a parsed variant. This callback is called before any other + * callbacks at the beginning of the processing pipeline, and it is called for each parsed variant. + * <p> + * Note that a given user input can have one or more possible different parsing variants. Depending on model + * configuration a user input can produce hundreds or even thousands of parsing variants that can significantly slow + * down the overall processing. This method allows filtering out unnecessary parsing variants based on variety of + * user-defined factors like number of tokens, presence of a particular token in the variant, etc. + * <p> + * By default, this method accepts all variants (returns {@code true}). + * + * @param var A variant (list of tokens) to accept or reject. + * @return {@code True} to accept variant for further processing, {@code false} otherwise. + */ + default boolean onParsedVariant(NCVariant var) { + return true; + } + + /** + * A callback that is called when a fully assembled query context is ready. This callback is called after + * all {@link #onParsedVariant(NCVariant)} callbacks are called but before any {@link #onMatchedIntent(NCIntentMatch)} + * are called, i.e. right before the intent matching is performed. It's called always once per user request processing. 
+ * Typical use case for this callback is to perform logging, debugging, statistic or usage collection, + * explicit update or initialization of conversation context, security audit or validation, etc. + * <p> + * Default implementation returns {@code null}. + * + * @param ctx Query context. + * @return Optional query result to return interrupting the default workflow. Specifically, if this method returns + * a non-{@code null} result, it will be returned to the caller immediately overriding default behavior. If + * the method returns {@code null} - the default processing flow will continue. + * @throws NCRejection This callback can throw this rejection exception to abort user request processing. + */ + default NCResult onContext(NCContext ctx) throws NCRejection { + return null; + } + + /** + * A callback that is called when intent was successfully matched but right before its callback is called. + * This callback is called after {@link #onContext(NCContext)} is called and may be called multiple times + * depending on its return value. If {@code true} is returned then the default workflow will continue and + * the matched intent's callback will be called. However, if {@code false} is returned then the entire + * existing set of parsing variants will be matched against all declared intents again. Returning {@code false} + * allows this method to alter the state of the model (like soft-reset conversation or change metadata) and + * force the full re-evaluation of the parsing variants against all declared intents. Note that user logic should + * be careful not to induce an infinite loop in this behavior. + * <p> + * Note that this callback may not be called at all based on the return value + * of {@link #onContext(NCContext)} callback. Typical use case for this callback is to perform logging, debugging, + * statistic or usage collection, explicit update or initialization of conversation context, security audit + * or validation, etc. 
+ * <p> + * By default, this method returns {@code true}. + * + * @param ctx Intent match context - the same instance that's passed to the matched intent callback. + * @return If {@code true} is returned then the default workflow will continue and the matched intent's callback + * will be called. However, if {@code false} is returned then the entire existing set of parsing variants will + * be matched against all declared intents again. Returning {@code false} allows this method to alter the state of + * the model (like soft-reset conversation or change metadata) and force the re-evaluation of the parsing + * variants against all declared intents. Note that user logic should be careful not to induce an infinite loop in + * this behavior. + * @throws NCRejection This callback can throw the rejection exception to abort user request processing. In this + * case the {@link #onRejection(NCIntentMatch, NCRejection)} callback will be called next. + */ + default boolean onMatchedIntent(NCIntentMatch ctx) throws NCRejection { + return true; + } + + /** + * A callback that is called when successful result is obtained from the intent callback and right before sending it + * back to the caller. This callback is called after {@link #onMatchedIntent(NCIntentMatch)} is called. + * Note that this callback may not be called at all, and if called - it's called only once. + * Typical use case for this callback is to perform logging, debugging, statistic or usage collection, + * explicit update or initialization of conversation context, security audit or validation, etc. + * <p> + * Default implementation is a no-op returning {@code null}. + * + * @param ctx Intent match context - the same instance that's passed to the matched intent callback that + * produced this result. + * @param res Existing result. + * @return Optional query result to return interrupting the default workflow. 
Specifically, if this method returns + * a non-{@code null} result, it will be returned to the caller immediately overriding default behavior and + * existing query result or error processing, if any. If the method returns {@code null} - the default + * processing flow will continue. + */ + default NCResult onResult(NCIntentMatch ctx, NCResult res) { + return null; + } + + /** + * A callback that is called when intent callback threw {@link NCRejection} exception. + * This callback is called after {@link #onMatchedIntent(NCIntentMatch)} is called. + * Note that this callback may not be called at all, and if called - it's called only once. + * Typical use case for this callback is to perform logging, debugging, statistic or usage collection, + * explicit update or initialization of conversation context, security audit or validation, etc. + * <p> + * Default implementation is a no-op returning {@code null}. + * + * @param ctx Optional intent match context - the same instance that's passed to the matched intent callback that + * produced this rejection. It is {@code null} if rejection was triggered outside of the intent callback. + * @param e Rejection exception. + * @return Optional query result to return interrupting the default workflow. Specifically, if this method returns + * a non-{@code null} result, it will be returned to the caller immediately overriding default behavior and + * existing query result or error processing, if any. If the method returns {@code null} - the default + * processing flow will continue. + */ + default NCResult onRejection(NCIntentMatch ctx, NCRejection e) { + return null; + } + + /** + * A callback that is called when intent callback failed with unexpected exception. + * Note that this callback may not be called at all, and if called - it's called only once. 
+ * Typical use case for this callback is to perform logging, debugging, statistic or usage collection, + * explicit update or initialization of conversation context, security audit or validation, etc. + * <p> + * Default implementation is a no-op returning {@code null}. + * + * @param ctx Query context of the user request that + * produced this error. + * @param e Failure exception. + * @return Optional query result to return interrupting the default workflow. Specifically, if this method returns + * a non-{@code null} result, it will be returned to the caller immediately overriding default behavior and + * existing query result or error processing, if any. If the method returns {@code null} - the default + * processing flow will continue. + */ + default NCResult onError(NCContext ctx, Throwable e) { + return null; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java new file mode 100644 index 0000000..6d0cf6c --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java @@ -0,0 +1,800 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model; + +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; +import org.apache.nlpcraft.model.nlp.NCNlpNerParser; + +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Read-only view on data model. Model view defines a declarative, or configurable, part of the model. + * All properties in this interface can be defined or overridden in JSON/YAML external + * presentation when used with {@link NCModelFileAdapter} adapter. + * <p> + * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/data-model.html">Data Model</a> section and review + * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. + * + * @see NCModel + * @see NCModelAdapter + * @see NCModelFileAdapter + */ +public interface NCModelView extends NCMetadata { + /** + * Minimum value for {@link #getConversationTimeout()} method. + */ + long CONV_TIMEOUT_MIN = 0L; + + /** + * Maximum value for {@link #getConversationTimeout()} method. + */ + long CONV_TIMEOUT_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMaxUnknownWords()} method. + */ + long MAX_UNKNOWN_WORDS_MIN = 0L; + + /** + * Maximum value for {@link #getMaxUnknownWords()} method. + */ + long MAX_UNKNOWN_WORDS_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMaxFreeWords()} method. + */ + long MAX_FREE_WORDS_MIN = 0L; + + /** + * Maximum value for {@link #getMaxFreeWords()} method. + */ + long MAX_FREE_WORDS_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMaxSuspiciousWords()} method. + */ + long MAX_SUSPICIOUS_WORDS_MIN = 0L; + + /** + * Maximum value for {@link #getMaxSuspiciousWords()} method. + */ + long MAX_SUSPICIOUS_WORDS_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMinWords()} method. 
+ */ + long MIN_WORDS_MIN = 1L; + + /** + * Maximum value for {@link #getMinWords()} method. + */ + long MIN_WORDS_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMinNonStopwords()} method. + */ + long MIN_NON_STOPWORDS_MIN = 0L; + + /** + * Maximum value for {@link #getMinNonStopwords()} method. + */ + long MIN_NON_STOPWORDS_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMinTokens()} method. + */ + long MIN_TOKENS_MIN = 0L; + + /** + * Maximum value for {@link #getMinTokens()} method. + */ + long MIN_TOKENS_MAX = Long.MAX_VALUE; + + /** + * Minimum value for {@link #getMaxTokens()} method. + */ + long MAX_TOKENS_MIN = 0L; + + /** + * Maximum value for {@link #getMaxTokens()} method. + */ + long MAX_TOKENS_MAX = 100L; + + /** + * Minimum value for {@link #getMaxWords()} method. + */ + long MAX_WORDS_MIN = 1L; + + /** + * Maximum value for {@link #getMaxWords()} method. + */ + long MAX_WORDS_MAX = 100L; + + + /** + * Minimum value for {@link #getConversationDepth()} method. + */ + long CONV_DEPTH_MIN = 1L; + + /** + * Maximum value for {@link #getConversationDepth()} method. + */ + long CONV_DEPTH_MAX = Long.MAX_VALUE; + + /** + * Max length for {@link #getId()} method. + */ + int MODEL_ID_MAXLEN = 32; + + /** + * Max length for {@link #getName()} method. + */ + int MODEL_NAME_MAXLEN = 64; + + /** + * Max length for {@link #getVersion()} method. + */ + int MODEL_VERSION_MAXLEN = 16; + + /** + * Max length for {@link NCDefaultNerElement#getId()} method. + */ + int MODEL_ELEMENT_ID_MAXLEN = 64; + + /** + * Default value for {@link #getConversationTimeout()} method. + */ + long DFLT_CONV_TIMEOUT_MS = Duration.ofMinutes(60).toMillis(); + + /** + * Default value for {@link #getConversationDepth()} method. + */ + int DFLT_CONV_DEPTH = 3; + + /** + * Default value for {@link #getMetadata()} method. + */ + Map<String, Object> DFLT_METADATA = new HashMap<>(); + + /** + * Default value for {@link #getMaxUnknownWords()} method. 
+ */ + int DFLT_MAX_UNKNOWN_WORDS = Integer.MAX_VALUE; + + /** + * Default value for {@link #getMaxFreeWords()} method. + */ + int DFLT_MAX_FREE_WORDS = Integer.MAX_VALUE; + + /** + * Default value for {@link #getMaxSuspiciousWords()} method. + */ + int DFLT_MAX_SUSPICIOUS_WORDS = 0; + + /** + * Default value for {@link #getMinWords()} method. + */ + int DFLT_MIN_WORDS = 1; + + /** + * Default value for {@link #getMaxWords()} method. + */ + int DFLT_MAX_WORDS = 50; + + /** + * Default value for {@link #getMinTokens()} method. + */ + int DFLT_MIN_TOKENS = 0; + + /** + * Default value for {@link #getMaxTokens()} method. + */ + int DFLT_MAX_TOKENS = 50; + + /** + * Default value for {@link #getMinNonStopwords()} method. + */ + int DFLT_MIN_NON_STOPWORDS = 0; + + /** + * Default value for {@link #isSwearWordsAllowed()} method. + */ + boolean DFLT_IS_SWEAR_WORDS_ALLOWED = false; + + /** + * Default value for {@link #isNoNounsAllowed()} method. + */ + boolean DFLT_IS_NO_NOUNS_ALLOWED = true; + + + // TODO: add javadoc + boolean DFLT_IS_STOPWORDS_ALLOWED = true; + + /** + * Default value for {@link #isNoUserTokensAllowed()} method. + */ + boolean DFLT_IS_NO_USER_TOKENS_ALLOWED = true; + + /** + * Gets unique, <i>immutable</i> ID of this model. + * <p> + * Note that <b>model IDs are immutable</b> while name and version + * can be changed freely. Changing model ID is equal to creating a completely new model. + * Model IDs (unlike name and version) are not exposed to the end user and only serve a + * technical purpose. ID's max length is 32 characters. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>id</code> property: + * <pre class="brush: js"> + * { + * "id": "my.model.id" + * } + * </pre> + * + * @return Unique, <i>immutable</i> ID of this model. + */ + String getId(); + + /** + * Gets descriptive name of this model. Name's max length is 64 characters. 
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>name</code> property: + * <pre class="brush: js"> + * { + * "name": "My Model" + * } + * </pre> + * + * @return Descriptive name for this model. + */ + String getName(); + + /** + * Gets the version of this model using semantic versioning. Version's max length is 16 characters. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>version</code> property: + * <pre class="brush: js"> + * { + * "version": "1.0.0" + * } + * </pre> + * + * @return A version compatible with (<a href="http://www.semver.org">www.semver.org</a>) specification. + */ + String getVersion(); + + /** + * Gets optional short model description. This can be displayed by the management tools. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>description</code> property: + * <pre class="brush: js"> + * { + * "description": "Model description..." + * } + * </pre> + * + * @return Optional short model description. + */ + default String getDescription() { + return null; + } + + /** + * Gets the origin of this model like name of the class, file path or URL. + * + * @return Origin of this model like name of the class, file path or URL. + */ + default String getOrigin() { + return getClass().getCanonicalName(); + } + + /** + * Gets maximum number of unknown words until automatic rejection. An unknown word is a word + * that is not part of Princeton WordNet database. If you expect a very formalized and well-defined + * input without uncommon slang and abbreviations you can set this to a small number + * like one or two. However, in most cases we recommend leaving it as default or set it to a larger + * number like five or more. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_UNKNOWN_WORDS} will be used. 
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxUnknownWords</code> property: + * <pre class="brush: js"> + * { + * "maxUnknownWords": 2 + * } + * </pre> + * + * @return Maximum number of unknown words until automatic rejection. + */ + default int getMaxUnknownWords() { + return DFLT_MAX_UNKNOWN_WORDS; + } + + /** + * Gets maximum number of free words until automatic rejection. A free word is a known word that is + * not part of any recognized token. In other words, a word that is present in the user input + * but won't be used to understand its meaning. Setting it to a non-zero value risks the misunderstanding + * of the user input, while setting it to zero often makes understanding logic too rigid. In most + * cases we recommend setting to between one and three. If you expect the user input to contain + * many <i>noisy</i> idioms, slang or colloquials - you can set it to a larger number. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_FREE_WORDS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxFreeWords</code> property: + * <pre class="brush: js"> + * { + * "maxFreeWords": 2 + * } + * </pre> + * + * @return Maximum number of free words until automatic rejection. + */ + default int getMaxFreeWords() { + return DFLT_MAX_FREE_WORDS; + } + + /** + * Gets maximum number of suspicious words until automatic rejection. A suspicious word is a word + * that is defined by the model that should not appear in a valid user input under any circumstances. + * A typical example of suspicious words would be words "sex" or "porn" when processing + * queries about children books. In most cases this should be set to zero (default) to automatically + * reject any such suspicious words in the user input. 
+ * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_SUSPICIOUS_WORDS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxSuspiciousWords</code> property: + * <pre class="brush: js"> + * { + * "maxSuspiciousWords": 2 + * } + * </pre> + * + * @return Maximum number of suspicious words until automatic rejection. + */ + default int getMaxSuspiciousWords() { + return DFLT_MAX_SUSPICIOUS_WORDS; + } + + /** + * Gets minimum word count (<i>including</i> stopwords) below which user input will be automatically + * rejected as too short. In almost all cases this value should be greater than or equal to one. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MIN_WORDS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>minWords</code> property: + * <pre class="brush: js"> + * { + * "minWords": 2 + * } + * </pre> + * + * @return Minimum word count (<i>including</i> stopwords) below which user input will be automatically + * rejected as too short. + */ + default int getMinWords() { + return DFLT_MIN_WORDS; + } + + /** + * Gets maximum word count (<i>including</i> stopwords) above which user input will be automatically + * rejected as too long. In almost all cases this value should be greater than or equal to one. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_WORDS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxWords</code> property: + * <pre class="brush: js"> + * { + * "maxWords": 50 + * } + * </pre> + * + * @return Maximum word count (<i>including</i> stopwords) above which user input will be automatically + * rejected as too long. 
+ */ + default int getMaxWords() { + return DFLT_MAX_WORDS; + } + + /** + * Gets minimum number of all tokens (system and user defined) below which user input will be + * automatically rejected as too short. In almost all cases this value should be greater than or equal to one. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MIN_TOKENS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>minTokens</code> property: + * <pre class="brush: js"> + * { + * "minTokens": 1 + * } + * </pre> + * + * @return Minimum number of all tokens. + */ + default int getMinTokens() { + return DFLT_MIN_TOKENS; + } + + /** + * Gets maximum number of all tokens (system and user defined) above which user input will be + * automatically rejected as too long. Note that sentences with large number of token can result + * in significant processing delay and substantial memory consumption. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_TOKENS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxTokens</code> property: + * <pre class="brush: js"> + * { + * "maxTokens": 100 + * } + * </pre> + * + * @return Maximum number of all tokens. + */ + default int getMaxTokens() { + return DFLT_MAX_TOKENS; + } + + /** + * Gets minimum word count (<i>excluding</i> stopwords) below which user input will be automatically rejected + * as ambiguous sentence. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MIN_NON_STOPWORDS} will be used. 
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>minNonStopwords</code> property: + * <pre class="brush: js"> + * { + * "minNonStopwords": 2 + * } + * </pre> + * + * @return Minimum word count (<i>excluding</i> stopwords) below which user input will be automatically + * rejected as too short. + */ + default int getMinNonStopwords() { + return DFLT_MIN_NON_STOPWORDS; + } + + /** + * Whether to allow known English swear words in user input. If {@code false} - user input with + * detected known English swear words will be automatically rejected. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_IS_SWEAR_WORDS_ALLOWED} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>swearWordsAllowed</code> property: + * <pre class="brush: js"> + * { + * "swearWordsAllowed": false + * } + * </pre> + * + * @return Whether to allow known swear words in user input. + */ + default boolean isSwearWordsAllowed() { + return DFLT_IS_SWEAR_WORDS_ALLOWED; + } + + /** + * Whether to allow user input without a single noun. If {@code false} such user input + * will be automatically rejected. Typically, for strict command or query-oriented models this should be set to + * {@code false} as any command or query should have at least one noun subject. However, for conversational + * models this can be set to {@code true} to allow for smalltalk and one-liners. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_IS_NO_NOUNS_ALLOWED} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>noNounsAllowed</code> property: + * <pre class="brush: js"> + * { + * "noNounsAllowed": false + * } + * </pre> + * + * @return Whether to allow user input without a single noun. 
+ */ + default boolean isNoNounsAllowed() { + return DFLT_IS_NO_NOUNS_ALLOWED; + } + + + + /** + * Whether to allow the user input with no user token detected. If {@code false} such user + * input will be automatically rejected. Note that this property only applies to user-defined + * token (i.e. model element). Even if there are no user defined tokens, the user input may still + * contain system token like <code>nlpcraft:city</code> or <code>nlpcraft:date</code>. In many cases models + * should be built to allow user input without user tokens. However, set it to {@code false} if presence + * of at least one user token is mandatory. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_IS_NO_USER_TOKENS_ALLOWED} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>noUserTokensAllowed</code> property: + * <pre class="brush: js"> + * { + * "noUserTokensAllowed": false + * } + * </pre> + * + * @return Whether to allow the user input with no user token detected. + */ + // TODO? do we need it? + default boolean isNoUserTokensAllowed() { + return DFLT_IS_NO_USER_TOKENS_ALLOWED; + } + + /** + * Gets optional user defined model metadata that can be set by the developer and accessed later. + * By default, it returns an empty map. Note that this metadata is mutable and can be + * changed at runtime by the model's code. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>metadata</code> property: + * <pre class="brush: js"> + * { + * "metadata": { + * "str": "val1", + * "num": 100, + * "bool": false + * } + * } + * </pre> + * + * @return Optional user defined model metadata. By default, returns an empty map. Never returns {@code null}. + */ + default Map<String, Object> getMetadata() { + return DFLT_METADATA; + } + + /** + * Gets an optional list of stopwords to add to the built-in ones. 
+ * <p> + * Stopword is an individual word (i.e. sequence of characters excluding whitespaces) that contribute no + * semantic meaning to the sentence. For example, 'the', 'wow', or 'hm' provide no semantic meaning to the + * sentence and can be safely excluded from semantic analysis. + * <p> + * NLPCraft comes with a carefully selected list of English stopwords which should be sufficient + * for a majority of use cases. However, you can add additional stopwords to this list. The typical + * use for user-defined stopwords are jargon parasite words that are specific to the model's domain. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>additionalStopwords</code> property: + * <pre class="brush: js"> + * { + * "additionalStopwords": [ + * "stopword1", + * "stopword2" + * ] + * } + * </pre> + * + * @return Potentially empty list of additional stopwords. + */ + default Set<String> getAdditionalStopWords() { + return Collections.emptySet(); + } + + /** + * Gets an optional list of stopwords to exclude from the built-in list of stopwords. + * <p> + * Just like you can add additional stopwords via {@link #getAdditionalStopWords()} you can exclude + * certain words from the list of stopwords. This can be useful in rare cases when default built-in + * stopword has specific meaning of your model. In order to process them you need to exclude them + * from the list of stopwords. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>excludedStopwords</code> property: + * <pre class="brush: js"> + * { + * "excludedStopwords": [ + * "excludedStopword1", + * "excludedStopword2" + * ] + * } + * </pre> + * + * @return Potentially empty list of excluded stopwords. + */ + default Set<String> getExcludedStopWords() { + return Collections.emptySet(); + } + + /** + * Gets an optional list of suspicious words. 
A suspicious word is a word that generally should not appear in user + * sentence when used with this model. For example, if a particular model is for children oriented book search, + * the words "sex" and "porn" should probably NOT appear in the user input and can be automatically rejected + * when added here and model's metadata {@code MAX_SUSPICIOUS_WORDS} property set to zero. + * <p> + * Note that by setting model's metadata {@code MAX_SUSPICIOUS_WORDS} property to non-zero value you can + * adjust the sensitivity of suspicious words auto-rejection logic. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>suspiciousWords</code> property: + * <pre class="brush: js"> + * { + * "suspiciousWords": [ + * "sex", + * "porn" + * ] + * } + * </pre> + * + * @return Potentially empty list of suspicious words in their lemma form. + */ + default Set<String> getSuspiciousWords() { + return Collections.emptySet(); + } + + /** + * Gets optional user-defined model element parsers for custom NER implementations. Note that order of the parsers + * is important as they will be invoked in the same order they are returned. + * <p> + * By default, the data model detects its elements by their synonyms, regexp or IDL expressions. However, + * in some cases these methods are not expressive enough. In such cases, a user-defined parser can be defined + * for the model that would allow the user to define its own NER logic to detect the model elements in the user + * input programmatically. Note that a single parser can detect any number of model elements. 
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>parsers</code> property which is an array + * with every element being a fully qualified class name implementing {@link NCCustomParser} interface: + * <pre class="brush: js"> + * { + * "parsers": [ + * "my.package.Parser1", + * "my.package.Parser2" + * ] + * } + * </pre> + * + * @return Custom user parsers for model elements or empty list if not used (default). Never returns {@code null}. + * TODO: javadoc + */ + default List<NCNlpNerParser> getParsers() { + return Collections.emptyList(); + } + + /** + * Gets timeout in ms after which the unused conversation element is automatically "forgotten". + * <p> + * Just like in a normal human conversation if we talk about, say, "Chicago", and then don't mention it + * for a certain period of time during further dialog, the conversation participants subconsciously "forget" + * about it and exclude it from conversation context. In other words, the term "Chicago" is no longer in + * conversation's short-term-memory. + * <p> + * Note that both conversation timeout and {@link #getConversationDepth() depth} + * combined define the expiration policy for the conversation management. These two properties allow fine-tuning + * for different types of dialogs. For example, setting longer timeout and smaller depth mimics + * slow-moving but topic-focused conversation. Alternatively, setting shorter timeout and longer depth better + * supports fast-moving wide-ranging conversation that may cover multiple topics. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_CONV_TIMEOUT_MS} will be used.
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>conversationTimeout</code> property: + * <pre class="brush: js"> + * { + * "conversationTimeout": 300000 + * } + * </pre> + * + * @return Timeout in ms after which the unused conversation element is automatically "forgotten". + * @see #getConversationDepth() + */ + default long getConversationTimeout() { return DFLT_CONV_TIMEOUT_MS; } + + /** + * Gets maximum number of requests after which the unused conversation element is automatically "forgotten". + * <p> + * Just like in a normal human conversation if we talk about, say, "Chicago", and then don't mention it + * for a certain number of utterances during further dialog, the conversation participants subconsciously "forget" + * about it and exclude it from conversation context. In other words, the term "Chicago" is no longer in + * conversation's short-term-memory. + * <p> + * Note that both conversation {@link #getConversationTimeout() timeout} and depth + * combined define the expiration policy for the conversation management. These two properties allow fine-tuning + * for different types of dialogs. For example, setting longer timeout and smaller depth mimics + * slow-moving but topic-focused conversation. Alternatively, setting shorter timeout and longer depth better + * supports fast-moving wide-ranging conversation that may cover multiple topics. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_CONV_DEPTH} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>conversationDepth</code> property: + * <pre class="brush: js"> + * { + * "conversationDepth": 5 + * } + * </pre> + * + * @return Maximum number of requests after which the unused conversation element is automatically "forgotten".
+ * @see #getConversationTimeout() + */ + default int getConversationDepth() { return DFLT_CONV_DEPTH; } + + /** + * // TODO: add javadoc + * @return TBD + */ + default boolean isStopWordsAllowed() { + return DFLT_IS_STOPWORDS_ALLOWED; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java index 6ae83b3..1772889 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java @@ -17,7 +17,7 @@ package org.apache.nlpcraft.model; -import org.apache.nlpcraft.common.*; +import org.apache.nlpcraft.common.NCException; /** * Exception to indicate that user input cannot be processed as is. This exception can be thrown from diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRequest.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRequest.java new file mode 100644 index 0000000..4be5f91 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRequest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model; + +import java.util.Map; +import java.util.Optional; + +/** + * Information about the user request. + * + * @see NCContext#getRequest() + */ +public interface NCRequest extends NCMetadata { + /** + * TODO: + * Gets descriptor of the user on behalf of which this request was submitted. + * + * @return User descriptor. + */ + String getUserId(); + + /** + * Gets globally unique server ID of the current request. + * <p> + * Server request is defined as a processing of a one user input request. + * Note that the model can be accessed multiple times during processing of a single user request + * and therefore multiple instances of this interface can return the same server + * request ID. In fact, users of this interfaces can use this fact by using this ID, + * for example, as a map key for a session scoped storage. + * + * @return Server request ID. + */ + String getServerRequestId(); + + /** + * Gets normalized text of the user input. + * + * @return Normalized text of the user input. + */ + String getNormalizedText(); + + /** + * Gets UTC/GMT timestamp in ms when user input was received. + * + * @return UTC/GMT timestamp in ms when user input was received. + */ + long getReceiveTimestamp(); + + /** + * Gets optional address of the remote client. + * + * @return Optional address of the remote client. + */ + Optional<String> getRemoteAddress(); + + /** + * Gets string representation of the user client agent that made the call with + * this request. + * + * @return User agent string from user client (web browser, REST client, etc.). + */ + Optional<String> getClientAgent(); + + /** + * Gets optional JSON data passed in with the user request. + * + * @return Optional JSON data, can be empty but never {@code null}. 
+ */ + Map<String, Object> getRequestData(); +} \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java index 5ff6681..b557b07 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java @@ -18,13 +18,14 @@ package org.apache.nlpcraft.model; import org.apache.nlpcraft.common.NCException; -import org.apache.nlpcraft.common.util.*; +import org.apache.nlpcraft.common.util.NCUtils; import org.apache.nlpcraft.model.impl.NCMetadataAdapter; import java.io.Serializable; import java.util.Collection; /** + * TODO: it should be interface. Drop yaml. Drop html. * Data model result returned from model intent callbacks. Result consists of the * text body and the type. The type is similar in notion to MIME types. * <table class="dl-table"> diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java index b815511..c3fe0d3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java @@ -17,6 +17,8 @@ package org.apache.nlpcraft.model; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; + import java.util.List; /** @@ -25,7 +27,7 @@ import java.util.List; * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/data-model.html">Data Model</a> section and review * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. * - * @see NCElement + * @see NCDefaultNerElement */ public interface NCToken extends NCMetadata { /** @@ -48,7 +50,7 @@ public interface NCToken extends NCMetadata { * Note that a sentence can have multiple tokens with the same element ID. * * @return ID of the element (system or user defined). 
- * @see NCElement#getId() + * @see NCDefaultNerElement#getId() */ String getId(); @@ -57,7 +59,7 @@ public interface NCToken extends NCMetadata { * for user-defined model elements - built-in tokens do not have parents and this will return {@code null}. * * @return ID of the token's element immediate parent or {@code null} if not available. - * @see NCElement#getParentId() + * @see NCDefaultNerElement#getParentId() * @see #getAncestors() */ String getParentId(); @@ -90,7 +92,7 @@ public interface NCToken extends NCMetadata { * do not have values, and it will return {@code null}. * * @return Value for the user-defined model element or {@code null}, if not available. - * @see NCElement#getValues() + * @see NCDefaultNerElement#getValues() */ String getValue(); @@ -99,7 +101,7 @@ public interface NCToken extends NCMetadata { * token always belongs to one group with ID equal to token ID. * * @return Token groups list. Never {@code null} - but can be empty. - * @see NCElement#getGroups() + * @see NCDefaultNerElement#getGroups() */ List<String> getGroups(); diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java index f846c19..c381322 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java @@ -17,7 +17,8 @@ package org.apache.nlpcraft.model; -import java.io.Serializable; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; + import java.util.List; /** @@ -29,7 +30,7 @@ import java.util.List; * recognized by. Note that value name itself acts as an implicit synonym even when no additional synonyms added * for that value. 
* - * @see NCElement#getValues() + * @see NCDefaultNerElement#getValues() */ public interface NCValue { /** diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java new file mode 100644 index 0000000..3eb8c11 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * A parsing variant is a list of tokens representing one possible parsing variant of the user input. + * <p> + * Note that a given user input can have one or more possible different parsing variants. Depending on model + * configuration a user input can produce hundreds or even thousands of parsing variants. + * + * @see NCModel#onParsedVariant(NCVariant) + * @see NCContext#getVariants() + */ +public interface NCVariant extends List<NCToken>, NCMetadata { + /** + * Utility method that returns all non-freeword tokens. 
It's equivalent to: + * <pre class="brush: java"> + * return stream().filter(tok -> !tok.isFreeWord() && !tok.isStopWord()).collect(Collectors.toList()); + * </pre> + * + * @return All tokens that are neither freewords nor stop words. + * @see NCToken#isFreeWord() + */ + default List<NCToken> getMatchedTokens() { + return stream().filter(tok -> !tok.isFreeWord() && !tok.isStopWord()).collect(Collectors.toList()); + } + + /** + * Utility method that returns all freeword tokens. It's equivalent to: + * <pre class="brush: java"> + * return stream().filter(NCToken::isFreeWord).collect(Collectors.toList()); + * </pre> + * + * @return All freeword tokens. + * @see NCToken#isFreeWord() + */ + default List<NCToken> getFreeTokens() { + return stream().filter(NCToken::isFreeWord).collect(Collectors.toList()); + } + + /** + * Utility method that returns all stop word tokens. It's equivalent to: + * <pre class="brush: java"> + * return stream().filter(NCToken::isStopWord).collect(Collectors.toList()); + * </pre> + * + * @return All stop word tokens. + * @see NCToken#isStopWord() + */ + default List<NCToken> getStopWordTokens() { + return stream().filter(NCToken::isStopWord).collect(Collectors.toList()); + } + + /** + * Utility method that returns all user-defined tokens. It's equivalent to: + * <pre class="brush: java"> + * return stream().filter(NCToken::isUserDefined).collect(Collectors.toList()); + * </pre> + * + * @return All user-defined tokens. + * @see NCToken#isUserDefined() + */ + default List<NCToken> getUserDefinedTokens() { + // TODO: Why is it dropped?
+ //return stream().filter(NCToken::isUserDefined).collect(Collectors.toList()); + return null; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelBuilder.java new file mode 100644 index 0000000..73b7095 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCModelBuilder.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model.builders; + +import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.NCModelBehaviour; +import org.apache.nlpcraft.model.nlp.NCNlpNerParser; +import org.apache.nlpcraft.model.nlp.NCNlpTextParser; +import org.apache.nlpcraft.model.nlp.NCNlpWord; + +import java.io.File; +import java.net.URL; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; + +// Mandatory withOnContext or any of withIntentsXXX methods. +// All other - optional. +public class NCModelBuilder { + // 1. Common properties. 
+ /** Intended to set the model ID; not implemented yet, currently returns {@code null}. */ + public NCModelBuilder withId(String id) { + return null; + } + /** Intended to set the model name; not implemented yet, currently returns {@code null}. */ + public NCModelBuilder withName(String name) { + return null; + } + public NCModelBuilder withDescription(String description) { + return null; + } + public NCModelBuilder withOrigin(String origin) { + return null; + } + public NCModelBuilder withMaxUnknownWords(int maxUnknownWords) { + return null; + } + public NCModelBuilder withMaxFreeWords(int maxFreeWords) { + return null; + } + public NCModelBuilder withMaxSuspiciousWords(int maxSuspiciousWords) { + return null; + } + public NCModelBuilder withMinWords(int minWords) { + return null; + } + public NCModelBuilder withMaxWords(int maxWords) { + return null; + } + public NCModelBuilder withMinTokens(int minTokens) { + return null; + } + public NCModelBuilder withMaxTokens(int maxTokens) { + return null; + } + public NCModelBuilder withMinNonStopwords(int minNonStopwords) { + return null; + } + public NCModelBuilder withSwearWordsAllowed(boolean swearWordsAllowed) { + return null; + } + public NCModelBuilder withNoNounsAllowed(boolean noNounsAllowed) { + return null; + } + // TODO? do we need it? + public NCModelBuilder withNoUserTokensAllowed(boolean noUserTokensAllowed) { + return null; + } + public NCModelBuilder withConversationTimeout(long conversationTimeout) { return null; } + public NCModelBuilder withConversationDepth(int conversationDepth) { return null; } + public NCModelBuilder withMetadata(Map<String, Object> meta) { + return null; + } + + // 2. Words. + public NCModelBuilder withAdditionalStopWords(Set<String> additionalStopWords) { + return null; + } + public NCModelBuilder withExcludedStopWords(Set<String> excludedStopWords) { + return null; + } + public NCModelBuilder withSuspiciousWords(Set<String> suspiciousWords) { + return null; + } + + // TODO: 3 custom words - discuss it.
+ public NCModelBuilder withStopWordsFilter(Function<List<NCNlpWord>, List<NCNlpWord>> filter) { + return null; + } + public NCModelBuilder withSwearWordsFilter(Function<List<NCNlpWord>, List<NCNlpWord>> filter) { + return null; + } + public NCModelBuilder withSuspiciousWordsFilter(Function<List<NCNlpWord>, List<NCNlpWord>> filter) { + return null; + } + + // 3. Base NLP parser (OpenNLP, Stanford) + public NCModelBuilder withNlpWordsParser(NCNlpTextParser parser) { + return null; + } + + // 4. NER parsers (OpenNLP, Stanford, our own built-in parser NCDefaultNerParser + any custom) + public NCModelBuilder withNlpNerParsers(List<NCNlpNerParser> parsers) { + return null; + } + + // 5. Intents related methods: + // model class by default + for static methods of given classes. + // Scanned for NCIntent, NCIntentRef, NCIntentSample, NCIntentSampleRef + public NCModelBuilder withIntentsClasses(List<Class<?>> classes) { + return null; + } + + // model class by default + for methods of given instances. + // Scanned for NCIntent, NCIntentRef, NCIntentSample, NCIntentSampleRef + public NCModelBuilder withIntentsObjects(List<Object> objs) { + return null; + } + + // Manually defined intents. + public NCModelBuilder withIntents(List<String> objs) { + return null; + } + public NCModelBuilder withIntentsFromFiles(List<File> objs) { + return null; + } + public NCModelBuilder withIntentsFromUrls(List<URL> objs) { + return null; + } + public NCModelBuilder withIntentsSamplesMap(Map<String, List<List<String>>> map) { + return null; + } + + + // 6.
Behaviour + public NCModelBuilder withModelBehaviour(NCModelBehaviour behaviour) { + return null; + } + + public NCModel getModel() { + return null; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCResultBuilder.java similarity index 66% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCResultBuilder.java index 1ccd742..30cfc91 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/builders/NCResultBuilder.java @@ -15,22 +15,19 @@ * limitations under the License. */ -package org.apache.nlpcraft.model.nlp; +package org.apache.nlpcraft.model.builders; -import org.apache.nlpcraft.model.nlp.NCNlpWord; +import org.apache.nlpcraft.model.NCResult; -/** - * Extended word data, enriched by NLP. - * It is argument for NCNlpNerTokensParser. - */ -public interface NCNlpRichWord extends NCNlpWord { - boolean isStopWord(); - - boolean isBracketed(); - - boolean isQuoted(); - - boolean isKnownWord(); +// Can be refactored after any server (spring, akka etc) implementation. +// Maybe some 'result type' will be added again etc. +public class NCResultBuilder { + // TODO: implement it. + public NCResultBuilder withBody(Object o) { + return null; + } - boolean isSwearWord(); + public NCResult getResult() { + return null; + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNerElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNerElement.java new file mode 100644 index 0000000..cb7912d --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNerElement.java @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model.impl.ner; + +import org.apache.nlpcraft.model.NCConversation; +import org.apache.nlpcraft.model.NCMetadata; +import org.apache.nlpcraft.model.NCToken; +import org.apache.nlpcraft.model.NCValue; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Data model element. + * <p> + * Data model element defines a named entity that will be detected in the user input. A model element + * typically is one or more individual words that have a consistent semantic meaning and typically denote + * a real-world object, such as persons, locations, number, date and time, organizations, products, etc. + * Such object can be abstract or have a physical existence. + * <p> + * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/data-model.html">Data Model</a> section and review + * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. + * + * @see NCCustomParser + */ +public interface NCDefaultNerElement extends NCMetadata { + /** + * Gets unique ID of this element. + * <p> + * This unique ID should be human-readable for simpler debugging and testing of the model. 
+ * Although element ID could be any arbitrary string it is highly recommended having + * element ID as a lower case string starting with some model prefix, followed by colon and + * then the element's name. For example, some built-in NLPCraft IDs are: <code>nlpcraft:date</code>, + * <code>nlpcraft:city</code>. + * <p> + * Few important notes: + * <ul> + * <li>Element IDs starting with <code>nlpcraft:</code> are reserved for built-in NLPCraft IDs.</li> + * <li> + * Element ID is an implicit synonym for that element. + * Thus element ID can be used in the user input directly to clearly + * disambiguate the element in the input sentence instead of relying on synonyms or other + * ways of detection. + * </li> + * </ul> + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>id</code> property: + * <pre class="brush: js, highlight: [3]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ] + * } + * ] + * </pre> + * + * @see NCToken#getId() + * @return Unique ID of this element. + */ + String getId(); + + /** + * Gets the list of groups this element belongs to. + * <p> + * Model element can belong to one or more groups. By default, the element belongs to a single group whose group + * ID is equal to its {@link #getId() ID}. The proper grouping of the model elements is required for operation + * of Short-Term-Memory (STM) in {@link NCConversation conversation} (if and when conversation + * is used). Specifically, a token (i.e. found model element) that is part of the group set will override + * other tokens from the same set or its superset. In other words, tokens with a smaller group set + * (more specific token) will override the tokens from a larger group set (more generic tokens). 
+ * <p> + * Note that built-in tokens (including from 3rd party token providers) belong to a single group whose group + * ID is equal to their IDs. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>groups</code> property: + * <pre class="brush: js, highlight: [5]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "groups": ["group1", "group2"], + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ] + * } + * ] + * </pre> + * + * @return List of groups this element belongs to. By default, the model element belongs to one group + * with ID equal to the element {@link #getId() ID}. + * @see NCConversation + * @see #getId() + */ + default List<String> getGroups() { + return Collections.singletonList(getId()); + } + + /** + * Shortcut method to test if this element is a member of given group. It is equivalent to: + * <pre class="brush: java"> + * return getGroups().contains(grp); + * </pre> + * + * @param grp Token group to test. + * @return {@code True} if this element belongs to the given group, {@code false} otherwise. + */ + default boolean isMemberOf(String grp) { + return getGroups().contains(grp); + } + + /** + * Gets optional user-defined element's metadata. When a {@link NCToken token} for this element + * is detected in the input this metadata is merged into {@link NCToken#getMetadata()} method returned metadata. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>metadata</code> property: + * <pre class="brush: js, highlight: [8,9,10,11,12]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ], + * "metadata": { + * "str": "val1", + * "num": 100, + * "bool": false + * } + * } + * ] + * </pre> + * + * @return Element's metadata or empty collection if none provided.
Default implementation returns an empty collection. + */ + default Map<String, Object> getMetadata() { + return Collections.emptyMap(); + } + + /** + * Gets optional element description. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>description</code> property: + * <pre class="brush: js, highlight: [4]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ] + * } + * ] + * </pre> + * + * @return Optional element description. Default implementation returns {@code null}. + */ + default String getDescription() { + return null; + } + + /** + * Gets optional list of {@link NCValue values} for this element. + * <p> + * Each element can generally be recognized either by one of its synonyms or values. Elements and their values + * are analogous to types and instances of that type in programming languages. Each value + * has a name and optional set of its own synonyms by which that value, and ultimately its element, can be + * recognized. Note that value name itself acts as an implicit synonym even when no additional synonyms are added + * for that value. + * <p> + * Consider this example.
A model element {@code x:car} can have: + * <ul> + * <li> + * Set of general synonyms: + * <code>{transportation|transport|_} {vehicle|car|sedan|auto|automobile|suv|crossover|coupe|truck}</code> + * </li> + * <li>Set of values: + * <ul> + * <li>{@code mercedes} with synonyms {@code (mercedes, mercedes-benz, mb, benz)}</li> + * <li>{@code bmw} with synonyms {@code (bmw, bimmer)}</li> + * <li>{@code chevrolet} with synonyms {@code (chevy, chevrolet)}</li> + * </ul> + * </li> + * </ul> + * With that setup {@code x:car} element will be recognized by any of the following input sub-strings: + * <ul> + * <li>{@code transport car}</li> + * <li>{@code benz}</li> + * <li>{@code automobile}</li> + * <li>{@code transport vehicle}</li> + * <li>{@code sedan}</li> + * <li>{@code chevy}</li> + * <li>{@code bimmer}</li> + * <li>{@code x:car}</li> + * </ul> + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>values</code> property: + * <pre class="brush: js, highlight: [8,9,10,11,12,13]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ], + * "values": [ + * { + * "name": "name1", + * "synonyms": ["syn1", "syn2"] + * } + * ] + * } + * ] + * </pre> + * + * @return List of values for this element. Default implementation returns an empty list. + */ + default List<NCValue> getValues() { + return Collections.emptyList(); + } + + /** + * Gets optional ID of the immediate parent element. Parent ID allows model elements to form into hierarchy.
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>parentId</code> property: + * <pre class="brush: js, highlight: [5]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "parentId": "parent", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ] + * } + * ] + * </pre> + * + * @return Optional parent element ID, or {@code null} if not specified. Default implementation returns + * {@code null}. + */ + default String getParentId() { + return null; + } + + /** + * Gets the list of synonyms by which this model element will be recognized. Read more about + * many forms of synonyms in <a target=_ href="https://nlpcraft.apache.org/data-model.html">Data Model</a> section + * and review <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>synonyms</code> property: + * <pre class="brush: js, highlight: [5,6,7]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ] + * } + * ] + * </pre> + * + * @return List of synonyms for this element. List is generally optional since element's ID acts + * as an implicit synonym. Default implementation returns an empty list. + */ + default List<String> getSynonyms() { + return Collections.emptyList(); + } + + /** + * Gets optional dynamic value loader. This loader will be used additionally to any + * values defined in {@link #getValues()} method. Default implementation returns an empty {@link Optional}. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>valueLoader</code> property with value + * of a fully qualified class name implementing {@link NCDefaultNervalueLoader} interface.
Note that + * only one instance of the value loader will be created per model and given class name: + * <pre class="brush: js, highlight: [14]"> + * "elements": [ + * { + * "id": "phone:act", + * "description": "Phone action.", + * "synonyms": [ + * "{give|_} {call|phone|ring|dial|dial up|ping|contact}" + * ], + * "values": [ + * { + * "name": "name1", + * "synonyms": ["syn1", "syn2"] + * } + * ], + * "valueLoader": "my.package.ValueLoader" + * } + * ] + * </pre> + * + * @return Optional instance of dynamic value loader. + */ + default Optional<NCDefaultNervalueLoader> getValueLoader() { + return Optional.empty(); + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNerParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNerParser.java new file mode 100644 index 0000000..8fe00c6 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNerParser.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model.impl.ner; + +import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.nlp.NCNlpNerParser; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +// TODO: maybe class. +public interface NCDefaultNerParser extends NCNlpNerParser { + + /** + * Default value for {@link #getMaxElementSynonyms()} method. + */ + int DFLT_MAX_ELEMENT_SYNONYMS = 1000; + + /** + * Default value for {@link #getMaxTotalSynonyms()} method. + */ + int DFLT_MAX_TOTAL_SYNONYMS = Integer.MAX_VALUE; + + /** + * Default value for {@link #isMaxSynonymsThresholdError()} method. + */ + boolean DFLT_MAX_SYNONYMS_THRESHOLD_ERROR = false; + + /** + * Default value for {@link #isDupSynonymsAllowed()} method. + */ + boolean DFLT_IS_DUP_SYNONYMS_ALLOWED = true; + + /** + * Minimum value for {@link #getMaxElementSynonyms()} method. + */ + long MAX_SYN_MIN = 1L; + + /** + * Maximum value for {@link #getMaxElementSynonyms()} method. + */ + long MAX_SYN_MAX = Long.MAX_VALUE; + + /** + * Gets a set of model elements or named entities. Model can have zero or more user defined elements. + * <p> + * An element is the main building block of the model. Data model element defines a named entity + * that will be automatically recognized in the user input. See also {@link NCModel#getParsers()} method on how + * to provide programmatic named entity recognizer (NER) implementations. + * <p> + * Note that unless model elements are loaded dynamically it is highly recommended declaring model + * elements in the external JSON/YAML model configuration (under <code>elements</code> property): + * <pre class="brush: js"> + * { + * "elements": [ + * { + * "id": "wt:hist", + * "synonyms": [ + * "{<WEATHER>|_} <HISTORY>", + * "<HISTORY> {<OF>|_} <WEATHER>" + * ], + * "description": "Past weather conditions." + * } + * ] + * } + * </pre> + * + * @return Set of model elements, potentially empty. 
+ * @see NCModel#getParsers() + */ + default Set<NCDefaultNerElement> getElements() { + return Collections.emptySet(); + } + + /** + * Whether exceeding {@link #getMaxElementSynonyms()} will trigger a warning log or throwing an exception. + * Note that throwing exception will prevent data probe from starting. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_SYNONYMS_THRESHOLD_ERROR} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxSynonymThresholdError</code> property: + * <pre class="brush: js"> + * { + * "maxSynonymThresholdError": true + * } + * </pre> + * + * @return Whether exceeding {@link #getMaxElementSynonyms()} will trigger a warning log or + * throwing an exception. + * @see #getMaxElementSynonyms() + */ + default boolean isMaxSynonymsThresholdError() { + return DFLT_MAX_SYNONYMS_THRESHOLD_ERROR; + } + + /** + * Gets maximum number of unique synonyms per model element after which either warning or error will be + * triggered. Note that there is no technical limit on how many synonyms a model element can have apart + * from memory consumption and performance considerations. However, in cases where synonyms are auto-generated + * (i.e. from database) this property can serve as a courtesy notification that a model element has too many + * synonyms. Also, in general, too many synonyms can potentially lead to a performance degradation. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_ELEMENT_SYNONYMS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxSynonymThreshold</code> property: + * <pre class="brush: js"> + * { + * "maxSynonymThreshold": 1000 + * } + * </pre> + * + * @return Maximum number of unique synonyms per model element after which either warning or + * error will be triggered. 
+ * @see #isMaxSynonymsThresholdError() + * @see #getMaxTotalSynonyms() + */ + default int getMaxElementSynonyms() { + return DFLT_MAX_ELEMENT_SYNONYMS; + } + + /** + * Total number of synonyms allowed per model. Model won't deploy if total number of synonyms exceeds this + * number. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_MAX_TOTAL_SYNONYMS} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>maxTotalSynonyms</code> property: + * <pre class="brush: js"> + * { + * "maxTotalSynonyms": 1000000 + * } + * </pre> + * + * @return Total number of synonyms allowed per model. + * @see #getMaxElementSynonyms() + */ + default int getMaxTotalSynonyms() { + return DFLT_MAX_TOTAL_SYNONYMS; + } + + + /** + * Whether duplicate synonyms are allowed. If {@code true} - the model will pick a random + * model element when multiple elements are found due to duplicate synonyms. If {@code false} - model + * will print an error message and will not deploy. + * <p> + * <b>Default</b> + * <br> + * If not provided by the model the default value {@link #DFLT_IS_DUP_SYNONYMS_ALLOWED} will be used. + * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>dupSynonymsAllowed</code> property: + * <pre class="brush: js"> + * { + * "dupSynonymsAllowed": true + * } + * </pre> + * + * @return Whether to allow duplicate synonyms. + */ + default boolean isDupSynonymsAllowed() { + return DFLT_IS_DUP_SYNONYMS_ALLOWED; + } + + + /** + * Gets an optional map of macros to be used in this model. Macros and option groups are instrumental + * in defining model's elements. See {@link NCDefaultNerElement} for documentation on macros. 
+ * <p> + * <b>JSON</b> + * <br> + * If using JSON/YAML model presentation this is set by <code>macros</code> property: + * <pre class="brush: js"> + * { + * "macros": [ + * { + * "name": "<OF>", + * "macro": "{of|for|per}" + * }, + * { + * "name": "<CUR>", + * "macro": "{current|present|moment|now}" + * } + * ] + * } + * </pre> + * + * @return Potentially empty map of macros. + */ + default Map<String, String> getMacros() { + return Collections.emptyMap(); + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNervalueLoader.java similarity index 90% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNervalueLoader.java index ed4bcdd..90230e4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/NCDefaultNervalueLoader.java @@ -15,7 +15,9 @@ * limitations under the License. */ -package org.apache.nlpcraft.model; +package org.apache.nlpcraft.model.impl.ner; + +import org.apache.nlpcraft.model.NCValue; import java.util.Set; @@ -32,7 +34,7 @@ import java.util.Set; * keeping the rest of the model declaration static (i.e. in JSON/YAML). To accomplish this you can * define <code>valueLoader</code> property and provide a fully qualified class name that implements * this interface. During the model instantiation an instance of that class will be created once per - * each model and class of loader and method {@link #load(NCElement)} will be called to load + * each model and class of loader and method {@link #load(NCDefaultNerElement)} will be called to load * element's values. Note that you can use both statically defined values (i.e. 
<code>values</code> property) * and dynamically loaded values together and they will be merged: * <pre class="brush: js, highlight: [11]"> @@ -51,12 +53,12 @@ import java.util.Set; * ] * </pre> */ -public interface NCValueLoader { +public interface NCDefaultNervalueLoader { /** * Loads values for given model element. * * @param owner Model element to which this value loader belongs to. * @return Set of values, potentially empty but never {@code null}. */ - Set<NCValue> load(NCElement owner); + Set<NCValue> load(NCDefaultNerElement owner); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/builders/NCDefaultNerElementBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/builders/NCDefaultNerElementBuilder.java new file mode 100644 index 0000000..b20af43 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/builders/NCDefaultNerElementBuilder.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model.impl.ner.builders; + +import org.apache.nlpcraft.model.NCValue; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNervalueLoader; + +import java.util.List; +import java.util.Map; + +// withId is only one mandatory call. +// It is named NCSingleElementBuilder to have different name with NCMultiElementsBuilder. +public class NCDefaultNerElementBuilder { + public NCDefaultNerElementBuilder withId(String id) { + return null; + } + public NCDefaultNerElementBuilder withParentId(String id) { + return null; + } + public NCDefaultNerElementBuilder withGroups(String... groups) { + return null; + } + public NCDefaultNerElementBuilder withMetadata(Map<String, Object> meta) { + return null; + } + public NCDefaultNerElementBuilder withDescrition(String desc) { + return null; + } + public NCDefaultNerElementBuilder withValues(List<NCValue> values) { + return null; + } + public NCDefaultNerElementBuilder withValueLoader(NCDefaultNervalueLoader loader) { + return null; + } + public NCDefaultNerElementBuilder withSynonyms(List<String> syns) { + return null; + } + + public NCDefaultNerElement getElement() { + return null; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/builders/NCDefaultNerParserBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/builders/NCDefaultNerParserBuilder.java new file mode 100644 index 0000000..c236dd6 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/ner/builders/NCDefaultNerParserBuilder.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model.impl.ner.builders; + +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerParser; + +import java.io.File; +import java.util.List; +import java.util.Map; + +public class NCDefaultNerParserBuilder { + public NCDefaultNerParserBuilder withMaxElementSynonyms(int maxElementSynonyms) { return null; } + public NCDefaultNerParserBuilder withMaxTotalSynonyms(int maxTotalSynonyms) { + return null; + } + public NCDefaultNerParserBuilder withMaxSynonymsThresholdError(boolean maxSynonymsThresholdError) { return null; } + public NCDefaultNerParserBuilder withDupSynonymsAllowed(boolean dupSynonymsAllowed) { + return null; + } + + public NCDefaultNerParserBuilder withMacros(Map<String, String> macros) { + return null; + } + + // 2 alternatives. 
+ public NCDefaultNerParserBuilder withElements(List<NCDefaultNerElement> elements) { + return null; + } + public NCDefaultNerParserBuilder withElements(File file) { + return null; + } + + public NCDefaultNerParser getNlpcraftNerParser() { + return null; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/opennlp/NCOpenNlpNerParser.java similarity index 60% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/opennlp/NCOpenNlpNerParser.java index f065cdc..61fd8b1 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/opennlp/NCOpenNlpNerParser.java @@ -15,21 +15,21 @@ * limitations under the License. */ -package org.apache.nlpcraft.model.nlp; +package org.apache.nlpcraft.model.impl.opennlp; import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.NCModelView; import org.apache.nlpcraft.model.NCRequest; +import org.apache.nlpcraft.model.nlp.NCNlpNerParser; +import org.apache.nlpcraft.model.nlp.NCNlpNerToken; +import org.apache.nlpcraft.model.nlp.NCNlpRichWord; -import java.util.*; +import java.util.List; -/** - * OnenNlp implementation - provided (DATE etc) - * Stanford implementation - separated module. - * User implementations can be provided too. - * - * Order of configured NCNlpNerTokensParser elements is important. - * Only one parsers iteration called. - */ -public interface NCNlpNerTokensParser { - List<NCNlpNerToken> parse(NCRequest req, NCModel mdl, List<NCNlpRichWord> words, List<NCNlpNerToken> elements); +// Implementation by default. Stanford in another module. Can be provided by user. 
+public class NCOpenNlpNerParser implements NCNlpNerParser { + @Override + public List<NCNlpNerToken> parse(NCRequest req, NCModelView mdl, List<NCNlpRichWord> words, List<NCNlpNerToken> elements) { + return null; + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/opennlp/NCOpenNlpWordsParser.java similarity index 62% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/opennlp/NCOpenNlpWordsParser.java index f065cdc..6c193a1 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/opennlp/NCOpenNlpWordsParser.java @@ -15,21 +15,18 @@ * limitations under the License. */ -package org.apache.nlpcraft.model.nlp; +package org.apache.nlpcraft.model.impl.opennlp; -import org.apache.nlpcraft.model.NCModel; import org.apache.nlpcraft.model.NCRequest; +import org.apache.nlpcraft.model.nlp.NCNlpTextParser; +import org.apache.nlpcraft.model.nlp.NCNlpWord; -import java.util.*; +import java.util.List; -/** - * OnenNlp implementation - provided (DATE etc) - * Stanford implementation - separated module. - * User implementations can be provided too. - * - * Order of configured NCNlpNerTokensParser elements is important. - * Only one parsers iteration called. - */ -public interface NCNlpNerTokensParser { - List<NCNlpNerToken> parse(NCRequest req, NCModel mdl, List<NCNlpRichWord> words, List<NCNlpNerToken> elements); +// Implementation by default. Stanford in another module. Can be provided by user. 
+public class NCOpenNlpWordsParser implements NCNlpTextParser { + @Override + public List<NCNlpWord> parse(NCRequest req) { + return null; + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerParser.java new file mode 100644 index 0000000..c8c0e1b --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerParser.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model.nlp; + +import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.NCModelView; +import org.apache.nlpcraft.model.NCRequest; + +import java.util.List; + +/** + * TODO: + * OpenNLP implementation - provided (DATE etc) + * Stanford implementation - separate module. + * User implementations can be provided too. + * + * Order of configured NCNlpNerParser elements is important. + * Only one iteration over the parsers is performed. + */ + +/** + * Custom model element parser for programmatic NER implementations. This parser allows to define your own + * Named Entity Recognizer (NER) implementation in cases when the standard declarative methods are not expressive + * enough. 
Instance of the parser should be made available in the model via {@link NCModel#getParsers()} method. + * <p> + * By default, the data model detects its elements by their declarative synonyms. However, + * in some cases this is not expressive enough. In such cases, one or more user-defined parsers can be defined + * for the model that would allow the user to define its own NER logic to detect the model elements in the user + * input programmatically. Note that there can be multiple custom parsers per model and each one can detect + * any number of model elements. + * + * @see NCModel#getParsers() + */ + +public interface NCNlpNerParser { + /** + * Analyses user input provided as a list of {@link NCNlpRichWord} objects and returns a list + * of {@link NCNlpNerToken} objects. Note that model elements returned from this method must + * be defined in the model, i.e. this method only provides an additional logic of detecting these + * elements, but they still need to be defined normally in the model. + * + * @param req User request descriptor. + * @param mdl Instance of data model this parser belongs to. + * @param words Entire user input represented as a list of custom words. + * @param toks List of already parsed and detected model elements at the point of this call. + * @return List of custom elements. List can be empty or {@code null} if no model elements detected. 
+ * @see NCModel#getParsers() + */ + List<NCNlpNerToken> parse(NCRequest req, NCModelView mdl, List<NCNlpRichWord> words, List<NCNlpNerToken> toks); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java index 57d9d07..3ace535 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java @@ -18,14 +18,31 @@ package org.apache.nlpcraft.model.nlp; import org.apache.nlpcraft.model.NCMetadata; +import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.NCModelView; +import org.apache.nlpcraft.model.NCRequest; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; -import java.util.*; +import java.util.List; // NCNlpNerTokensParser parsing result. public interface NCNlpNerToken extends NCMetadata { + /** + * Gets ID of the detected model element. Note that it <b>must correspond</b> to one of the elements + * defined in the model. In other words, the parser doesn't define a new model element but rather + * references the element that's already defined in the model. + * + * @return ID of the detected model element. + * @see NCDefaultNerElement#getId() + * @see NCModel#getElements() + */ String getId(); + /** + * Gets a list of NLP custom words that matched detected model element. These must be the same custom words + * that were originally passed to {@link NCNlpNerParser#parse(NCRequest, NCModelView, List, List)} method. + * + * @return List of NLP custom words that comprise detected custom model element. 
+ */ List<NCNlpRichWord> getWords(); - - Map<String, Object> getMetadata(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java index 1ccd742..b32397d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java @@ -17,20 +17,14 @@ package org.apache.nlpcraft.model.nlp; -import org.apache.nlpcraft.model.nlp.NCNlpWord; - /** * Extended word data, enriched by NLP. * It is argument for NCNlpNerTokensParser. */ public interface NCNlpRichWord extends NCNlpWord { boolean isStopWord(); - boolean isBracketed(); - boolean isQuoted(); - boolean isKnownWord(); - boolean isSwearWord(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java index a63cab9..6bf941f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.nlpcraft.model.nlp; import org.apache.nlpcraft.model.NCRequest; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java index 511553a..dca1f5d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nlpcraft.model.nlp; // Initial parsing result, look at NCNlpWordsParser. diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java new file mode 100644 index 0000000..fc20dc7 --- /dev/null +++ b/nlpcraft/src/test/java/org/apache/nlpcraft/NCSpec.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft; + +import org.apache.nlpcraft.model.NCIntentMatch; +import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.NCModelBehaviour; +import org.apache.nlpcraft.model.NCRejection; +import org.apache.nlpcraft.model.NCResult; +import org.apache.nlpcraft.model.NCValue; +import org.apache.nlpcraft.model.builders.NCModelBuilder; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerElement; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNerParser; +import org.apache.nlpcraft.model.impl.ner.NCDefaultNervalueLoader; +import org.apache.nlpcraft.model.impl.ner.builders.NCDefaultNerElementBuilder; +import org.apache.nlpcraft.model.impl.ner.builders.NCDefaultNerParserBuilder; +import org.apache.nlpcraft.model.impl.opennlp.NCOpenNlpNerParser; +import org.apache.nlpcraft.model.impl.opennlp.NCOpenNlpWordsParser; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.net.URL; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; +import org.apache.nlpcraft.model.NCIntentRef; + +public class NCSpec { + private static class SomeClassWithIntents { + @NCIntentRef("remove:waypoint") + public void x() { + + } + } + + @Test + public void test() throws Exception { + NCDefaultNerParser ner1 = + new NCDefaultNerParserBuilder(). + withElements(new File("JSON.json")). + withMaxTotalSynonyms(30). + getNlpcraftNerParser(); + + NCDefaultNerParser ner2 = + new NCDefaultNerParserBuilder(). 
+ withMacros(new HashMap<>() { { put("<ACTION>", "{turn|switch|dial|let|set|get|put}"); } }). + withElements( + Arrays.asList( + new NCDefaultNerElementBuilder(). + withId("elementID1"). + withSynonyms(Arrays.asList("<ACTION> {on|up|_}", "<ACTION> qq")). + getElement(), + new NCDefaultNerElementBuilder(). + withId("elementID2"). + withValueLoader(new NCDefaultNervalueLoader() { + @Override + public Set<NCValue> load(NCDefaultNerElement owner) { + return null; + } + }). + getElement() + ) + ). + getNlpcraftNerParser(); + + + NCModel mdl = + new NCModelBuilder(). + // Common. + withId("modleId"). + withName("name"). + withSwearWordsAllowed(true). + // Stopwords etc. + withAdditionalStopWords(new HashSet<>(Arrays.asList("x1", "x2"))). + // Nlp parser. + withNlpWordsParser(new NCOpenNlpWordsParser()). + // NERs. + withNlpNerParsers(Arrays.asList(new NCOpenNlpNerParser(), ner1, ner2)). + // Intents. + withIntentsFromUrls(Collections.singletonList(new URL("http://urls.com"))). + withIntentsClasses(Collections.singletonList(SomeClassWithIntents.class)). + // You can set link on this if prepare model without builder. + withIntentsObjects(Collections.singletonList(new SomeClassWithIntents())). + withIntentsSamplesMap( + new HashMap<>() { { put("intent1", Arrays.asList(Arrays.asList("sample1", "sample2"))); } } + ). + // Model behaviour. + withModelBehaviour( + new NCModelBehaviour() { + @Override + public NCResult onRejection(NCIntentMatch ctx, NCRejection e) { + return null; + } + } + ).getModel(); + + mdl.start(); + + NCNlpcraft nlp = new NCNlpcraftBuilder().withModel(mdl).getNCNlpcraft(); + + String reqId = nlp.ask("weather today"); + + nlp.cancel(reqId); + + mdl.stop(); + } +}
