This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch master-model in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 4d972b901af2759151028cc560a54efd444c837e Author: Sergey Kamov <[email protected]> AuthorDate: Thu Oct 7 14:06:14 2021 +0300 WIP. --- .../model/{NCModel.java => NCCustomElement.java} | 15 +- .../model/{NCModel.java => NCCustomParser.java} | 11 +- .../model/{NCModel.java => NCCustomWord.java} | 36 ++- .../scala/org/apache/nlpcraft/model/NCElement.java | 57 +++++ .../scala/org/apache/nlpcraft/model/NCModel.java | 12 +- .../org/apache/nlpcraft/model/NCModelConfig.java | 246 +++++++++++++++++++++ .../model/{NCModel.java => NCRequest.java} | 22 +- .../scala/org/apache/nlpcraft/model/NCValue.java | 48 ++++ .../org/apache/nlpcraft/model/NCValueLoader.java | 62 ++++++ 9 files changed, 483 insertions(+), 26 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomElement.java similarity index 77% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomElement.java index 45a0a06..102d889 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomElement.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,9 +17,12 @@ package org.apache.nlpcraft.model; -/** - * - */ -public interface NCModel { - // TODO +import java.util.*; + +public interface NCCustomElement extends NCMetadata { + String getElementId(); + + List<NCCustomWord> getWords(); + + Map<String, Object> getMetadata(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java similarity index 75% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java index 45a0a06..3193fb5 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomParser.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,9 +17,8 @@ package org.apache.nlpcraft.model; -/** - * - */ -public interface NCModel { - // TODO +import java.util.*; + +public interface NCCustomParser extends NCLifecycle { + List<NCCustomElement> parse(NCRequest req, NCModelConfig mdl, List<NCCustomWord> words, List<NCCustomElement> elements); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomWord.java similarity index 62% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomWord.java index 45a0a06..8806f98 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCCustomWord.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,9 +17,33 @@ package org.apache.nlpcraft.model; -/** - * - */ -public interface NCModel { - // TODO +public interface NCCustomWord { + String getNormalizedText(); + + String getOriginalText(); + + int getStartCharIndex(); + + int getEndCharIndex(); + + String getPos(); + + String getPosDescription(); + + String getLemma(); + + String getStem(); + + boolean isStopWord(); + + boolean isBracketed(); + + boolean isQuoted(); + + boolean isKnownWord(); + + boolean isSwearWord(); + + // TODO: + // boolean isEnglish(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java new file mode 100644 index 0000000..d21d52a --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model; + +import java.io.*; +import java.util.*; + +public interface NCElement extends NCMetadata, Serializable { + String getId(); + + default List<String> getGroups() { + return Collections.singletonList(getId()); + } + + default boolean isMemberOf(String grp) { + return getGroups().contains(grp); + } + + default Map<String, Object> getMetadata() { + return Collections.emptyMap(); + } + + default String getDescription() { + return null; + } + + default List<NCValue> getValues() { + return Collections.emptyList(); + } + + default String getParentId() { + return null; + } + + default List<String> getSynonyms() { + return Collections.emptyList(); + } + + default Optional<NCValueLoader> getValueLoader() { + return Optional.empty(); + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java index 45a0a06..c10db04 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java @@ -21,5 +21,15 @@ package org.apache.nlpcraft.model; * */ public interface NCModel { - // TODO + String ask(String mdlId, String txt, Map<String, Object> data, boolean enableLog, String userId); + + NCResult askSync(String mdlId, String txt, Map<String, Object> data, boolean enableLog, String userId); + + void cancel(Set<String> srvReqIds, Long usrId, String usrExtId); + + List<NCResult> check(Set<String> srvReqIds, Integer maxRows, String usrId); + + void clearConversation(String mdlId, String String); + + void clearDialog(String mdlId, String String); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java new file mode 100644 index 0000000..ca91c22 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelConfig.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.time.Duration; +import java.util.*; + +public interface NCModelConfig extends NCMetadata { + long CONV_TIMEOUT_MIN = 0L; + + long CONV_TIMEOUT_MAX = Long.MAX_VALUE; + + long MAX_UNKNOWN_WORDS_MIN = 0L; + + long MAX_UNKNOWN_WORDS_MAX = Long.MAX_VALUE; + + long MAX_FREE_WORDS_MIN = 0L; + + long MAX_FREE_WORDS_MAX = Long.MAX_VALUE; + + long MAX_SUSPICIOUS_WORDS_MIN = 0L; + + long MAX_SUSPICIOUS_WORDS_MAX = Long.MAX_VALUE; + + long MIN_WORDS_MIN = 1L; + + long MIN_WORDS_MAX = Long.MAX_VALUE; + + long MIN_NON_STOPWORDS_MIN = 0L; + + long MIN_NON_STOPWORDS_MAX = Long.MAX_VALUE; + + long MIN_TOKENS_MIN = 0L; + + long MIN_TOKENS_MAX = Long.MAX_VALUE; + + long MAX_TOKENS_MIN = 0L; + + long MAX_TOKENS_MAX = 100L; + + long MAX_WORDS_MIN = 1L; + + long MAX_WORDS_MAX = 100L; + + long MAX_SYN_MIN = 1L; + + long MAX_SYN_MAX = Long.MAX_VALUE; + + long CONV_DEPTH_MIN = 1L; + + long CONV_DEPTH_MAX = Long.MAX_VALUE; + + int MODEL_ID_MAXLEN = 32; + + int MODEL_NAME_MAXLEN = 64; + + int MODEL_VERSION_MAXLEN = 16; + + int MODEL_ELEMENT_ID_MAXLEN = 64; + + int DFLT_MAX_ELEMENT_SYNONYMS = 1000; + + int DFLT_MAX_TOTAL_SYNONYMS = Integer.MAX_VALUE; + + boolean 
DFLT_MAX_SYNONYMS_THRESHOLD_ERROR = false; + + long DFLT_CONV_TIMEOUT_MS = Duration.ofMinutes(60).toMillis(); + + int DFLT_CONV_DEPTH = 3; + + Map<String, Object> DFLT_METADATA = new HashMap<>(); + + int DFLT_MAX_UNKNOWN_WORDS = Integer.MAX_VALUE; + + int DFLT_MAX_FREE_WORDS = Integer.MAX_VALUE; + + int DFLT_MAX_SUSPICIOUS_WORDS = 0; + + int DFLT_MIN_WORDS = 1; + + int DFLT_MAX_WORDS = 50; + + int DFLT_MIN_TOKENS = 0; + + int DFLT_MAX_TOKENS = 50; + + int DFLT_MIN_NON_STOPWORDS = 0; + + boolean DFLT_IS_NON_ENGLISH_ALLOWED = true; + + boolean DFLT_IS_NOT_LATIN_CHARSET_ALLOWED = false; + + boolean DFLT_IS_SWEAR_WORDS_ALLOWED = false; + + boolean DFLT_IS_NO_NOUNS_ALLOWED = true; + + boolean DFLT_IS_PERMUTATE_SYNONYMS = false; + + boolean DFLT_IS_DUP_SYNONYMS_ALLOWED = true; + + boolean DFLT_IS_NO_USER_TOKENS_ALLOWED = true; + + // TODO: add javadoc + boolean DFLT_IS_STOPWORDS_ALLOWED = true; + + String getId(); + + String getName(); + + String getVersion(); + + default String getDescription() { + return null; + } + + default String getOrigin() { + return getClass().getCanonicalName(); + } + + default int getMaxUnknownWords() { + return DFLT_MAX_UNKNOWN_WORDS; + } + + default int getMaxFreeWords() { + return DFLT_MAX_FREE_WORDS; + } + + default int getMaxSuspiciousWords() { + return DFLT_MAX_SUSPICIOUS_WORDS; + } + + default int getMinWords() { + return DFLT_MIN_WORDS; + } + + default int getMaxWords() { + return DFLT_MAX_WORDS; + } + + default int getMinTokens() { + return DFLT_MIN_TOKENS; + } + + default int getMaxTokens() { + return DFLT_MAX_TOKENS; + } + + default int getMinNonStopwords() { + return DFLT_MIN_NON_STOPWORDS; + } + + default boolean isNonEnglishAllowed() { + return DFLT_IS_NON_ENGLISH_ALLOWED; + } + + default boolean isNotLatinCharsetAllowed() { + return DFLT_IS_NOT_LATIN_CHARSET_ALLOWED; + } + + default boolean isSwearWordsAllowed() { + return DFLT_IS_SWEAR_WORDS_ALLOWED; + } + + default boolean isNoNounsAllowed() { + return DFLT_IS_NO_NOUNS_ALLOWED; 
+ } + + default boolean isPermutateSynonyms() { + return DFLT_IS_PERMUTATE_SYNONYMS; + } + + default boolean isDupSynonymsAllowed() { + return DFLT_IS_DUP_SYNONYMS_ALLOWED; + } + + default int getMaxTotalSynonyms() { + return DFLT_MAX_TOTAL_SYNONYMS; + } + + default boolean isNoUserTokensAllowed() { + return DFLT_IS_NO_USER_TOKENS_ALLOWED; + } + + default boolean isSparse() { + return DFLT_IS_SPARSE; + } + + default boolean isGreedy() { + return DFLT_IS_GREEDY; + } + + default Map<String, Object> getMetadata() { + return DFLT_METADATA; + } + + default Set<String> getAdditionalStopWords() { + return Collections.emptySet(); + } + + default Set<String> getExcludedStopWords() { + return Collections.emptySet(); + } + + default Set<String> getSuspiciousWords() { + return Collections.emptySet(); + } + + default Map<String, String> getMacros() { + return Collections.emptyMap(); + } + + default List<NCCustomParser> getParsers() { + return Collections.emptyList(); + } + + default Set<NCElement> getElements() { + return Collections.emptySet(); + } + + default int getMaxElementSynonyms() { return DFLT_MAX_ELEMENT_SYNONYMS; } + + default boolean isMaxSynonymsThresholdError() { return DFLT_MAX_SYNONYMS_THRESHOLD_ERROR; } + + default long getConversationTimeout() { return DFLT_CONV_TIMEOUT_MS; } + + default int getConversationDepth() { return DFLT_CONV_DEPTH; } + + default boolean isStopWordsAllowed() { + return DFLT_IS_STOPWORDS_ALLOWED; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRequest.java similarity index 65% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRequest.java index 45a0a06..54819c6 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRequest.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file 
except in compliance with * the License. You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,9 +17,17 @@ package org.apache.nlpcraft.model; -/** - * - */ -public interface NCModel { - // TODO -} +import java.util.*; + +public interface NCRequest extends NCMetadata { + // TODO:? + String getUserId(); + String getServerRequestId(); + String getNormalizedText(); + long getReceiveTimestamp(); + Map<String, Object> getRequestData(); + + // NCCompany getCompany(); + // Optional<String> getRemoteAddress(); + // Optional<String> getClientAgent(); +} \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java new file mode 100644 index 0000000..227516c --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model; + +import java.io.Serializable; +import java.util.List; + +/** + * Model element's value. + * <p> + * Each model element can generally be recognized either by one of its synonyms or values. Elements and their values + * are analogous to types and instances of that type in programming languages. Each value + * has a name and optional set of its own synonyms by which that value, and ultimately its element, can be + * recognized. Note that the value name itself acts as an implicit synonym even when no additional synonyms are added + * for that value. + * + * @see NCElement#getValues() + */ +public interface NCValue extends Serializable { + /** + * Gets value name. + * + * @return Value name. + */ + String getName(); + + /** + * Gets optional list of value's synonyms. + * + * @return Potentially empty list of value's synonyms. + */ + List<String> getSynonyms(); +} \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java new file mode 100644 index 0000000..ed4bcdd --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.util.Set; + +/** + * Dynamic value loader that can be used by model elements to dynamically load or create their values. Note that + * the primary use case for this interface is the dynamic value loading for the models defined in JSON/YAML + * presentation. However, it's not technically limited to that use case only, and this interface can be + * set programmatically when model elements are created programmatically too. + * <p> + * <b>JSON</b> + * <br> + * When using JSON/YAML model presentation element values can be defined statically. However, in some + * cases, it is required to load these values from external sources like database or REST services while + * keeping the rest of the model declaration static (i.e. in JSON/YAML). To accomplish this you can + * define <code>valueLoader</code> property and provide a fully qualified class name that implements + * this interface. During the model instantiation an instance of that class will be created once for + * each model and class of loader and method {@link #load(NCElement)} will be called to load + * element's values. Note that you can use both statically defined values (i.e. <code>values</code> property) + * and dynamically loaded values together and they will be merged: + * <pre class="brush: js, highlight: [11]"> + * "elements": [ + * { + * "id": "my:id", + * "description": "My description.", + * "values": [ + * { + * "name": "name1", + * "synonyms": ["syn1", "syn2"] + * } + * ], + * "valueLoader": "my.package.MyLoader" + * } + * ] + * </pre> + */ +public interface NCValueLoader { + /** + * Loads values for given model element. + * + * @param owner Model element to which this value loader belongs. + * @return Set of values, potentially empty but never {@code null}. + */ + Set<NCValue> load(NCElement owner); +}
