This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-468 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 358c25ebcb1d41dfe86748d60365db4f05a2d805 Author: Sergey Kamov <[email protected]> AuthorDate: Mon Oct 11 10:44:14 2021 +0300 WIP. --- .../apache/nlpcraft/model/NCModelAddClasses.java | 55 ------------------- .../apache/nlpcraft/model/NCModelAddPackage.java | 55 ------------------- .../scala/org/apache/nlpcraft/model/NCValue.java | 48 +++++++++++++++++ .../org/apache/nlpcraft/model/NCValueLoader.java | 62 ++++++++++++++++++++++ .../nlp/NCNlpNerToken.java} | 20 ++++--- .../nlp/NCNlpNerTokensParser.java} | 22 ++++++-- .../nlp/NCNlpRichWord.java} | 23 ++++++-- .../apache/nlpcraft/model/nlp/NCNlpTextParser.java | 17 ++++++ .../org/apache/nlpcraft/model/nlp/NCNlpWord.java | 13 +++++ 9 files changed, 189 insertions(+), 126 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelAddClasses.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelAddClasses.java deleted file mode 100644 index d8241d4..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelAddClasses.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nlpcraft.model; - -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import static java.lang.annotation.ElementType.TYPE; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -/** - * Annotation to add one or more classes that contain intent callbacks. This annotation should be applied to the main - * model class. When found the internal intent detection algorithm will scan these additional classes searching - * for intent callbacks. - * <p> - * Additionally with {@link NCModelAddPackage} annotation, these two annotations allowing to have model implementation, - * i.e. intent callbacks, in external classes not linked through sub-type relationship to the main model class. This - * approach provides greater modularity, isolated testability and overall coding efficiencies for the larger models - * <p> - * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/intent-matching.html#binding">Intent Matching</a> section and review - * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. - * - * @see NCModelAddPackage - * @see NCIntentRef - * @see NCIntentTerm - * @see NCIntentSample - * @see NCIntentSampleRef - * @see NCIntentSkip - * @see NCIntentMatch - */ -@Retention(value=RUNTIME) -@Target(value=TYPE) -public @interface NCModelAddClasses { - /** - * Array of class instances to additionally scan for intent callbacks. - * - * @return Array of class instances to additionally scan for intent callbacks. 
- */ - Class<?>[] value(); -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelAddPackage.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelAddPackage.java deleted file mode 100644 index 41ca9dc..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelAddPackage.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.model; - -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import static java.lang.annotation.ElementType.TYPE; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -/** - * Annotation to add one or more JVM packages that contain classes with intent callbacks. This annotation should be - * applied to the main model class. When found the internal intent detection algorithm will recursively scan these - * additional packages and their classes searching for intent callbacks. - * <p> - * Additionally with {@link NCModelAddClasses} annotation, these two annotations allowing to have model implementation, - * i.e. intent callbacks, in external classes not linked through sub-type relationship to the main model class. 
This - * approach provides greater modularity, isolated testability and overall coding efficiencies for the larger models - * <p> - * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/intent-matching.html#binding">Intent Matching</a> section and review - * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>. - * - * @see NCModelAddClasses - * @see NCIntentRef - * @see NCIntentTerm - * @see NCIntentSample - * @see NCIntentSampleRef - * @see NCIntentSkip - * @see NCIntentMatch - */ -@Retention(value=RUNTIME) -@Target(value=TYPE) -public @interface NCModelAddPackage { - /** - * Array of JVM package names to recursively scan for intent callbacks. - * - * @return Array of JVM package names to recursively scan for intent callbacks. - */ - String[] value(); -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java new file mode 100644 index 0000000..f846c19 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValue.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.model; + +import java.io.Serializable; +import java.util.List; + +/** + * Model element's value. + * <p> + * Each model element can generally be recognized either by one of its synonyms or values. Elements and their values + * are analogous to types and instances of that type in programming languages. Each value + * has a name and optional set of its own synonyms by which that value, and ultimately its element, can be + * recognized by. Note that value name itself acts as an implicit synonym even when no additional synonyms added + * for that value. + * + * @see NCElement#getValues() + */ +public interface NCValue { + /** + * Gets value name. + * + * @return Value name. + */ + String getName(); + + /** + * Gets optional list of value's synonyms. + * + * @return Potentially empty list of value's synonyms. + */ + List<String> getSynonyms(); +} \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java new file mode 100644 index 0000000..ed4bcdd --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCValueLoader.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model; + +import java.util.Set; + +/** + * Dynamic value loader that can be used by model elements to dynamically load or create their values. Note that + * the primary use case for this interface is the dynamic value loading for the models defined in JSON/YAML + * presentation. However, it's not technically limited to that use case only, and this interface can be + * set programmatically when model elements are created programmatically too. + * <p> + * <b>JSON</b> + * <br> + * When using JSON/YAML model presentation element values can be defined statically. However, in some + * cases, it is required to load these values from external sources like database or REST services while + * keeping the rest of the model declaration static (i.e. in JSON/YAML). To accomplish this you can + * define <code>valueLoader</code> property and provide a fully qualified class name that implements + * this interface. During the model instantiation an instance of that class will be created once per + * each model and class of loader and method {@link #load(NCElement)} will be called to load + * element's values. Note that you can use both statically defined values (i.e. <code>values</code> property) + * and dynamically loaded values together and they will be merged: + * <pre class="brush: js, highlight: [11]"> + * "elements": [ + * { + * "id": "my:id", + * "description": "My description.", + * "values": [ + * { + * "name": "name1", + * "synonyms": ["syn1", "syn2"] + * } + * ], + * "valueLoader": "my.package.MyLoader" + * } + * ] + * </pre> + */ +public interface NCValueLoader { + /** + * Loads values for given model element. + * + * @param owner Model element to which this value loader belongs. + * @return Set of values, potentially empty but never {@code null}. 
+ */ + Set<NCValue> load(NCElement owner); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java similarity index 68% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java index 7fb367c..57d9d07 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerToken.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,9 +15,17 @@ * limitations under the License. */ -package org.apache.nlpcraft +package org.apache.nlpcraft.model.nlp; -/** - * - */ -object NCStart extends App +import org.apache.nlpcraft.model.NCMetadata; + +import java.util.*; + +// NCNlpNerTokensParser parsing result. +public interface NCNlpNerToken extends NCMetadata { + String getId(); + + List<NCNlpRichWord> getWords(); + + Map<String, Object> getMetadata(); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java similarity index 55% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala copy to nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java index 7fb367c..f065cdc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpNerTokensParser.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,9 +15,21 @@ * limitations under the License. */ -package org.apache.nlpcraft +package org.apache.nlpcraft.model.nlp; + +import org.apache.nlpcraft.model.NCModel; +import org.apache.nlpcraft.model.NCRequest; + +import java.util.*; /** - * - */ -object NCStart extends App + * OpenNLP implementation - provided (DATE, etc.) + * Stanford implementation - separate module. + * User implementations can be provided too. + * + * Order of configured NCNlpNerTokensParser elements is important. + * Only one pass over the configured parsers is performed. + */ +public interface NCNlpNerTokensParser { + List<NCNlpNerToken> parse(NCRequest req, NCModel mdl, List<NCNlpRichWord> words, List<NCNlpNerToken> elements); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java similarity index 64% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java index 7fb367c..1ccd742 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCStart.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpRichWord.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,9 +15,22 @@ * limitations under the License. 
*/ -package org.apache.nlpcraft +package org.apache.nlpcraft.model.nlp; + +import org.apache.nlpcraft.model.nlp.NCNlpWord; /** - * - */ -object NCStart extends App + * Extended word data, enriched by NLP. + * It is an argument for NCNlpNerTokensParser. + */ +public interface NCNlpRichWord extends NCNlpWord { + boolean isStopWord(); + + boolean isBracketed(); + + boolean isQuoted(); + + boolean isKnownWord(); + + boolean isSwearWord(); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java new file mode 100644 index 0000000..a63cab9 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpTextParser.java @@ -0,0 +1,17 @@ +package org.apache.nlpcraft.model.nlp; + +import org.apache.nlpcraft.model.NCRequest; + +import java.util.List; + +/** + * Initial request text parser. + */ +public interface NCNlpTextParser { + /** + * + * @param req Main parameter is request text, but also request can contain some hints in its request data. + * @return + */ + List<NCNlpWord> parse(NCRequest req); +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java new file mode 100644 index 0000000..511553a --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/nlp/NCNlpWord.java @@ -0,0 +1,13 @@ +package org.apache.nlpcraft.model.nlp; + +// Initial parsing result, see NCNlpTextParser. +public interface NCNlpWord { + String getWord(); + String getNormalWord(); + String getLemma(); + String getStem(); + String getPos(); + int getStart(); + int getEnd(); + int getLength(); +}
