[incubator-nlpcraft] branch master updated: WIP

aradzinski Wed, 06 Oct 2021 13:54:12 -0700

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/master by this push:
     new 03626dd  WIP
03626dd is described below

commit 03626dd56f81244c6702ea542162b40c351e4cf4
Author: Aaron Radzinski <[email protected]>
AuthorDate: Wed Oct 6 13:54:01 2021 -0700

    WIP
---
 nlpcraft/pom.xml                                   |   5 +
 .../org/apache/nlpcraft/common/util/NCUtils.scala  |  14 +
 .../org/apache/nlpcraft/model/NCMetadata.java      | 120 +++++++++
 .../scala/org/apache/nlpcraft/model/NCModel.java   |  25 ++
 .../org/apache/nlpcraft/model/NCRejection.java     |  49 ++++
 .../scala/org/apache/nlpcraft/model/NCResult.java  | 257 ++++++++++++++++++
 .../scala/org/apache/nlpcraft/model/NCToken.java   | 294 +++++++++++++++++++++
 .../nlpcraft/model/impl/NCMetadataAdapter.java     |  50 ++++
 pom.xml                                            |   7 +
 9 files changed, 821 insertions(+)

diff --git a/nlpcraft/pom.xml b/nlpcraft/pom.xml
index 94a9ea2..a01bc70 100644
--- a/nlpcraft/pom.xml
+++ b/nlpcraft/pom.xml
@@ -89,6 +89,11 @@
             <artifactId>antlr4-runtime</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+        </dependency>
+
         <!--
          JUnit & ScalaTest dependencies.
          ===============================
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index 4cc3469..5838a2d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -17,6 +17,7 @@
 
 package org.apache.nlpcraft.common.util
 
+import com.google.gson.GsonBuilder
 import com.typesafe.scalalogging.*
 import org.apache.nlpcraft.common.NCException
 import org.apache.nlpcraft.common.ansi.NCAnsi.*
@@ -36,6 +37,7 @@ object NCUtils extends LazyLogging:
     final val NL = System getProperty "line.separator"
     private val RND = new Random()
     private val sysProps = new SystemProperties
+    private final lazy val GSON = new 
GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create()
     private final val ANSI_SEQ = Pattern.compile("\u001B\\[[?;\\d]*[a-zA-Z]")
     private val ANSI_FG_8BIT_COLORS = for (i <- 16 to 255) yield ansi256Fg(i)
     private val ANSI_BG_8BIT_COLORS = for (i <- 16 to 255) yield ansi256Bg(i)
@@ -75,6 +77,18 @@ object NCUtils extends LazyLogging:
     def isSysEnvSet(s: String): Boolean = sysProps.get(s).nonEmpty || 
sys.env.contains(s)
 
     /**
+      * Creates object from JSON string.
+      *
+      * @param js JSON string.
+      */
+    @throws[NCException]
+    def jsonToObject(js: String): AnyRef =
+        try
+            GSON.fromJson(js, classOf[Object])
+        catch
+            case e: Exception => throw new NCException(s"Failed to convert 
JSON string to map: $js", e)
+
+    /**
       * Returns `true` if given system property, or environment variable is 
provided and has value
       * 'true'. In all other cases returns `false`.
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java
new file mode 100644
index 0000000..91c10f4
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMetadata.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import org.apache.nlpcraft.common.*;
+import java.util.*;
+
+/**
+ * Provides support for mutable runtime-only metadata.
+ * <p>
+ * Read full documentation in <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">Data Model</a> section and 
review
+ * <a target=_ 
href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples";>examples</a>.
+ */
+public interface NCMetadata {
+    /**
+     * Factory for creating metadata out of standard map.
+     *
+     * @param map Map to convert to metadata.
+     * @return Newly created metadata container.
+     */
+    static NCMetadata apply(Map<String, Object> map) {
+        return () -> map;
+    }
+
+    /**
+     * Gets mutable metadata underlying container.
+     * Returned map can be used to mutate the metadata or perform any other 
operations.
+     *
+     * @return Mutable, underlying metadata container.
+     * @see #meta(String)
+     * @see #metaOpt(String)
+     * @see #meta(String, Object)
+     */
+    Map<String, Object> getMetadata();
+
+    /**
+     * Shortcut method to get given optional metadata property. Equivalent to:
+     * <pre class="brush: java">
+     *      Optional.ofNullable((T)getMetadata().get(prop));
+     * </pre>
+     *
+     * @param prop Metadata property name.
+     * @param <T> Type of the metadata property.
+     * @return Metadata optional property value.
+     */
+    @SuppressWarnings("unchecked")
+    default <T> Optional<T> metaOpt(String prop) {
+        return Optional.ofNullable((T)getMetadata().get(prop));
+    }
+
+    /**
+     * Shortcut method to get given metadata property. Equivalent to:
+     * <pre class="brush: java">
+     *      (T)getMetadata().get(prop);
+     * </pre>
+     *
+     * @param prop Metadata property name.
+     * @param <T> Type of the metadata property.
+     * @return Metadata property value or {@code null} if given metadata 
property not found.
+     */
+    @SuppressWarnings("unchecked")
+    default <T> T meta(String prop) {
+        return (T)getMetadata().get(prop);
+    }
+
+    /**
+     * Shortcut method to get given mandatory metadata property. Equivalent to:
+     * <pre class="brush: java">
+     *     T t = (T)getMetadata().get(prop);
+     *     if (t == null)
+     *         throw new NCException("Mandatory metadata property not found: " 
+ prop);
+     *     else
+     *         return t;
+     * </pre>
+     *
+     * @param prop Metadata property name.
+     * @param <T> Type of the metadata property.
+     * @return Metadata property value or throws an exception if given 
metadata property not found.
+     * @throws NCException Thrown if given metadata property not found.
+     */
+    default <T> T metax(String prop) throws NCException {
+        T t = meta(prop);
+
+        if (t == null)
+            throw new NCException("Mandatory metadata property not found: " + 
prop);
+        else
+            return t;
+    }
+
+    /**
+     * Shortcut method to get given metadata property. Equivalent to:
+     * <pre class="brush: java">
+     *      getMetadata().getOrDefault(prop, dflt);
+     * </pre>
+     *
+     * @param prop Metadata property name.
+     * @param dflt Default value to return if specified one isn't set.
+     * @param <T> Type of the metadata property.
+     * @return Metadata property value or default value if one isn't set.
+     */
+    @SuppressWarnings("unchecked")
+    default <T> T meta(String prop, T dflt) {
+        return (T)getMetadata().getOrDefault(prop, dflt);
+    }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java
new file mode 100644
index 0000000..45a0a06
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModel.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+/**
+ *
+ */
+public interface NCModel {
+    // TODO
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java
new file mode 100644
index 0000000..6ae83b3
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCRejection.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import org.apache.nlpcraft.common.*;
+
+/**
+ * Exception to indicate that user input cannot be processed as is. This 
exception can be thrown from
+ * intent callbacks.
+ * <p>
+ * This exception typically indicates that user has not provided enough 
information in the input string
+ * to have it processed automatically. In most cases this means that the 
user's input is either too short
+ * or too simple, too long or too complex, missing required context, or 
unrelated to requested data model.
+ */
+public class NCRejection extends NCException {
+    /**
+     * Creates new rejection exception with given message.
+     *
+     * @param msg Rejection message.
+     */
+    public NCRejection(String msg) {
+        super(msg);
+    }
+
+    /**
+     * Creates new rejection exception with given message and cause.
+     *
+     * @param msg Rejection message.
+     * @param cause Cause of this exception.
+     */
+    public NCRejection(String msg, Throwable cause) {
+        super(msg, cause);
+    }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java
new file mode 100644
index 0000000..5ff6681
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCResult.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import org.apache.nlpcraft.common.NCException;
+import org.apache.nlpcraft.common.util.*;
+import org.apache.nlpcraft.model.impl.NCMetadataAdapter;
+
+import java.io.Serializable;
+import java.util.Collection;
+
+/**
+ * Data model result returned from model intent callbacks. Result consists of 
the
+ * text body and the type. The type is similar in notion to MIME types.
+ * <table class="dl-table">
+ *     <caption>Supported result types:</caption>
+ *     <tr>
+ *         <th>Result Type</th>
+ *         <th>Factory Method</th>
+ *     </tr>
+ *     <tr>
+ *         <td><code>text</code></td>
+ *         <td>{@link #text(String)}</td>
+ *     </tr>
+ *     <tr>
+ *         <td><code>html</code></td>
+ *         <td>{@link #html(String)}</td>
+ *     </tr>
+ *     <tr>
+ *         <td><code>json</code></td>
+ *         <td>{@link #json(String)}</td>
+ *     </tr>
+ *     <tr>
+ *         <td><code>yaml</code></td>
+ *         <td>{@link #yaml(String)}</td>
+ *     </tr>
+ *     <tr>
+ *         <td><code>confirm</code></td>
+ *         <td>{@link #confirm(String)}</td>
+ *     </tr>
+ * </table>
+ * Note that all of these types have specific meaning for client applications 
that interpret them
+ * accordingly. For example, the REST client interfacing between NLPCraft and 
Amazon Alexa or Apple HomeKit can only
+ * accept {@code text} result type and ignore everything else.
+ */
+public class NCResult extends NCMetadataAdapter implements Serializable, 
NCMetadata {
+    /** Data Model result text. */
+    private String body;
+
+    /** Data Model result type. One of text, html, json, yaml or confirm. */
+    private String type;
+
+    /** Sequence of tokens represents a fully parsed (see {@link 
NCContext#getVariants()} method) user input. */
+    private Collection<NCToken> tokens;
+
+    /** ID of the intent. */
+    private String intentId;
+
+    /**
+     * Creates new result with given body and type.
+     *
+     * @param body Result body.
+     * @param type Result type.
+     * @throws IllegalArgumentException Thrown if type is invalid.
+     */
+    public NCResult(String body, String type) {
+        assert body != null;
+        assert type != null;
+
+        this.body = body;
+        this.type = checkType(type);
+    }
+
+    /**
+     * No-arg constructor.
+     */
+    public NCResult() {
+        // No-op.
+    }
+
+    /**
+     * Creates {@code text} result.
+     *
+     * @param txt Textual result. Text interpretation will be defined by the 
client receiving this result.
+     * @return Newly created query result.
+     */
+    public static NCResult text(String txt) {
+        return new NCResult(txt, "text");
+    }
+
+    /**
+     * Creates {@code html} result.
+     *
+     * @param html HTML markup.
+     * @return Newly created query result.
+     */
+    public static NCResult html(String html) {
+        return new NCResult(html, "html");
+    }
+
+    /**
+     * Creates {@code confirm} result.
+     *
+     * @param body Confirm result body.
+     * @return Newly created query result.
+     */
+    public static NCResult confirm(String body) {
+        return new NCResult(body, "confirm");
+    }
+
+    /**
+     * Creates {@code json} result. Note that this method will test given JSON 
string
+     * for validness by using <code>com.google.gson.Gson</code> JSON utility. 
If JSON string is invalid
+     * the {@link IllegalArgumentException} exception will be thrown.
+     *
+     * @param json Any JSON string to be rendered on the client.
+     * @return Newly created query result.
+     * @throws IllegalArgumentException Thrown if given JSON string is invalid.
+     */
+    public static NCResult json(String json) {
+        // Validation.
+        try {
+            NCUtils.jsonToObject(json);
+        }
+        catch (NCException e) {
+            throw new IllegalArgumentException(String.format("Invalid JSON 
value: %s.", json), e.getCause());
+        }
+
+        return new NCResult(json, "json");
+    }
+
+    /**
+     * Creates {@code yaml} result.
+     *
+     * @param yaml Any YAML string to be rendered on the client.
+     * @return Newly created query result.
+     */
+    public static NCResult yaml(String yaml) {
+        return new NCResult(yaml, "yaml");
+    }
+
+    /**
+     *
+     * @param type Type to check.
+     * @throws IllegalArgumentException Thrown if type is invalid.
+     */
+    private String checkType(String type) {
+        String typeLc = type.toLowerCase();
+
+        if (!typeLc.equals("html") &&
+            !typeLc.equals("json") &&
+            !typeLc.equals("yaml") &&
+            !typeLc.equals("confirm") &&
+            !typeLc.equals("text"))
+            throw new IllegalArgumentException("Invalid result type: " + type);
+        else
+            return typeLc;
+    }
+
+    /**
+     * Sets result body.
+     *
+     * @param body Result body.
+     */
+    public void setBody(String body) {
+        this.body = body;
+    }
+
+    /**
+     * Set result type.
+     *
+     * @param type Result type.
+     * @throws IllegalArgumentException Thrown if type is invalid.
+     */
+    public void setType(String type) {
+        this.type = checkType(type);
+    }
+
+    /**
+     * Gets tokens that were used to produce this query result. Note that the
+     * returned tokens can come from the current request as well as from the 
conversation (i.e. from
+     * previous requests). Order of tokens is not important.
+     *
+     * @return Tokens that were used to produce this query result.
+     * @see #setTokens(Collection)
+     */
+    public Collection<NCToken> getTokens() {
+        return tokens;
+    }
+
+    /**
+     * Sets a collection of tokens that was used to produce this query result. 
Note that the
+     * given tokens can come from the current request as well as from the 
conversation (i.e. from
+     * previous requests). Order of tokens is not important.
+     * <p>
+     * Providing these tokens is necessary for proper STM operation. If 
conversational support isn't used
+     * setting these tokens is not required. Note that built-in intent-based 
matcher automatically sets
+     * these tokens.
+     *
+     * @param tokens Collection of tokens that was used to produce this query 
result.
+     * @see #getTokens()
+     */
+    public void setTokens(Collection<NCToken> tokens) {
+        this.tokens = tokens;
+    }
+
+    /**
+     * Gets result type.
+     *
+     * @return Result type.
+     */
+    public String getType() {
+        return type;
+    }
+
+    /**
+     * Gets result body.
+     *
+     * @return Result body.
+     */
+    public String getBody() {
+        return body;
+    }
+
+    /**
+     * Get optional intent ID.
+     *
+     * @return Intent ID or {@code null} if intent ID was not available.
+     */
+    public String getIntentId() {
+        return intentId;
+    }
+
+    /**
+     * Sets optional intent ID.
+     *
+     * @param intentId Intent ID to set for this result.
+     */
+    public void setIntentId(String intentId) {
+        this.intentId = intentId;
+    }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
new file mode 100644
index 0000000..b815511
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model;
+
+import java.util.List;
+
+/**
+ * Detected model element.
+ * <p>
+ * Read full documentation in <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">Data Model</a> section and 
review
+ * <a target=_ 
href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples";>examples</a>.
+ *
+ * @see NCElement
+ */
+public interface NCToken extends NCMetadata {
+    /**
+     * Gets reference to the model this token belongs to.
+     *
+     * @return Model reference.
+     */
+    NCModel getModel();
+
+    /**
+     * Gets ID of the server request this token is part of.
+     *
+     * @return ID of the server request this token is part of.
+     */
+    String getServerRequestId();
+
+    /**
+     * If this token represents user defined model element this method returns
+     * the ID of that element. Otherwise, it returns ID of the built-in system 
token.
+     * Note that a sentence can have multiple tokens with the same element ID. 
+     *
+     * @return ID of the element (system or user defined).
+     * @see NCElement#getId()
+     */
+    String getId();
+
+    /**
+     * Gets the optional parent ID of the model element this token represents. 
This is only available
+     * for user-defined model elements - built-in tokens do not have parents 
and this will return {@code null}.
+     *
+     * @return ID of the token's element immediate parent or {@code null} if 
not available.
+     * @see NCElement#getParentId()
+     * @see #getAncestors()
+     */
+    String getParentId();
+
+    /**
+     * Gets the list of all parent IDs from this token up to the root. This 
is only available
+     * for user-defined model elements - built-in tokens do not have parents 
and will return an empty list.
+     *
+     * @return List, potentially empty but never {@code null}, of all parent 
IDs from this token up to the root.
+     * @see #getParentId()
+     */
+    List<String> getAncestors();
+
+    /**
+     * Tests whether this token is a child of given token ID. It is equivalent 
to:
+     * <pre class="brush: java">
+     *     return getAncestors().contains(tokId);
+     * </pre>
+     *
+     * @param tokId Ancestor token ID.
+     * @return <code>true</code> if this token is a child of given token ID, 
<code>false</code> otherwise.
+     */
+    default boolean isChildOf(String tokId) {
+        return getAncestors().contains(tokId);
+    }
+
+    /**
+     * Gets the value if this token was detected via element's value (or its 
synonyms). Otherwise,
+     * returns {@code null}. Only applicable for user-defined model elements - 
built-in tokens
+     * do not have values, and it will return {@code null}.
+     *
+     * @return Value for the user-defined model element or {@code null}, if 
not available.
+     * @see NCElement#getValues()
+     */
+    String getValue();
+
+    /**
+     * Gets the list of groups this token belongs to. Note that, by default, 
if not specified explicitly,
+     * token always belongs to one group with ID equal to token ID.
+     *
+     * @return Token groups list. Never {@code null} - but can be empty.
+     * @see NCElement#getGroups()
+     */
+    List<String> getGroups();
+
+    /**
+     * Tests whether this token belongs to the given group. It is equivalent 
to:
+     * <pre class="brush: java">
+     *      return getGroups().contains(grp);
+     * </pre>
+     *
+     * @param grp Group to test.
+     * @return <code>True</code> if this token belongs to the group 
<code>grp</code>, {@code false} otherwise.
+     */
+    default boolean isMemberOf(String grp) {
+        return getGroups().contains(grp);
+    }
+
+    /**
+     * Gets start character index of this token in the original text.
+     *
+     * @return Start character index of this token.
+     */
+    int getStartCharIndex();
+
+    /**
+     * Gets end character index of this token in the original text.
+     *
+     * @return End character index of this token.
+     */
+    int getEndCharIndex();
+
+    /**
+     * A shortcut method checking whether this token is a stopword. Stopwords 
are some extremely common
+     * words which add little value in helping to understand user input and 
are excluded from the
+     * processing entirely. For example, words like a, the, can, of, about, 
over, etc. are
+     * typical stopwords in English. NLPCraft has built-in set of stopwords.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:stopword");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Whether this token is a stopword.
+     */
+    default boolean isStopWord() {
+        return meta("nlpcraft:nlp:stopword");
+    }
+
+    /**
+     * A shortcut method checking whether this token represents a free word. A 
free word is a
+     * token that was detected neither as a part of user defined nor system 
tokens.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:freeword");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Whether this token is a freeword.
+     */
+    default boolean isFreeWord() {
+        return meta("nlpcraft:nlp:freeword");
+    }
+
+    /**
+     * A shortcut method that gets original user input text for this token.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:origtext");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Original user input text for this token.
+     */
+    default String getOriginalText() {
+        return meta("nlpcraft:nlp:origtext");
+    }
+
+    /**
+     * A shortcut method that gets index of this token in the sentence.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:index");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Index of this token in the sentence.
+     */
+    default int getIndex() {
+        return meta("nlpcraft:nlp:index");
+    }
+
+    /**
+     * A shortcut method that gets normalized user input text for this token.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:normtext");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Normalized user input text for this token.
+     */
+    default String getNormalizedText() {
+        return meta("nlpcraft:nlp:normtext");
+    }
+
+    /**
+     * A shortcut method on whether this token is a swear word. NLPCraft has 
built-in list of
+     * common English swear words.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:swear");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Whether this token is a swear word.
+     */
+    default boolean isSwear() {
+        return meta("nlpcraft:nlp:swear");
+    }
+
+    /**
+     * A shortcut method to get lemma of this token, i.e. a canonical form of 
this word. Note that
+     * stemming and lemmatization allow reducing inflectional forms and 
sometimes derivationally related
+     * forms of a word to a common base form. Lemmatization refers to the use 
of a vocabulary and
+     * morphological analysis of words, normally aiming to remove inflectional 
endings only and to
+     * return the base or dictionary form of a word, which is known as the 
lemma.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:lemma");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Lemma of this token, i.e. a canonical form of this word.
+     */
+    default String getLemma() {
+        return meta("nlpcraft:nlp:lemma");
+    }
+
+    /**
+     * A shortcut method to get stem of this token. Note that stemming and 
lemmatization allow reducing
+     * inflectional forms and sometimes derivationally related forms of a word 
to a common base form.
+     * Unlike lemma, stemming is a basic heuristic process that chops off the 
ends of words in the
+     * hope of achieving this goal correctly most of the time, and often 
includes the removal of derivational affixes.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:stem");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Stem of this token.
+     */
+    default String getStem() {
+        return meta("nlpcraft:nlp:stem");
+    }
+
+    /**
+     * A shortcut method to get Penn Treebank POS tag for this token. Note 
that additionally to standard Penn
+     * Treebank POS tags NLPCraft introduced '---' synthetic tag to indicate a 
POS tag for multiword tokens.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:pos");
+     * </pre>
+     * See more information on token metadata <a target=_ 
href="https://nlpcraft.apache.org/data-model.html">here</a>.
+     *
+     * @return Penn Treebank POS tag for this token.
+     */
+    default String getPos() {
+        return meta("nlpcraft:nlp:pos");
+    }
+
+    /**
+     * A shortcut method that gets internal globally unique system ID of the 
token.
+     * <p>
+     * This method is equivalent to:
+     * <pre class="brush: java">
+     *     return meta("nlpcraft:nlp:unid");
+     * </pre>
+     *
+     * @return Internal globally unique system ID of the token.
+     */
+    default String getUnid() {
+        return meta("nlpcraft:nlp:unid");
+    }    
+}
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCMetadataAdapter.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCMetadataAdapter.java
new file mode 100644
index 0000000..b3b9d09
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCMetadataAdapter.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.impl;
+
+import org.apache.nlpcraft.model.NCMetadata;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Abstract adapter for metadata.
+ */
+public abstract class NCMetadataAdapter implements NCMetadata {
+    final private Map<String, Object> meta;
+
+    /**
+     *
+     */
+    protected NCMetadataAdapter() {
+        meta = new HashMap<>();
+    }
+
+    /**
+     *
+     * @param meta Metadata container to use.
+     */
+    protected NCMetadataAdapter(Map<String, Object> meta) {
+        this.meta = meta;
+    }
+
+    @Override
+    public Map<String, Object> getMetadata() {
+        return meta;
+    }
+}
diff --git a/pom.xml b/pom.xml
index 9f93030..6210045 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,6 +102,7 @@
         <scala3.ref.ver>1.0.0</scala3.ref.ver>
         <junit.ver>5.8.1</junit.ver>
         <scalatest.ver>3.2.9</scalatest.ver>
+        <gson.ver>2.8.5</gson.ver>
 
         <!-- Force specific encoding on text resources. -->
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -154,6 +155,12 @@
             -->
 
             <dependency>
+                <groupId>com.google.code.gson</groupId>
+                <artifactId>gson</artifactId>
+                <version>${gson.ver}</version>
+            </dependency>
+
+            <dependency>
                 <groupId>org.antlr</groupId>
                 <artifactId>antlr4-runtime</artifactId>
                 <version>${org.antlr4.ver}</version>

[incubator-nlpcraft] branch master updated: WIP

Reply via email to