This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new a41a1c0 WIP.
a41a1c0 is described below
commit a41a1c033c837771ca47723ab9a597eac7e79946
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Jan 1 19:08:53 2022 +0300
WIP.
---
.../org/apache/nlpcraft/NCEntityValidator.java | 34 ++++++++++
.../scala/org/apache/nlpcraft/NCModelConfig.java | 76 +++++-----------------
.../org/apache/nlpcraft/NCModelConfigAdapter.java | 49 ++++++++++++++
.../org/apache/nlpcraft/NCTokenValidator.java | 34 ++++++++++
.../org/apache/nlpcraft/NCVariantsFilter.java | 34 ++++++++++
.../apache/nlpcraft/nlp/util/NCTestConfig.scala | 4 ++
6 files changed, 170 insertions(+), 61 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java
new file mode 100644
index 0000000..cf64c39
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntityValidator.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft;
+
+import java.util.List;
+
+/**
+ * Lifecycle component that validates the entities found for a request.
+ */
+public interface NCEntityValidator extends NCLifecycle {
+ /**
+ * Checks all found entities and throws exceptions if necessary.
+ *
+ * @param req Request.
+ * @param cfg Model configuration.
+ * @param entities Found entities to check.
+ */
+ void validate(NCRequest req, NCModelConfig cfg, List<NCEntity> entities);
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
index 02e2883..78aa42e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfig.java
@@ -24,59 +24,53 @@ import java.util.*;
*/
public interface NCModelConfig extends NCPropertyMap {
/**
- * Default value for {@link #getMinTokens()} method.
- */
- int DFLT_MIN_TOKENS = 0;
-
- /**
- * Default value for {@link #getMaxTokens()} method.
- */
- int DFLT_MAX_TOKENS = 50;
-
- /**
- * Default value for {@link #getMinNonStopWords()} method.
+ *
+ * @return
*/
- int DFLT_MIN_NON_STOPWORDS = 0;
+ NCTokenizer getTokenizer();
/**
- * Default value for {@link #getMaxStopWords()} method.
+ *
+ * @return
*/
- int DFLT_MAX_STOPWORDS = 15;
+ List<NCTokenParser> getTokenParsers();
/**
- * Default value for {@link #isNotLatinCharsetAllowed()} method.
+ *
+ * @return
*/
- boolean DFLT_IS_NOT_LATIN_CHARSET_ALLOWED = false;
+ List<NCTokenEnricher> getTokenEnrichers();
/**
*
* @return
*/
- NCTokenizer getTokenizer();
+ List<NCEntityEnricher> getEntityEnrichers();
/**
*
* @return
*/
- List<NCTokenParser> getTokenParsers();
+ List<NCEntityParser> getEntityParsers();
/**
*
* @return
*/
- List<NCTokenEnricher> getTokenEnrichers();
+ List<NCTokenValidator> getTokenValidators();
/**
*
* @return
*/
- List<NCEntityEnricher> getEntityEnrichers();
+ List<NCEntityValidator> getEntityValidators();
/**
*
* @return
*/
- List<NCEntityParser> getEntityParsers();
+ List<NCVariantsFilter> getVariantsFilters();
+
/**
* Gets unique, <i>immutable</i> ID of this model.
@@ -118,44 +112,4 @@ public interface NCModelConfig extends NCPropertyMap {
default String getOrigin() {
return getClass().getCanonicalName();
}
-
- /**
- *
- * @return
- */
- default int getMinTokens() {
- return DFLT_MIN_TOKENS;
- }
-
- /**
- *
- * @return
- */
- default int getMaxTokens() {
- return DFLT_MAX_TOKENS;
- }
-
- /**
- *
- * @return
- */
- default int getMaxStopWords() {
- return DFLT_MAX_STOPWORDS;
- }
-
- /**
- *
- * @return
- */
- default int getMinNonStopWords() {
- return DFLT_MIN_NON_STOPWORDS;
- }
-
- /**
- *
- * @return
- */
- default boolean isNotLatinCharsetAllowed() {
- return DFLT_IS_NOT_LATIN_CHARSET_ALLOWED;
- }
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
index 9a51d83..1ffb978 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java
@@ -32,6 +32,10 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>();
private final List<NCEntityEnricher> entEnrichers = new ArrayList<>();
private final List<NCEntityParser> entParsers = new ArrayList<>();
+ private final List<NCTokenValidator> tokenValidators = new ArrayList<>();
+ private final List<NCEntityValidator> entityValidators = new ArrayList<>();
+ private final List<NCVariantsFilter> variantsFilters = new ArrayList<>();
+
/**
*
@@ -97,6 +101,36 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
entEnrichers.add(entEnricher);
}
+ /**
+ * Adds a token validator. NOTE(review): named addEntityParser - probably meant addTokenValidator; confirm.
+ * @param tokValidator Token validator to add, must not be {@code null}.
+ */
+ public void addEntityParser(NCTokenValidator tokValidator) {
+ Objects.requireNonNull(tokValidator, "Token validator cannot be
null.");
+
+ tokenValidators.add(tokValidator);
+ }
+
+ /**
+ * Adds an entity validator. NOTE(review): named addEntityParser - probably meant addEntityValidator; confirm.
+ * @param entValidator Entity validator to add, must not be {@code null}.
+ */
+ public void addEntityParser(NCEntityValidator entValidator) {
+ Objects.requireNonNull(entValidator, "Entity validator cannot be
null.");
+
+ entityValidators.add(entValidator);
+ }
+
+ /**
+ * Adds a variants filter to this configuration.
+ * @param variantFilter Variants filter to add, must not be {@code null}.
+ */
+ public void addVariantFilter(NCVariantsFilter variantFilter) {
+ Objects.requireNonNull(variantFilter, "Variant filter cannot be
null.");
+
+ variantsFilters.add(variantFilter);
+ }
+
@Override
public String getId() {
return id;
@@ -136,4 +170,19 @@ public class NCModelConfigAdapter extends
NCPropertyMapAdapter implements NCMode
public NCTokenizer getTokenizer() {
return tokenizer;
}
+
+ @Override
+ public List<NCTokenValidator> getTokenValidators() {
+ return tokenValidators; // Returns the internal list itself, not a copy.
+ }
+
+ @Override
+ public List<NCEntityValidator> getEntityValidators() {
+ return entityValidators; // Returns the internal list itself, not a copy.
+ }
+
+ @Override
+ public List<NCVariantsFilter> getVariantsFilters() {
+ return variantsFilters; // Returns the internal list itself, not a copy.
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java
new file mode 100644
index 0000000..553959d
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCTokenValidator.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft;
+
+import java.util.List;
+
+/**
+ * Lifecycle component that validates the tokens parsed for a request.
+ */
+public interface NCTokenValidator extends NCLifecycle {
+ /**
+ * Checks parsed tokens and throws exceptions if necessary.
+ *
+ * @param req Request.
+ * @param cfg Model configuration.
+ * @param toks Parsed tokens to check.
+ */
+ void validate(NCRequest req, NCModelConfig cfg, List<NCToken> toks);
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsFilter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsFilter.java
new file mode 100644
index 0000000..09ec82c
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantsFilter.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft;
+
+import java.util.List;
+
+/**
+ * Lifecycle component that filters the entity variants found for a request.
+ */
+public interface NCVariantsFilter extends NCLifecycle {
+ /**
+ * Filters all found entity variants and returns the resulting variants.
+ *
+ * @param req Request.
+ * @param cfg Model configuration.
+ * @param variants Found variants, each one a list of entities.
+ */
+ List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg,
List<List<NCEntity>> variants);
+}
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
index 53f9e71..bd1d1b1 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
@@ -39,6 +39,10 @@ object NCTestConfig:
override def getTokenEnrichers: util.List[NCTokenEnricher] = new
util.ArrayList[NCTokenEnricher]()
override def getEntityEnrichers: util.List[NCEntityEnricher] = new
util.ArrayList[NCEntityEnricher]()
override def getEntityParsers: util.List[NCEntityParser] = new
util.ArrayList[NCEntityParser]()
+ override def getTokenValidators: util.List[NCTokenValidator] = new
util.ArrayList[NCTokenValidator]()
+ override def getEntityValidators: util.List[NCEntityValidator] = new
util.ArrayList[NCEntityValidator]()
+ override def getVariantsFilters: util.List[NCVariantsFilter] = new
util.ArrayList[NCVariantsFilter]()
+
override def getId: String = "test"
override def getName: String = "test"
override def getVersion: String = "1.0"