This is an automated email from the ASF dual-hosted git repository. aradzinski pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 61d5982b4eda9ec9f3622896151c229da345a36e Author: Aaron Radzinski <[email protected]> AuthorDate: Mon Mar 14 14:32:27 2022 -0700 Refactoring. --- .../examples/lightswitch/LightSwitchRuModel.scala | 5 +- .../entity/parser/NCRuSemanticEntityParser.scala | 1 + .../lightswitch/LightSwitchGroovyModel.groovy | 1 - .../examples/lightswitch/LightSwitchJavaModel.java | 1 - .../examples/lightswitch/LightSwitchKotlinModel.kt | 1 - .../lightswitch/LightSwitchScalaModel.scala | 5 +- .../apache/nlpcraft/examples/time/TimeModel.java | 1 - .../apache/nlpcraft/NCModelPipelineBuilder.java | 24 +++++----- .../nlpcraft/internal/util/NCResourceReader.scala | 25 +++++----- .../entity/parser/NCEnSemanticEntityParser.java | 53 ---------------------- .../NCSemanticEntityParserImpl.scala | 12 ++--- .../{ => semantic}/NCSemanticSourceReader.scala | 7 +-- .../impl/{ => semantic}/NCSemanticSynonym.scala | 5 +- .../NCSemanticSynonymsProcessor.scala | 7 +-- .../parser/{ => semantic}/NCSemanticElement.java | 2 +- .../{ => semantic}/NCSemanticEntityParser.java | 4 +- .../parser/{ => semantic}/NCSemanticStemmer.java | 2 +- .../enricher/NCENOpenNlpLemmaPosTokenEnricher.java | 40 ---------------- ...nricher.java => NCEnBracketsTokenEnricher.java} | 6 +-- ...icher.java => NCEnDictionaryTokenEnricher.java} | 6 +-- ...nEnricher.java => NCEnQuotesTokenEnricher.java} | 6 +-- ...richer.java => NCEnStopWordsTokenEnricher.java} | 14 +++--- ...er.java => NCOpenNLPLemmaPosTokenEnricher.java} | 10 ++-- .../N\320\241EnSwearWordsTokenEnricher.java" | 10 ++-- ...l.scala => NCEnBracketsTokenEnricherImpl.scala} | 2 +- ...scala => NCEnDictionaryTokenEnricherImpl.scala} | 2 +- ...mpl.scala => NCEnQuotesTokenEnricherImpl.scala} | 2 +- ...Generator.scala => NCEnStopWordGenerator.scala} | 2 +- ....scala => NCEnStopWordsTokenEnricherImpl.scala} | 8 ++-- ...scala => NCEnSwearWordsTokenEnricherImpl.scala} | 2 +- ...la => NCOpenNLPLemmaPosTokenEnricherImpl.scala} | 2 +- .../internal/impl/NCModelCallbacksSpec.scala | 2 +- .../nlpcraft/internal/impl/NCModelClientSpec.scala | 3 +- .../internal/impl/NCModelPingPongSpec.scala | 2 +- .../internal/impl/NCModelPipelineManagerSpec.scala | 2 +- .../nlpcraft/nlp/NCENDefaultPipelineSpec.scala | 3 +- .../semantic/NCSemanticEntityParserSpec.scala | 4 +- .../enricher/en/NCBracketsTokenEnricherSpec.scala | 2 +- .../en/NCDictionaryTokenEnricherSpec.scala | 4 +- .../enricher/en/NCQuotesTokenEnricherSpec.scala | 4 +- .../enricher/en/NCStopWordsEnricherSpec.scala | 8 ++-- .../en/NCSwearWordsTokenEnricherSpec.scala | 4 +- .../enricher/en/impl/NCStopWordsImplSpec.scala | 4 +- .../parser/opennlp/NCOpenNLPTokenParserSpec.scala | 4 +- 44 files changed, 113 insertions(+), 201 deletions(-) diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala index 5350aee..ae3d2c2 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala @@ -21,8 +21,9 @@ import org.apache.nlpcraft.* import org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.NCRuSemanticEntityParser import org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher.{NCRuLemmaPosTokenEnricher, NCRuStopWordsTokenEnricher} import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser -import org.apache.nlpcraft.nlp.entity.parser.{NCNLPEntityParser, NCSemanticEntityParser} -import org.apache.nlpcraft.nlp.token.enricher.NCENStopWordsTokenEnricher +import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser +import org.apache.nlpcraft.nlp.token.enricher.NCEnStopWordsTokenEnricher import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import java.util diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala index 19ea166..ecdd725 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/NCRuSemanticEntityParser.scala @@ -20,6 +20,7 @@ package org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser import opennlp.tools.stemmer.snowball.SnowballStemmer import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser import org.apache.nlpcraft.nlp.entity.parser.* +import org.apache.nlpcraft.nlp.entity.parser.semantic.{NCSemanticEntityParser, NCSemanticStemmer} /** * diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy index e6e4733..5336e87 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy @@ -18,7 +18,6 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser /** * This example provides very simple implementation for NLI-powered light switch. diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java index 68d94a9..ea1361a 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java @@ -18,7 +18,6 @@ package org.apache.nlpcraft.examples.lightswitch; import org.apache.nlpcraft.*; -import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser; import java.util.List; import java.util.stream.Collectors; diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt index aeef339..c115306 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt @@ -19,7 +19,6 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser import java.util.* import java.util.stream.Collectors diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala index a3e0f1f..fdbbc0e 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala @@ -20,8 +20,9 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCResourceReader import org.apache.nlpcraft.nlp.* -import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCNLPEntityParser, NCSemanticEntityParser} -import org.apache.nlpcraft.nlp.token.enricher.NCENStopWordsTokenEnricher +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser +import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCNLPEntityParser} +import org.apache.nlpcraft.nlp.token.enricher.NCEnStopWordsTokenEnricher import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser /** diff --git a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java index 00358d1..e76e667 100644 --- a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java +++ b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java @@ -37,7 +37,6 @@ import org.apache.nlpcraft.examples.time.utils.cities.City; import org.apache.nlpcraft.examples.time.utils.cities.CityData; import org.apache.nlpcraft.examples.time.utils.keycdn.GeoData; import org.apache.nlpcraft.examples.time.utils.keycdn.GeoManager; -import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser; import java.time.ZoneId; import java.time.ZonedDateTime; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java index ec30a45..49af889 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java @@ -18,16 +18,14 @@ package org.apache.nlpcraft; import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.token.enricher.NCENBracketsTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.NCENDictionaryTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.NCENOpenNlpLemmaPosTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.NCENQuotesTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.NCENStopWordsTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.NСENSwearWordsTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.NCEnBracketsTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.NCEnDictionaryTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.NCEnQuotesTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.NCEnStopWordsTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.NСEnSwearWordsTokenEnricher; import org.apache.nlpcraft.nlp.token.parser.NCENOpenNLPTokenParser; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -199,12 +197,12 @@ public class NCModelPipelineBuilder { case "EN": tokParser = new NCENOpenNLPTokenParser(); - tokEnrichers.add(new NCENOpenNlpLemmaPosTokenEnricher()); - tokEnrichers.add(new NCENStopWordsTokenEnricher()); - tokEnrichers.add(new NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))); - tokEnrichers.add(new NCENQuotesTokenEnricher()); - tokEnrichers.add(new NCENDictionaryTokenEnricher()); - tokEnrichers.add(new NCENBracketsTokenEnricher()); + //tokEnrichers.add(new NCEnOpenNLPLemmaPosTokenEnricher()); + //tokEnrichers.add(new NCEnStopWordsTokenEnricher()); + tokEnrichers.add(new NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))); + tokEnrichers.add(new NCEnQuotesTokenEnricher()); + tokEnrichers.add(new NCEnDictionaryTokenEnricher()); + tokEnrichers.add(new NCEnBracketsTokenEnricher()); this.entParsers.addAll(entParsers); diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala index 5089958..2f285f4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala @@ -31,7 +31,7 @@ import scala.io.Source import scala.util.Using /** - * TODO: move it NCUtils? + * Caching resource reader for files that cannot be shipped with Apache release. */ object NCResourceReader extends LazyLogging: private final val DFLT_DIR = new File(System.getProperty("user.home"), ".nlpcraft/extcfg").getAbsolutePath @@ -47,10 +47,8 @@ object NCResourceReader extends LazyLogging: val normDir = if dir != null then dir else DFLT_DIR val f = new File(normDir) - if f.exists then - if !f.isDirectory then E(s"Invalid folder: $normDir") - else - if !f.mkdirs then E(s"Cannot create folder: $normDir") + if f.exists then if !f.isDirectory then E(s"Invalid folder: $normDir") + else if !f.mkdirs then E(s"Cannot create folder: $normDir") f @@ -92,7 +90,7 @@ object NCResourceReader extends LazyLogging: val nameLen = f.getName.length md5. - flatMap { (resPath, md5) => if path.endsWith(resPath) && resPath.length >= nameLen then Some(md5) else None }. + flatMap { (resPath, md5) => if path.endsWith(resPath) && resPath.length >= nameLen then Option(md5) else None }. to(LazyList). headOption. getOrElse(throw new NCException(s"MD5 data not found for: '$path'")) @@ -105,7 +103,6 @@ object NCResourceReader extends LazyLogging: */ private def isValid(f: File, md5: Map[String, String]): Boolean = val v1 = getMd5(f, md5) - val v2 = try Using.resource(Files.newInputStream(f.toPath)) { in => DigestUtils.md5Hex(in) } catch case e: IOException => throw new NCException(s"Failed to get MD5 for: '${f.getAbsolutePath}'", e) @@ -114,6 +111,14 @@ object NCResourceReader extends LazyLogging: /** * + * @param in + * @param dest + */ + private def copy(in: InputStream, dest: String): Unit = + Using.resource(new FileOutputStream(dest)) { out => IOUtils.copy(in, out) } + + /** + * * @param path * @param outFile * @param md5 @@ -121,12 +126,10 @@ object NCResourceReader extends LazyLogging: */ private def download(path: String, outFile: String, md5: Map[String, String]): File = mkDir(new File(outFile).getParent) - val url = s"$BASE_URL/$path" - try Using.resource(new BufferedInputStream(new URL(url).openStream())) { src => - Using.resource(new FileOutputStream(outFile)) { out => IOUtils.copy(src, out) } + copy(src, outFile) logger.info(s"One-time download for external config [url='$url', file='$outFile']") val f = new File(outFile) @@ -166,7 +169,7 @@ object NCResourceReader extends LazyLogging: if NCUtils.isResource(path) then getClass.getClassLoader.getResourceAsStream(path) match case in if in != null => - Using.resource(new FileOutputStream(f)) { out => IOUtils.copy(in, out) } + copy(in, f.getAbsolutePath) validateOrDownload(f) case _ => E(s"Resource not found: $path") // URL. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCEnSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCEnSemanticEntityParser.java deleted file mode 100644 index 4860677..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCEnSemanticEntityParser.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.nlp.entity.parser; - -import opennlp.tools.stemmer.PorterStemmer; -import org.apache.nlpcraft.NCTokenParser; -import org.apache.nlpcraft.nlp.token.parser.NCENOpenNLPTokenParser; - -import java.util.List; -import java.util.Map; - -/** - * - */ -public class NCEnSemanticEntityParser extends NCSemanticEntityParser { - private static final NCSemanticStemmer porterStemmer = new NCSemanticStemmer() { - private final PorterStemmer stemmerImpl = new PorterStemmer(); - - @Override - public synchronized String stem(String s) { - return stemmerImpl.stem(s.toLowerCase()); - } - }; - - private static final NCTokenParser opennlpParser = new NCENOpenNLPTokenParser(); - - public NCEnSemanticEntityParser(List<NCSemanticElement> elms) { - super(porterStemmer, opennlpParser, elms); - } - - public NCEnSemanticEntityParser(Map<String, String> macros, List<NCSemanticElement> elms) { - super(porterStemmer, opennlpParser, macros, elms); - } - - public NCEnSemanticEntityParser(String src) { - super(porterStemmer, opennlpParser, src); - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala index 2b4e35b..6294ed8 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala @@ -15,16 +15,16 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.impl +package org.apache.nlpcraft.nlp.entity.parser.impl.semantic import com.typesafe.scalalogging.LazyLogging import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.makro.NCMacroParser import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticChunkKind.* -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticEntityParserImpl.combine -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticSourceType.* -import org.apache.nlpcraft.nlp.entity.parser.* +import org.apache.nlpcraft.nlp.entity.parser.semantic.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.NCSemanticEntityParserImpl.combine +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.* +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer import java.io.* import java.util @@ -158,7 +158,7 @@ object NCSemanticEntityParserImpl: else if i >= data1.size then tmp else combine(data1, data2, i + 1, tmp.map(_ :+ data1(i)) ++ tmp.map(_ :+ data2(i))) -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticEntityParserImpl.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.NCSemanticEntityParserImpl.* /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSourceReader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSourceReader.scala similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSourceReader.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSourceReader.scala index 82d3b3e..fc4dcf4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSourceReader.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSourceReader.scala @@ -14,15 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.impl +package org.apache.nlpcraft.nlp.entity.parser.impl.semantic import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.databind.* import com.fasterxml.jackson.dataformat.yaml.* import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.NCSemanticElement -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticSourceType.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.NCSemanticSourceType.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.* +import org.apache.nlpcraft.nlp.entity.parser.semantic.* import java.io.InputStream import java.util diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonym.scala similarity index 90% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSynonym.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonym.scala index 1c24188..420df4f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSynonym.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonym.scala @@ -14,10 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.impl +package org.apache.nlpcraft.nlp.entity.parser.impl.semantic import org.apache.nlpcraft.NCToken -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticChunkKind.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.NCSemanticChunkKind.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.* import java.util.regex.Pattern diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala similarity index 97% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSynonymsProcessor.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala index 5e35ce0..2590ce4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/NCSemanticSynonymsProcessor.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.impl +package org.apache.nlpcraft.nlp.entity.parser.impl.semantic import com.fasterxml.jackson.databind.* import com.fasterxml.jackson.dataformat.yaml.* @@ -23,8 +23,9 @@ import com.typesafe.scalalogging.LazyLogging import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.makro.NCMacroParser import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticChunkKind.* -import org.apache.nlpcraft.nlp.entity.parser.* +import org.apache.nlpcraft.nlp.entity.parser.semantic.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.NCSemanticChunkKind.* +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.* import java.io.InputStream import java.util diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticElement.java similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticElement.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticElement.java index 393ad64..f8bdedb 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticElement.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticElement.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser; +package org.apache.nlpcraft.nlp.entity.parser.semantic; import java.util.Collections; import java.util.Map; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java similarity index 95% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticEntityParser.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java index 59cfdf3..8203840 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser; +package org.apache.nlpcraft.nlp.entity.parser.semantic; import org.apache.nlpcraft.NCEntity; import org.apache.nlpcraft.NCEntityParser; @@ -24,7 +24,7 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenParser; -import org.apache.nlpcraft.nlp.entity.parser.impl.NCSemanticEntityParserImpl; +import org.apache.nlpcraft.nlp.entity.parser.impl.semantic.NCSemanticEntityParserImpl; import java.util.Collections; import java.util.List; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticStemmer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java similarity index 94% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticStemmer.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java index 2b17cb0..279e4f4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/NCSemanticStemmer.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser; +package org.apache.nlpcraft.nlp.entity.parser.semantic; /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENOpenNlpLemmaPosTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENOpenNlpLemmaPosTokenEnricher.java deleted file mode 100644 index a3ad5c3..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENOpenNlpLemmaPosTokenEnricher.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.nlp.token.enricher; - -import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.token.enricher.NCOpenNlpLemmaPosTokenEnricher; - -/** - * TODO: enriches with <code>lemma</code> and <code>pos</code> properties. - * - * Models can be downloaded from the following resources: - * - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin - * - lemmatizer: https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict - */ -public class NCENOpenNlpLemmaPosTokenEnricher extends NCOpenNlpLemmaPosTokenEnricher { - /** - * - */ - public NCENOpenNlpLemmaPosTokenEnricher() { - super( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ); - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENBracketsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnBracketsTokenEnricher.java similarity index 88% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENBracketsTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnBracketsTokenEnricher.java index 4db42c0..e76fc1d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENBracketsTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnBracketsTokenEnricher.java @@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENBracketsTokenEnricherImpl; +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnBracketsTokenEnricherImpl; import java.util.List; /** * TODO: enriches with <code>brackets</code> property. */ -public class NCENBracketsTokenEnricher implements NCTokenEnricher { - private final NCENBracketsTokenEnricherImpl impl = new NCENBracketsTokenEnricherImpl(); +public class NCEnBracketsTokenEnricher implements NCTokenEnricher { + private final NCEnBracketsTokenEnricherImpl impl = new NCEnBracketsTokenEnricherImpl(); @Override public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENDictionaryTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnDictionaryTokenEnricher.java similarity index 86% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENDictionaryTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnDictionaryTokenEnricher.java index 12dc453..e75afdc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENDictionaryTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnDictionaryTokenEnricher.java @@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENDictionaryTokenEnricherImpl; +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnDictionaryTokenEnricherImpl; import java.util.List; /** * TODO: enriches with <code>dict</code> property. */ -public class NCENDictionaryTokenEnricher implements NCTokenEnricher { - private final NCENDictionaryTokenEnricherImpl impl = new NCENDictionaryTokenEnricherImpl(); +public class NCEnDictionaryTokenEnricher implements NCTokenEnricher { + private final NCEnDictionaryTokenEnricherImpl impl = new NCEnDictionaryTokenEnricherImpl(); @Override public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENQuotesTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnQuotesTokenEnricher.java similarity index 88% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENQuotesTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnQuotesTokenEnricher.java index fdf0d0e..ac0aab8 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENQuotesTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnQuotesTokenEnricher.java @@ -21,15 +21,15 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENQuotesTokenEnricherImpl; +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnQuotesTokenEnricherImpl; import java.util.List; /** * TODO: enriches with <code>quoted</code> property. */ -public class NCENQuotesTokenEnricher implements NCTokenEnricher { - private final NCENQuotesTokenEnricherImpl impl = new NCENQuotesTokenEnricherImpl(); +public class NCEnQuotesTokenEnricher implements NCTokenEnricher { + private final NCEnQuotesTokenEnricherImpl impl = new NCEnQuotesTokenEnricherImpl(); @Override public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENStopWordsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnStopWordsTokenEnricher.java similarity index 78% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENStopWordsTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnStopWordsTokenEnricher.java index 2d7bc4e..395ade9 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCENStopWordsTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCEnStopWordsTokenEnricher.java @@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENStopWordsTokenEnricherImpl; +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnStopWordsTokenEnricherImpl; import java.util.List; import java.util.Set; @@ -29,18 +29,18 @@ import java.util.Set; /** * TODO: enriches with <code>stopword</code> property. */ -public class NCENStopWordsTokenEnricher implements NCTokenEnricher { - private final NCENStopWordsTokenEnricherImpl impl; +public class NCEnStopWordsTokenEnricher implements NCTokenEnricher { + private final NCEnStopWordsTokenEnricherImpl impl; /** * */ - public NCENStopWordsTokenEnricher(Set<String> addSw, Set<String> exclSw) { - impl = new NCENStopWordsTokenEnricherImpl(addSw, exclSw); + public NCEnStopWordsTokenEnricher(Set<String> addSw, Set<String> exclSw) { + impl = new NCEnStopWordsTokenEnricherImpl(addSw, exclSw); } - public NCENStopWordsTokenEnricher() { - impl = new NCENStopWordsTokenEnricherImpl(null, null); + public NCEnStopWordsTokenEnricher() { + impl = new NCEnStopWordsTokenEnricherImpl(null, null); } @Override diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCOpenNlpLemmaPosTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCOpenNLPLemmaPosTokenEnricher.java similarity index 83% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCOpenNlpLemmaPosTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCOpenNLPLemmaPosTokenEnricher.java index fa44f93..6241a06 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCOpenNlpLemmaPosTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/NCOpenNLPLemmaPosTokenEnricher.java @@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.impl.NCLemmaPosTokenEnricherImpl; +import org.apache.nlpcraft.nlp.token.enricher.impl.NCOpenNLPLemmaPosTokenEnricherImpl; import java.util.List; @@ -32,14 +32,14 @@ import java.util.List; * - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin * - lemmatizer: https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict */ -public class NCOpenNlpLemmaPosTokenEnricher implements NCTokenEnricher { - private final NCLemmaPosTokenEnricherImpl impl; +public class NCOpenNLPLemmaPosTokenEnricher implements NCTokenEnricher { + private final NCOpenNLPLemmaPosTokenEnricherImpl impl; /** * */ - public NCOpenNlpLemmaPosTokenEnricher(String posMdlSrc, String lemmaDicSrc) { - impl = new NCLemmaPosTokenEnricherImpl(posMdlSrc, lemmaDicSrc); + public NCOpenNLPLemmaPosTokenEnricher(String posMdlSrc, String lemmaDicSrc) { + impl = new NCOpenNLPLemmaPosTokenEnricherImpl(posMdlSrc, lemmaDicSrc); } @Override diff --git "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/N\320\241ENSwearWordsTokenEnricher.java" "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/N\320\241EnSwearWordsTokenEnricher.java" similarity index 85% rename from "nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/N\320\241ENSwearWordsTokenEnricher.java" rename to "nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/N\320\241EnSwearWordsTokenEnricher.java" index 59bc100..639d4c1 100644 --- "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/N\320\241ENSwearWordsTokenEnricher.java" +++ "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/N\320\241EnSwearWordsTokenEnricher.java" @@ -21,7 +21,7 @@ import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENSwearWordsTokenEnricherImpl; +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnSwearWordsTokenEnricherImpl; import java.util.List; import java.util.Objects; @@ -29,18 +29,18 @@ import java.util.Objects; /** * TODO: enriches with <code>swear</code> property. */ -public class NСENSwearWordsTokenEnricher implements NCTokenEnricher { - private final NCENSwearWordsTokenEnricherImpl impl; +public class NСEnSwearWordsTokenEnricher implements NCTokenEnricher { + private final NCEnSwearWordsTokenEnricherImpl impl; /** * TODO: swear_words.txt - describe where it can be downloaded. * * @param mdlSrc */ - public NСENSwearWordsTokenEnricher(String mdlSrc) { + public NСEnSwearWordsTokenEnricher(String mdlSrc) { Objects.requireNonNull(mdlSrc, "Swear words model file cannot be null."); - impl = new NCENSwearWordsTokenEnricherImpl(mdlSrc); + impl = new NCEnSwearWordsTokenEnricherImpl(mdlSrc); } @Override diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENBracketsTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnBracketsTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENBracketsTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnBracketsTokenEnricherImpl.scala index c4f7709..a1ff627 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENBracketsTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnBracketsTokenEnricherImpl.scala @@ -28,7 +28,7 @@ import scala.jdk.CollectionConverters.CollectionHasAsScala /** * */ -class NCENBracketsTokenEnricherImpl extends NCTokenEnricher with LazyLogging: +class NCEnBracketsTokenEnricherImpl extends NCTokenEnricher with LazyLogging: override def enrich(req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]): Unit = val stack = new java.util.Stack[String]() val map = mutable.HashMap.empty[NCToken, Boolean] diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENDictionaryTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnDictionaryTokenEnricherImpl.scala similarity index 95% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENDictionaryTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnDictionaryTokenEnricherImpl.scala index 28dbd30..ae804d0 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENDictionaryTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnDictionaryTokenEnricherImpl.scala @@ -25,7 +25,7 @@ import java.util.List as JList /** * */ -class NCENDictionaryTokenEnricherImpl extends NCTokenEnricher: +class NCEnDictionaryTokenEnricherImpl extends NCTokenEnricher: private var dict: Set[String] = _ init() diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENQuotesTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnQuotesTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENQuotesTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnQuotesTokenEnricherImpl.scala index c7e1d5f..c9cda18 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENQuotesTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnQuotesTokenEnricherImpl.scala @@ -26,7 +26,7 @@ import scala.jdk.CollectionConverters.* /** * */ -class NCENQuotesTokenEnricherImpl extends NCTokenEnricher with LazyLogging: +class NCEnQuotesTokenEnricherImpl extends NCTokenEnricher with LazyLogging: private final val Q_POS: Set[String] = Set("``", "''") private def getPos(t: NCToken): String = t.getOpt("pos").orElseThrow(() => throw new NCException("POS not found in token.")) private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t)) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENStopWordGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnStopWordGenerator.scala similarity index 99% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENStopWordGenerator.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnStopWordGenerator.scala index 5644397..9b64868 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENStopWordGenerator.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnStopWordGenerator.scala @@ -8,7 +8,7 @@ import scala.collection.mutable /** * Generates first word sequences. */ -object NCENStopWordGenerator: +object NCEnStopWordGenerator: private final lazy val stemmer = new PorterStemmer // Output files. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENStopWordsTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnStopWordsTokenEnricherImpl.scala similarity index 98% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENStopWordsTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnStopWordsTokenEnricherImpl.scala index 56ab2ca..31a3a3f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENStopWordsTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnStopWordsTokenEnricherImpl.scala @@ -21,7 +21,7 @@ import com.typesafe.scalalogging.LazyLogging import opennlp.tools.stemmer.PorterStemmer import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.NCSemanticStemmer +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer import java.io.* import java.util @@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.* /** * */ -object NCENStopWordsTokenEnricherImpl: +object NCEnStopWordsTokenEnricherImpl: // Condition types. type Wildcard = (String, String) type Word = String @@ -165,14 +165,14 @@ object NCENStopWordsTokenEnricherImpl: private def tokenMix(toks: Seq[NCToken], maxLen: Int = Integer.MAX_VALUE): Seq[Seq[NCToken]] = (for (n <- toks.length until 0 by -1 if n <= maxLen) yield toks.sliding(n)).flatten -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENStopWordsTokenEnricherImpl.* +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnStopWordsTokenEnricherImpl.* /** * * @param addStopsSet * @param exclStopsSet */ -class NCENStopWordsTokenEnricherImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) extends NCTokenEnricher with LazyLogging: +class NCEnStopWordsTokenEnricherImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) extends NCTokenEnricher with LazyLogging: private final val stemmer = new PorterStemmer private var addStems: Set[String] = _ diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENSwearWordsTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnSwearWordsTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENSwearWordsTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnSwearWordsTokenEnricherImpl.scala index 300e080..ef37b72 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCENSwearWordsTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCEnSwearWordsTokenEnricherImpl.scala @@ -29,7 +29,7 @@ import java.util.List as JList * * @param res */ -class NCENSwearWordsTokenEnricherImpl(res: String) extends NCTokenEnricher with LazyLogging: +class NCEnSwearWordsTokenEnricherImpl(res: String) extends NCTokenEnricher with LazyLogging: require(res != null) private final val stemmer = new PorterStemmer diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCOpenNLPLemmaPosTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCOpenNLPLemmaPosTokenEnricherImpl.scala index b6e71ba..ab289cd 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCOpenNLPLemmaPosTokenEnricherImpl.scala @@ -35,7 +35,7 @@ import scala.jdk.CollectionConverters.* * @param posMdlSrc * @param lemmaDicSrc */ -class NCLemmaPosTokenEnricherImpl(posMdlSrc: String, lemmaDicSrc: String) extends NCTokenEnricher with LazyLogging: +class NCOpenNLPLemmaPosTokenEnricherImpl(posMdlSrc: String, lemmaDicSrc: String) extends NCTokenEnricher with LazyLogging: private var tagger: POSTaggerME = _ private var lemmatizer: DictionaryLemmatizer = _ diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala index b6af4c2..b749f4e 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala @@ -19,7 +19,7 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* import org.apache.nlpcraft.NCResultType.* -import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCSemanticEntityParser} +import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.* import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.apache.nlpcraft.nlp.util.opennlp.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala index 04800b4..acc0c8f 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala @@ -18,7 +18,8 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCSemanticEntityParser} +import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.Test diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala index 3f49806..42ffc06 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala @@ -19,7 +19,7 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* import org.apache.nlpcraft.NCResultType.* -import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCSemanticEntityParser} +import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.{NCSemanticTestElement as STE, *} import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.apache.nlpcraft.nlp.util.opennlp.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala index dbeaf92..ef7861c 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCSemanticElement} +import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.* import org.apache.nlpcraft.nlp.entity.parser.impl.NCNLPEntityParserImpl import org.apache.nlpcraft.nlp.util.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala index e8b7931..06dcdda 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala @@ -18,7 +18,8 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.{NCEnSemanticEntityParser, NCSemanticEntityParser} +import org.apache.nlpcraft.nlp.entity.parser.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.junit.jupiter.api.Test diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala index 76f84f6..fb334b2 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala @@ -84,9 +84,9 @@ class NCSemanticEntityParserSpec: ).asJava ) - private val stopWordsEnricher = new NCENStopWordsTokenEnricher() + private val stopWordsEnricher = new NCEnStopWordsTokenEnricher() - private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher() + private val lemmaPosEnricher = new NCEnOpenNLPLemmaPosTokenEnricher() /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala index a16e898..78ae9d4 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala @@ -30,7 +30,7 @@ import scala.jdk.CollectionConverters.* * */ class NCBracketsTokenEnricherSpec: - private val enricher = new NCENBracketsTokenEnricher() + private val enricher = new NCEnBracketsTokenEnricher() /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala index 654e132..3cc706e 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala @@ -30,9 +30,9 @@ import scala.jdk.CollectionConverters.* * */ class NCDictionaryTokenEnricherSpec: - private val dictEnricher = new NCENDictionaryTokenEnricher() + private val dictEnricher = new NCEnDictionaryTokenEnricher() - private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher() + private val lemmaPosEnricher = new NCEnOpenNLPLemmaPosTokenEnricher() @Test def test(): Unit = diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala index ef4748c..28f7547 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala @@ -31,8 +31,8 @@ import scala.jdk.CollectionConverters.* * */ class NCQuotesTokenEnricherSpec: - private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher - private val quoteEnricher = new NCENQuotesTokenEnricher + private val lemmaPosEnricher = new NCEnOpenNLPLemmaPosTokenEnricher + private val quoteEnricher = new NCEnQuotesTokenEnricher /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala index 691a47d..3b0f4a3 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala @@ -32,7 +32,7 @@ import scala.jdk.CollectionConverters.* * */ class NCStopWordsEnricherSpec: - private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher + private val lemmaPosEnricher = new NCEnOpenNLPLemmaPosTokenEnricher /** * @@ -40,7 +40,7 @@ class NCStopWordsEnricherSpec: * @param txt * @param boolVals */ - private def test(stopEnricher: NCENStopWordsTokenEnricher, txt: String, boolVals: Boolean*): Unit = + private def test(stopEnricher: NCEnStopWordsTokenEnricher, txt: String, boolVals: Boolean*): Unit = val toksList = EN_PIPELINE.getTokenParser.tokenize(txt) require(toksList.size == boolVals.size) val toks = toksList.asScala.toSeq @@ -58,13 +58,13 @@ class NCStopWordsEnricherSpec: @Test def test(): Unit = test( - new NCENStopWordsTokenEnricher(), + new NCEnStopWordsTokenEnricher(), "the test", true, false ) test( - new NCENStopWordsTokenEnricher(Set("test").asJava, Set("the").asJava), + new NCEnStopWordsTokenEnricher(Set("test").asJava, Set("the").asJava), "the test", false, true diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala index b9eec76..48fb6e8 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en import org.apache.nlpcraft.internal.util.NCResourceReader -import org.apache.nlpcraft.nlp.token.enricher.NСENSwearWordsTokenEnricher +import org.apache.nlpcraft.nlp.token.enricher.NСEnSwearWordsTokenEnricher import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -31,7 +31,7 @@ import scala.jdk.CollectionConverters.* * */ class NCSwearWordsTokenEnricherSpec: - private val enricher = new NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")) + private val enricher = new NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")) @Test def test(): Unit = diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala index 7756838..3cb8bea 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en.impl import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.token.enricher.impl.NCENStopWordsTokenEnricherImpl +import org.apache.nlpcraft.nlp.token.enricher.impl.NCEnStopWordsTokenEnricherImpl import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -45,7 +45,7 @@ class NCStopWordsImplSpec: toks.zip(words).foreach { (t, w) => t.put("stopword", w.stop) } - val mix = NCENStopWordsTokenEnricherImpl.tokenMixWithStopWords(toks) + val mix = NCEnStopWordsTokenEnricherImpl.tokenMixWithStopWords(toks) val resSorted = mix.map(_.map(_.getText).mkString).sorted val expectedSorted = expected.sorted diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala index 0393f3a..b606464 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala @@ -33,8 +33,8 @@ import scala.jdk.CollectionConverters.* * */ class NCOpenNLPTokenParserSpec: - private val lemmaPosEnricher = new NCENOpenNlpLemmaPosTokenEnricher - private val stopEnricher = new NCENStopWordsTokenEnricher(null, null) + private val lemmaPosEnricher = new NCEnOpenNLPLemmaPosTokenEnricher + private val stopEnricher = new NCEnStopWordsTokenEnricher(null, null) private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
