This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-471 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 957a4b6418cdf835617f42f7b4325c80ae918209 Merge: 2b497ae 2793cdf Author: Sergey Kamov <[email protected]> AuthorDate: Tue Dec 28 22:34:26 2021 +0300 Merge branch 'master' into NLPCRAFT-471 # Conflicts: # nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java # nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestRequest.scala # nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala .../scala/org/apache/nlpcraft/NCLifecycle.java | 2 +- .../scala/org/apache/nlpcraft/NCModelClient.java | 10 +-- .../scala/org/apache/nlpcraft/NCModelConfig.java | 2 +- .../org/apache/nlpcraft/NCModelConfigAdapter.java | 86 ++++++++++++++++------ .../main/scala/org/apache/nlpcraft/NCRequest.java | 10 +-- .../main/scala/org/apache/nlpcraft/NCToken.java | 8 +- .../token/enricher/NCEnBracketsTokenEnricher.java | 4 +- .../enricher/NCEnDictionaryTokenEnricher.java | 4 +- .../token/enricher/NCEnLanguageTokenEnricher.java | 4 +- .../token/enricher/NCEnQuotesTokenEnricher.java | 4 +- .../enricher/NCEnSwearWordsTokenEnricher.java | 4 +- .../nlp/token/enricher/impl/NCEnBracketsImpl.scala | 6 +- .../token/enricher/impl/NCEnDictionaryImpl.scala | 2 +- .../enricher/impl/NCEnLanguageWordsImpl.scala | 2 +- .../nlp/token/enricher/impl/NCEnQuotesImpl.scala | 2 +- .../token/enricher/impl/NCEnSwearWordsImpl.scala | 2 +- .../parser/opennlp/NCEnOpenNlpTokenParser.java | 4 +- .../parser/opennlp/impl/NCEnOpenNlpImpl.scala | 10 +-- .../parser/opennlp/impl/NCEnStopWordsFinder.scala | 8 +- .../apache/nlpcraft/internal/util/NCUtils.scala | 4 +- .../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 2 +- .../opennlp/NCEnOpenNlpTokenParserSpec.scala | 4 +- .../nlpcraft/internal/nlp/util/NCTestRequest.scala | 3 +- .../nlpcraft/internal/nlp/util/NCTestToken.scala | 3 +- .../nlpcraft/internal/nlp/util/NCTestUtils.scala | 11 ++- 25 files changed, 111 insertions(+), 90 deletions(-) diff --cc nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java index 8dfc3f7,7d9b5dc..b70b2d1 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java @@@ -43,49 -43,24 +43,49 @@@ public class NCModelClient implements N // TODO: } - private static void start(List<? extends NCLifecycle> list, NCModelConfig cfg) { + private static void start(ExecutorService s, List<? extends NCLifecycle> list) { + assert s != null; + if (list != null) - list.forEach(p -> s.execute(() -> p.start())); - list.forEach(p -> p.start(cfg)); ++ list.forEach(p -> s.execute(() -> p.start(cfg))); } - private static void stop(List<? extends NCLifecycle> list) { + private static void stop(ExecutorService s, List<? extends NCLifecycle> list) { + assert s != null; + if (list != null) - list.forEach(p -> p.stop()); + list.forEach(p -> s.execute(() -> p.stop())); + } + + private static void stopExecutorService(ExecutorService s) { + try { + s.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); + } + catch (InterruptedException e) { + throw new NCException("Thread interrupted.", e); + } + } + + private static ExecutorService getExecutorService() { + return Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); } @Override - public void start() { + public void start(NCModelConfig cfg) { verify(); - start(cfg.getTokenParsers(), cfg); - start(cfg.getEntityParsers(), cfg); - start(cfg.getEntityEnrichers(), cfg); - start(cfg.getTokenEnrichers(), cfg); + NCModelConfig cfg = mdl.getConfig(); + ExecutorService s = getExecutorService(); + + try { + s.execute(() -> cfg.getTokenParser().start()); - start(s, cfg.getEntityParsers()); - start(s, cfg.getEntityEnrichers()); - start(s, cfg.getTokenEnrichers()); ++ start(s, cfg.getEntityParsers(), cfg); ++ start(s, cfg.getEntityEnrichers(), cfg); ++ start(s, cfg.getTokenEnrichers(), cfg); + } + finally { + stopExecutorService(s); + } } @Override diff --cc nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala index f12936c,097981f..07b4e3a --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnOpenNlpImpl.scala @@@ -156,10 -156,9 +156,9 @@@ class NCEnOpenNlpImpl (lemma, idx) => fixes.getOrElse(idx, lemma) } - val res: Seq[NCToken] = holders.zip(posTags).zip(lemmas).toIndexedSeq.map { case ((h, pos), lemma) => + val res: Seq[NCToken] = holders.zip(posTags).zip(lemmas).toIndexedSeq.zipWithIndex.map { case (((h, pos), lemma), idx) => new NCPropertyMapAdapter with NCToken: - override def getOriginalText: String = h.origin - override def getNormalizedText: String = h.normalized + override def getText: String = h.origin override def getLemma: String = lemma override def getStem: String = stemmer.stem(h.normalized) override def getPos: String = pos diff --cc nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala index 77bb9d7,b756e63..12875e4 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala @@@ -870,9 -870,9 +870,9 @@@ object NCUtils extends LazyLogging def readResource(res: String, enc: String = "UTF-8", log: Logger = logger): List[String] = val list = try - Using.resource(Source.fromInputStream(getStream(res), enc))(_.getLines()).toList + Using.resource(Source.fromInputStream(getStream(res), enc))(_.getLines().toSeq).toList catch - case e: IOException => throw new NCException(s"Failed to read stream.", e) + case e: IOException => throw new NCException(s"Failed to read stream: $res", e) log.trace(s"Loaded resource: $res") diff --cc nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java index 974e826,193dd00..a91dc81 --- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java +++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java @@@ -53,9 -52,13 +53,9 @@@ public class NCEnOpenNlpTokenParserBenc * @return */ private static NCEnOpenNlpTokenParser prepareParser() { - NCEnOpenNlpTokenParser p = new NCEnOpenNlpTokenParser( - "opennlp/en-token.bin", - "opennlp/en-pos-maxent.bin", - "opennlp/en-lemmatizer.dict" - ); + NCEnOpenNlpTokenParser p = NCTestUtils.mkEnParser(); - p.start(); + p.start(null); // TODO: fix it. return p; } diff --cc nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala index 25bb543,06accfd..6ef2feb --- a/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala +++ b/nlpcraft/src/test/java/org/apache/nlpcraft/internal/nlp/util/NCTestUtils.scala @@@ -27,27 -25,44 +27,26 @@@ import scala.jdk.CollectionConverters. */ object NCTestUtils: /** - * * @param toks - * @param props */ - def printTokens(toks: Seq[NCToken], props: String*): Unit = + def printTokens(toks: Seq[NCToken]): Unit = val tbl = new NCAsciiTable() - tbl #= ("Origin", "Normalized", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword", "Properties") - if props.isEmpty - then tbl #= ("Text", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword") - else tbl #= ("Text", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword", "Properties") - - toks.foreach(t => - if props.isEmpty then - tbl += ( - t.getText, - t.getPos, - t.getStem, - t.getLemma, - t.getStartCharIndex, - t.getEndCharIndex, - t.getLength, - t.isStopWord - ) - else - tbl += ( - t.getText, - t.getPos, - t.getStem, - t.getLemma, - t.getStartCharIndex, - t.getEndCharIndex, - t.getLength, - t.isStopWord, - props.map(p => s"$p=${t.get[Any](p)}").mkString("{", ", ", "}") - ) - ) ++ tbl #= ("Text", "POS", "Stem", "Lemma", "Start", "End", "Length", "Stopword", "Properties") + for (t <- toks) + tbl += ( - t.getOriginalText, - t.getNormalizedText, ++ t.getText, + t.getPos, + t.getStem, + t.getLemma, + t.getStartCharIndex, + t.getEndCharIndex, + t.getLength, + t.isStopWord, + t.keysSet().asScala.map(p => s"$p=${t.get[Any](p)}").mkString("[", ", ", "]") + ) - println(s"Request: ${toks.map(_.getOriginalText).mkString(" ")}") + println(s"Request: ${toks.map(_.getText).mkString(" ")}") println(tbl.toString) /** @@@ -82,16 -78,6 +81,16 @@@ val t = make val started = now() - t.start() + t.start(null) // TODO: fix it. println(s"'${t.getClass.getSimpleName}' created with time=${started - start} ms and started=${now() - started} ms.") t + + /** - * ++ * + * @return + */ + def mkEnParser: NCEnOpenNlpTokenParser = new NCEnOpenNlpTokenParser( + "opennlp/en-token.bin", + "opennlp/en-pos-maxent.bin", + "opennlp/en-lemmatizer.dict" + )
