This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 1b6c340 WIP.
1b6c340 is described below
commit 1b6c340259caeb11d071b0de8c9c8d7f63163186
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Jan 5 00:07:41 2022 +0300
WIP.
---
nlpcraft-stanford/pom.xml | 64 +++++++++++++++++
.../parser/stanford/NCStanfordEntityParser.java | 34 +++++++--
.../stanford/impl/NCStanfordEntityParserImpl.scala | 82 ++++++++++++++++++++++
.../parser/stanford/NCStanfordTokenParser.java | 29 ++------
.../parser/stanford/impl/NCStanfordNlpImpl.scala | 82 ++++++++++++++++++++++
.../opennlp/{en => }/NCOpenNlpTokenParser.java | 4 +-
.../{en => }/NCOpenNlpTokenParserStemmer.java | 2 +-
.../opennlp/{en => }/impl/NCOpenNlpImpl.scala | 4 +-
.../impl/en/NCOpenNlpTokenParserStemmerImpl.java | 5 +-
.../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 4 +-
.../{en => }/NCEnOpenNlpTokenParserSpec.scala | 2 +-
.../apache/nlpcraft/nlp/util/NCTestConfig.scala | 5 +-
pom.xml | 27 +++++++
13 files changed, 305 insertions(+), 39 deletions(-)
diff --git a/nlpcraft-stanford/pom.xml b/nlpcraft-stanford/pom.xml
new file mode 100644
index 0000000..2034364
--- /dev/null
+++ b/nlpcraft-stanford/pom.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion> <!-- Maven module for the Stanford CoreNLP integration; inherits from nlpcraft-parent via ../pom.xml -->
+
+ <name>NLPCraft Stanford CoreNLP</name>
+ <artifactId>nlpcraft-stanford</artifactId>
+
+ <parent>
+ <groupId>org.apache.nlpcraft</groupId>
+ <artifactId>nlpcraft-parent</artifactId>
+ <version>1.0.0</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>nlpcraft</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.stanford.nlp</groupId>
+ <artifactId>stanford-corenlp</artifactId> <!-- no version here; the root pom.xml change in this commit declares ${stanford.corenlp.ver} for this artifact -->
+ </dependency>
+ <dependency>
+ <groupId>edu.stanford.nlp</groupId>
+ <artifactId>stanford-corenlp</artifactId>
+ <classifier>models</classifier> <!-- same artifact with the "models" classifier, also declared in the root pom.xml -->
+ </dependency>
+ </dependencies>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/scala</directory> <!-- NOTE(review): this commit adds the module's sources under src/main/java, not src/main/scala; confirm this directory is intended -->
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.*</include>
+ </includes>
+ </resource>
+ </resources>
+ </build>
+</project>
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParserStemmer.java
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
similarity index 53%
copy from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParserStemmer.java
copy to
nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
index 2af38c3..d29ebc3 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParserStemmer.java
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordEntityParser.java
@@ -15,8 +15,34 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en;
+package org.apache.nlpcraft.nlp.entity.parser.stanford;
-public interface NCOpenNlpTokenParserStemmer {
- String stem(String s);
-}
\ No newline at end of file
+import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.nlp.entity.parser.stanford.impl.*;
+import java.util.List;
+
+/**
+ * Entity parser facade for the Stanford CoreNLP integration.
+ *
+ * Holds a single {@link NCStanfordEntityParserImpl} and forwards every
+ * {@link NCEntityParser} call ({@code parse}, {@code start}, {@code stop}) to it,
+ * passing the arguments through and returning the implementation's result as-is.
+ * The constructor stores the given implementation without a null check.
+ */
+public class NCStanfordEntityParser implements NCEntityParser {
+ private final NCStanfordEntityParserImpl impl;
+
+ public NCStanfordEntityParser(NCStanfordEntityParserImpl impl) {
+ this.impl = impl;
+ }
+
+ @Override
+ public List<NCEntity> parse(NCRequest req, NCModelConfig cfg,
List<NCToken> toks) {
+ return impl.parse(req, cfg, toks);
+ }
+
+ @Override
+ public void start(NCModelConfig cfg) {
+ impl.start(cfg);
+ }
+
+ @Override
+ public void stop() {
+ impl.stop();
+ }
+}
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
new file mode 100644
index 0000000..15c6347
--- /dev/null
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/entity/parser/stanford/impl/NCStanfordEntityParserImpl.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.stanford.impl
+
+import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP}
+import org.apache.nlpcraft.*
+
+import java.util
+import java.util.Properties
+import scala.jdk.CollectionConverters.*
+
+class NCStanfordEntityParserImpl extends NCEntityParser: // Entity parser holding a StanfordCoreNLP pipeline; WIP, parse() is still a stub.
+ @volatile private var stanford: StanfordCoreNLP = _ // pipeline instance: assigned in start(), reset to null in stop()
+
+ override def start(cfg: NCModelConfig): Unit =
+ val p = new Properties()
+
+ p.setProperty("annotators", "nctokenize, ssplit, pos, lemma, ner") // NOTE(review): "nctokenize" is not defined in the visible code; presumably a custom annotator, confirm it is registered
+
+ // Created with hardcoded properties just to minimize configuration
issues.
+ stanford = new StanfordCoreNLP(p)
+
+ override def stop(): Unit = stanford = null // drops the pipeline reference
+ override def parse(req: NCRequest, cfg: NCModelConfig, toks:
util.List[NCToken]): util.List[NCEntity] =
+ null // TODO(review): placeholder, always returns null; the commented-out draft that follows is not active code
+// val doc = new CoreDocument("a")
+//
+// stanford.annotate(req.getText)
+//
+// doc.entityMentions().asScala.
+// filter(e => ebiTokens.contains(e.entityType().toLowerCase)).
+// foreach(e => {
+// val offsets = e.charOffsets()
+//
+// val t1 = toks.find(_.startCharIndex == offsets.first)
+// val t2 = toks.find(_.endCharIndex == offsets.second)
+//
+// if (t1.nonEmpty && t2.nonEmpty) {
+// val buf = collection.mutable.ArrayBuffer.empty[(String, Any)]
+//
+// val nne =
e.coreMap().get(classOf[NormalizedNamedEntityTagAnnotation])
+//
+// if (nne != null)
+// buf += "nne" -> nne
+//
+// val conf = e.entityTypeConfidences()
+//
+// // Key ignored because it can be category with higher level
(`location` for type `country`)
+// if (conf.size() == 1)
+// buf += "confidence" -> conf.asScala.head._2
+//
+// val typ = e.entityType().toLowerCase
+//
+// val i1 = t1.get.startCharIndex
+// val i2 = t2.get.endCharIndex
+// val toks = ns.filter(t => t.startCharIndex >= i1 &&
t.endCharIndex <= i2)
+//
+// val note = NCNlpSentenceNote(
+// toks.map(_.index),
+// s"stanford:$typ",
+// buf.toSeq: _*
+// )
+//
+// toks.foreach(_.add(note))
+// }
+// })
+//
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParser.java
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
similarity index 61%
copy from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParser.java
copy to
nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
index c5f6692..7ec5386 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParser.java
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java
@@ -15,28 +15,18 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en;
-
-import org.apache.nlpcraft.NCException;
-import org.apache.nlpcraft.NCModelConfig;
-import org.apache.nlpcraft.NCTokenParser;
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.NCOpenNlpImpl;
+package org.apache.nlpcraft.nlp.token.parser.stanford;
+import org.apache.nlpcraft.*;
+import org.apache.nlpcraft.nlp.token.parser.stanford.impl.*;
import java.util.List;
import java.util.Objects;
-/*
- * Models can be downloaded from the following resources:
- * - tokenizer: http://opennlp.sourceforge.net/models-1.5/en-token.bin
- * - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
- * - lemmatizer:
https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
- */
-
/**
*
*/
-public class NCOpenNlpTokenParser implements NCTokenParser {
- private final NCOpenNlpImpl impl;
+public class NCStanfordTokenParser implements NCTokenParser {
+ private final NCStanfordNlpImpl impl;
@Override
public void start(NCModelConfig cfg) {
@@ -55,14 +45,9 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
* @param lemmaDicSrc Local filesystem path, resources file path or URL
for OpenNLP lemmatizer dictionary.
* @throws NCException
*/
- public NCOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String
lemmaDicSrc, NCOpenNlpTokenParserStemmer stemmer) {
- Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be
null.");
- Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
- Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be
null.");
- Objects.requireNonNull(stemmer, "Stemmer cannot be null.");
-
+ public NCStanfordTokenParser() {
try {
- impl = new NCOpenNlpImpl(tokMdlSrc, posMdlSrc, lemmaDicSrc,
stemmer);
+ impl = new NCStanfordNlpImpl();
}
catch (Exception e) {
throw new NCException("Failed to create OpenNLP token parser.", e);
diff --git
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
new file mode 100644
index 0000000..95d3b2d
--- /dev/null
+++
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/impl/NCStanfordNlpImpl.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.token.parser.stanford.impl
+
+import org.apache.nlpcraft.*
+
+import java.io.StringReader
+import java.util
+import java.util.{Properties, List as JList}
+import edu.stanford.nlp.ling.CoreAnnotations.*
+import edu.stanford.nlp.ling.*
+import edu.stanford.nlp.util.*
+import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP}
+import edu.stanford.nlp.process.PTBTokenizer
+import scala.jdk.CollectionConverters.*
+
+import java.util.stream.Collectors
+
+class NCStanfordNlpImpl extends NCTokenParser: // Token parser holding a StanfordCoreNLP pipeline (WIP).
+ @volatile private var stanford: StanfordCoreNLP = _ // pipeline instance: assigned in start(), reset to null in stop()
+
+ override def start(cfg: NCModelConfig): Unit =
+ val p = new Properties()
+
+ p.setProperty("annotators", "nctokenize, ssplit, pos, lemma, ner") // NOTE(review): "nctokenize" is not defined in the visible code; presumably a custom annotator, confirm it is registered
+
+ // Created with hardcoded properties just to minimize configuration
issues.
+ stanford = new StanfordCoreNLP(p)
+
+ override def stop(): Unit = stanford = null // drops the pipeline reference
+
+ override def tokenize(text: String): JList[String] =
+ PTBTokenizer.newPTBTokenizer(new
StringReader(text)).tokenize().stream().map(p =>
p.word()).collect(Collectors.toList) // tokenizes with PTBTokenizer directly (not the pipeline) and collects each token's word()
+ override def getStem(s: String): String = null // TODO: implement stemming; currently always returns null.
+
+ // TODO: getPoses and getLemmas are duplicates except for tag() vs lemma(); factor out the shared part.
+ override def getPoses(toks: JList[String]): JList[String] =
+ val doc = new CoreDocument("a") // NOTE(review): annotates the literal text "a"; `toks` is never read. Looks like a WIP placeholder, confirm
+
+ stanford.annotate(doc) // `stanford` is still null unless start() has run
+
+ val a: JList[CoreMap] =
doc.annotation().get(classOf[SentencesAnnotation])
+
+ if (a == null)
+ throw new NCException("Sentence annotation not found.")
+
+ a.stream().flatMap(p => {
+ val value: JList[CoreLabel] =
p.asInstanceOf[ArrayCoreMap].get(classOf[TokensAnnotation])
+
+ value.stream().map(_.tag()) // tag() of every token in this sentence
+ }).collect(Collectors.toList)
+
+ override def getLemmas(toks: JList[String], poses: JList[String]):
JList[String] =
+ val doc = new CoreDocument("a") // NOTE(review): annotates the literal text "a"; `toks` and `poses` are never read. Looks like a WIP placeholder, confirm
+
+ stanford.annotate(doc) // `stanford` is still null unless start() has run
+
+ val a: JList[CoreMap] =
doc.annotation().get(classOf[SentencesAnnotation])
+
+ if (a == null)
+ throw new NCException("Sentence annotation not found.")
+
+ a.stream().flatMap(p => {
+ val value: JList[CoreLabel] =
p.asInstanceOf[ArrayCoreMap].get(classOf[TokensAnnotation])
+
+ value.stream().map(_.lemma()) // lemma() of every token in this sentence
+ }).collect(Collectors.toList)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
similarity index 95%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParser.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
index c5f6692..3276ef1 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
@@ -15,12 +15,12 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en;
+package org.apache.nlpcraft.nlp.token.parser.opennlp;
import org.apache.nlpcraft.NCException;
import org.apache.nlpcraft.NCModelConfig;
import org.apache.nlpcraft.NCTokenParser;
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.NCOpenNlpImpl;
+import org.apache.nlpcraft.nlp.token.parser.opennlp.impl.NCOpenNlpImpl;
import java.util.List;
import java.util.Objects;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParserStemmer.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserStemmer.java
similarity index 93%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParserStemmer.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserStemmer.java
index 2af38c3..bd290f0 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCOpenNlpTokenParserStemmer.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserStemmer.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en;
+package org.apache.nlpcraft.nlp.token.parser.opennlp;
public interface NCOpenNlpTokenParserStemmer {
String stem(String s);
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCOpenNlpImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
similarity index 96%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCOpenNlpImpl.scala
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
index 8e58cd1..768bc89 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCOpenNlpImpl.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl
+package org.apache.nlpcraft.nlp.token.parser.opennlp.impl
import com.typesafe.scalalogging.LazyLogging
import opennlp.tools.lemmatizer.*
@@ -24,7 +24,7 @@ import opennlp.tools.stemmer.*
import opennlp.tools.tokenize.*
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
-import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCOpenNlpTokenParserStemmer
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParserStemmer
import java.io.*
import java.util
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/en/NCOpenNlpTokenParserStemmerImpl.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
similarity index 55%
rename from
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/en/NCOpenNlpTokenParserStemmerImpl.java
rename to
nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
index a2bf80e..6ecbbfd 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/en/NCOpenNlpTokenParserStemmerImpl.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/en/NCOpenNlpTokenParserStemmerImpl.java
@@ -1,7 +1,8 @@
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.en;
+package org.apache.nlpcraft.nlp.token.parser.opennlp.impl.en;
import opennlp.tools.stemmer.PorterStemmer;
-import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCOpenNlpTokenParserStemmer;
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParserStemmer;
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParserStemmer;
public class NCOpenNlpTokenParserStemmerImpl implements
NCOpenNlpTokenParserStemmer {
private PorterStemmer stemmer = new PorterStemmer();
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
index d729f6f..60de192 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java
@@ -18,8 +18,8 @@
package org.apache.nlpcraft.nlp.benchmark.token.parser.opennlp;
import org.apache.nlpcraft.nlp.benchmark.NCBenchmarkAdapter;
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCOpenNlpTokenParser;
-import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.en.NCOpenNlpTokenParserStemmerImpl;
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParser;
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.impl.en.NCOpenNlpTokenParserStemmerImpl;
import org.junit.jupiter.api.Disabled;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
similarity index 98%
rename from
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
rename to
nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
index c84dd99..b0cf72f 100644
---
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserSpec.scala
+++
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/token/parser/opennlp/NCEnOpenNlpTokenParserSpec.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.nlp.token.parser.opennlp.en
+package org.apache.nlpcraft.nlp.token.parser.opennlp
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.ascii.NCAsciiTable
diff --git
a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
index 88f5a74..701eee8 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
@@ -18,9 +18,8 @@
package org.apache.nlpcraft.nlp.util
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.*
-import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.*
-import
org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.en.NCOpenNlpTokenParserStemmerImpl
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNlpTokenParser
+import
org.apache.nlpcraft.nlp.token.parser.opennlp.impl.en.NCOpenNlpTokenParserStemmerImpl
import java.util.{Optional, ArrayList as JAList, List as JList}
diff --git a/pom.xml b/pom.xml
index c156505..c1d4ca7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -40,6 +40,7 @@
<modules>
<module>nlpcraft</module>
+ <module>nlpcraft-stanford</module>
</modules>
<developers>
@@ -115,6 +116,13 @@
<!-- Jar with all dependencies. -->
<nlpcraft.all.deps.jar>apache-${project.artifactId}-incubating-${project.version}-all-deps.jar</nlpcraft.all.deps.jar>
+
+ <!--
+ NOTE: this library is NOT included in the binary build and is optional
+ for NLPCraft. It is licensed under GPLv3 and must be downloaded
separately by
+ the end user when required.
+ -->
+ <stanford.corenlp.ver>4.3.2</stanford.corenlp.ver>
</properties>
<dependencyManagement>
@@ -224,6 +232,25 @@
</dependency>
<!--
+ Stanford dependencies.
+ ==================
+ -->
+
+ <dependency>
+ <groupId>edu.stanford.nlp</groupId>
+ <artifactId>stanford-corenlp</artifactId>
+ <version>${stanford.corenlp.ver}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>edu.stanford.nlp</groupId>
+ <artifactId>stanford-corenlp</artifactId>
+ <version>${stanford.corenlp.ver}</version>
+ <classifier>models</classifier>
+ </dependency>
+
+
+ <!--
Test scope.
===========
-->