This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 9d1dfa96 OPENNLP-385: Add unit tests for OpenNLP UIMA component (#748)
9d1dfa96 is described below
commit 9d1dfa96d8882d17cde5343e88782d7562609b19
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Feb 14 12:59:39 2025 +0100
OPENNLP-385: Add unit tests for OpenNLP UIMA component (#748)
- adapts and reactivates AnnotatorsInitializationTest by T. Teofili,
originally provided in JIRA issue, to execute for all xml descriptors
- converts AnnotatorsInitializationTest to several integration tests
executed via Maven failsafe plugin
- adds Parser.xml to 'test-descriptors'
- adds more test classes
- configures Maven resource filtering for xml 'test-descriptors'
- moves 'ci' profile to upper-level pom.xml for re-use in opennlp-uima
component
- adds simplelogger.properties to test resources to avoid log spam during
build caused by internal UIMA logger config
---
opennlp-tools/pom.xml | 15 +-
opennlp-uima/pom.xml | 34 ++-
.../main/java/opennlp/uima/chunker/Chunker.java | 7 +-
.../uima/doccat/AbstractDocumentCategorizer.java | 5 +-
.../opennlp/uima/doccat/DocumentCategorizer.java | 4 +-
.../opennlp/uima/namefind/AbstractNameFinder.java | 14 +-
.../uima/namefind/DictionaryNameFinder.java | 19 +-
.../java/opennlp/uima/namefind/NameFinder.java | 9 +-
.../java/opennlp/uima/normalizer/Normalizer.java | 13 +-
.../opennlp/uima/normalizer/StringDictionary.java | 1 +
.../src/main/java/opennlp/uima/parser/Parser.java | 1 +
.../uima/sentdetect/AbstractSentenceDetector.java | 4 +-
.../opennlp/uima/sentdetect/SentenceDetector.java | 6 +-
.../uima/sentdetect/SentenceModelResourceImpl.java | 1 +
.../opennlp/uima/tokenize/AbstractTokenizer.java | 8 +-
.../opennlp/uima/tokenize/SimpleTokenizer.java | 5 +
.../main/java/opennlp/uima/tokenize/Tokenizer.java | 3 +
.../opennlp/uima/tokenize/WhitespaceTokenizer.java | 6 +
.../opennlp/uima/util/AbstractModelResource.java | 6 +-
.../opennlp/uima/util/AnnotationComparator.java | 4 +-
.../main/java/opennlp/uima/util/AnnotatorUtil.java | 62 ++---
.../opennlp/uima/util/ContainingConstraint.java | 2 +
.../main/java/opennlp/uima/util/OpennlpUtil.java | 30 ++-
.../src/main/java/opennlp/uima/util/UimaUtil.java | 18 +-
.../src/test/java/opennlp/uima/AbstractIT.java | 237 ++++++++++++++++++
.../src/test/java/opennlp/uima/AbstractTest.java | 49 ++++
.../test/java/opennlp/uima/AbstractUimaTest.java | 77 ++++++
.../opennlp/uima/AnnotatorsInitializationTest.java | 66 -----
.../java/opennlp/uima/FullAnnotatorsFlowIT.java | 68 ++++++
.../test/java/opennlp/uima/SingleAnnotatorIT.java | 85 +++++++
.../uima/dictionary/DictionaryResourceTest.java | 7 +-
.../opennlp/uima/normalizer/NumberUtilTest.java | 5 +-
.../uima/normalizer/StringDictionaryTest.java | 78 ++++++
.../uima/util/AnnotationComboIteratorTest.java | 4 +-
.../uima/util/AnnotationComparatorTest.java | 118 +++++++++
.../java/opennlp/uima/util/AnnotatorUtilTest.java | 235 ++++++++++++++++++
.../src/test/java/opennlp/uima/util/CasUtil.java | 2 +-
.../java/opennlp/uima/util/OpennlpUtilTest.java | 144 +++++++++++
.../test/java/opennlp/uima/util/UimaUtilTest.java | 117 +++++++++
.../src/test/resources/simplelogger.properties | 19 ++
.../test/resources/test-descriptors/Chunker.xml | 5 +-
.../resources/test-descriptors/DateNameFinder.xml | 4 +-
.../test-descriptors/DictionaryNameFinder.xml | 4 +-
.../test-descriptors/LocationNameFinder.xml | 4 +-
.../resources/test-descriptors/MoneyNameFinder.xml | 4 +-
.../test-descriptors/OpenNlpTextAnalyzer.xml | 266 +++++++++++++++++++++
.../test-descriptors/OrganizationNameFinder.xml | 4 +-
.../test-descriptors/{Tokenizer.xml => Parser.xml} | 94 ++++++--
.../test-descriptors/PercentageNameFinder.xml | 4 +-
.../test-descriptors/PersonNameFinder.xml | 18 +-
.../test/resources/test-descriptors/PosTagger.xml | 18 +-
.../test-descriptors/SentenceDetector.xml | 17 +-
.../{Tokenizer.xml => SimpleTokenizer.xml} | 35 +--
.../resources/test-descriptors/TimeNameFinder.xml | 4 +-
.../test/resources/test-descriptors/Tokenizer.xml | 15 +-
.../test/resources/test-descriptors/TypeSystem.xml | 45 +++-
.../{Tokenizer.xml => WhitespaceTokenizer.xml} | 35 +--
.../test/resources/training-params-invalid.conf | 22 ++
.../src/test/resources/training-params-test.conf | 22 ++
pom.xml | 30 ++-
60 files changed, 1916 insertions(+), 322 deletions(-)
diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml
index 39a0ca9f..8023e79c 100644
--- a/opennlp-tools/pom.xml
+++ b/opennlp-tools/pom.xml
@@ -136,25 +136,12 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
- <version>${maven.failsafe.plugin}</version>
- <configuration>
- <argLine>-DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine>
- </configuration>
</plugin>
+
</plugins>
</build>
- <properties>
- <opennlp.download.home>${user.home}</opennlp.download.home>
- </properties>
-
<profiles>
- <profile>
- <id>ci</id>
- <properties>
- <opennlp.download.home>${project.build.directory}</opennlp.download.home>
- </properties>
- </profile>
<profile>
<id>jmh</id>
<dependencies>
diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml
index 91627082..b6b9a666 100644
--- a/opennlp-uima/pom.xml
+++ b/opennlp-uima/pom.xml
@@ -79,6 +79,12 @@
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-params</artifactId>
+ <scope>test</scope>
+ </dependency>
+
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
@@ -87,11 +93,35 @@
</dependencies>
<build>
+ <testResources>
+ <testResource>
+ <directory>src/test/resources</directory>
+ <filtering>true</filtering>
+ </testResource>
+ </testResources>
+
<plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <argLine>-Xmx2048m -DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine>
+ <forkCount>${opennlp.forkCount}</forkCount>
+ <failIfNoSpecifiedTests>false</failIfNoSpecifiedTests>
+ <excludes>
+ <exclude>**/*IT.java</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ </plugin>
+
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
- <version>3.8.1</version>
<executions>
<execution>
<id>copy-dependencies</id>
@@ -100,7 +130,7 @@
<goal>copy-dependencies</goal>
</goals>
<configuration>
- <excludeScope>provided</excludeScope>
+ <includeScope>runtime</includeScope>
<stripVersion>true</stripVersion>
</configuration>
</execution>
diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
index 213c5abf..d356f4fb 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
@@ -109,8 +109,8 @@ public final class Chunker extends CasAnnotator_ImplBase {
Logger mLogger = context.getLogger();
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker annotator.");
+ if (mLogger.isLoggable(Level.DEBUG)) {
+ mLogger.log(Level.DEBUG, "Initializing the OpenNLP Chunker annotator.");
}
ChunkerModel model;
@@ -172,8 +172,7 @@ public final class Chunker extends CasAnnotator_ImplBase {
String[] tokens = new String[tokenAnnotationIndex.size()];
String[] pos = new String[tokenAnnotationIndex.size()];
- AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex
- .size()];
+ AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex.size()];
int index = 0;
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
index 768738b3..4d374fb6 100644
---
a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
+++
b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java
@@ -50,6 +50,7 @@ abstract class AbstractDocumentCategorizer extends
CasAnnotator_ImplBase {
private Type mTokenType;
+ @Override
public void initialize(UimaContext context)
throws ResourceInitializationException {
@@ -59,8 +60,8 @@ abstract class AbstractDocumentCategorizer extends
CasAnnotator_ImplBase {
Logger mLogger = context.getLogger();
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
+ if (mLogger.isLoggable(Level.DEBUG)) {
+ mLogger.log(Level.DEBUG, "Initializing the OpenNLP Categorizer.");
}
DoccatModel model;
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
index 6a3ca174..1071c09a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java
@@ -36,7 +36,7 @@ public class DocumentCategorizer extends
AbstractDocumentCategorizer {
private Feature mCategoryFeature;
-
+ @Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
@@ -55,7 +55,7 @@ public class DocumentCategorizer extends
AbstractDocumentCategorizer {
AnnotationFS categoryAnnotation;
- if (categoryIndex.size() > 0) {
+ if (!categoryIndex.isEmpty()) {
categoryAnnotation = categoryIndex.iterator().next();
} else {
categoryAnnotation = tcas.createAnnotation(mCategoryType, 0,
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
index 579a0ca6..3d99d266 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java
@@ -77,8 +77,8 @@ abstract class AbstractNameFinder extends
CasAnnotator_ImplBase {
mLogger = context.getLogger();
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the " + name + ".");
+ if (mLogger.isLoggable(Level.DEBUG)) {
+ mLogger.log(Level.DEBUG, "Initializing the " + name + ".");
}
isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter(
@@ -133,21 +133,19 @@ abstract class AbstractNameFinder extends
CasAnnotator_ImplBase {
mNameTypeMapping = Collections.unmodifiableMap(nameTypeMap);
}
- if (mNameType == null && mNameTypeMapping.size() == 0) {
+ if (mNameType == null && mNameTypeMapping.isEmpty()) {
throw new AnalysisEngineProcessException(
new Exception("No name type or valid name type mapping configured!"));
}
}
- protected void postProcessAnnotations(Span[] detectedNames,
- AnnotationFS[] nameAnnotations) {
- }
+ protected abstract void postProcessAnnotations(Span[] detectedNames,
+ AnnotationFS[] nameAnnotations);
/**
* Called if the current document is completely processed.
*/
- protected void documentDone(CAS cas) {
- }
+ protected abstract void documentDone(CAS cas);
protected abstract Span[] find(CAS cas, String[] tokens);
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
b/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
index b3196246..f1568609 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
@@ -18,9 +18,9 @@
package opennlp.uima.namefind;
import java.io.IOException;
-import java.io.InputStream;
import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
@@ -66,10 +66,8 @@ public class DictionaryNameFinder extends AbstractNameFinder
{
String modelName = AnnotatorUtil.getRequiredStringParameter(context,
UimaUtil.DICTIONARY_PARAMETER);
- InputStream inModel = AnnotatorUtil.getResourceAsStream(context,
- modelName);
-
- nameFinderDictionary = new Dictionary(inModel);
+ nameFinderDictionary = new Dictionary(
+ AnnotatorUtil.getResourceAsStream(context, modelName));
} catch (IOException ie) {
throw new ResourceInitializationException(
@@ -77,13 +75,22 @@ public class DictionaryNameFinder extends
AbstractNameFinder {
ExceptionMessages.IO_ERROR_DICTIONARY_READING,
new Object[] {ie.getMessage()});
}
-
}
mNameFinder = new opennlp.tools.namefind.DictionaryNameFinder(
nameFinderDictionary);
}
+ @Override
+ protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[]
nameAnnotations) {
+ // nothing to do
+ }
+
+ @Override
+ protected void documentDone(CAS cas) {
+ // nothing to do
+ }
+
@Override
protected Span[] find(CAS cas, String[] tokens) {
return mNameFinder.find(tokens);
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
index bce0105d..6d6a9186 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java
@@ -113,8 +113,7 @@ public final class NameFinder extends AbstractNameFinder {
* Note: Do all initialization in this method, do not use the constructor.
*/
@Override
- public void initialize()
- throws ResourceInitializationException {
+ public void initialize() throws ResourceInitializationException {
super.initialize();
@@ -136,13 +135,12 @@ public final class NameFinder extends AbstractNameFinder {
* Initializes the type system.
*/
@Override
- public void typeSystemInit(TypeSystem typeSystem)
- throws AnalysisEngineProcessException {
+ public void typeSystemInit(TypeSystem typeSystem) throws
AnalysisEngineProcessException {
super.typeSystemInit(typeSystem);
probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
mNameType,
- UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
+ UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
documentConfidenceType = AnnotatorUtil.getOptionalTypeParameter(context,
typeSystem,
"opennlp.uima.DocumentConfidenceType");
@@ -160,7 +158,6 @@ public final class NameFinder extends AbstractNameFinder {
Span[] names = mNameFinder.find(tokens);
double[] probs = mNameFinder.probs();
-
for (double prob : probs) {
documentConfidence.add(prob);
}
diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
index f90e6fce..5acda276 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
@@ -43,7 +43,7 @@ import opennlp.uima.util.UimaUtil;
/**
* The Normalizer tries the structure annotations. The structured value
- * is than assigned to a field of the annotation.
+ * is then assigned to a field of the annotation.
* <p>
* The process depends on the
* <p>
@@ -108,6 +108,7 @@ public class Normalizer extends CasAnnotator_ImplBase {
*
* @implNote Do all initialization in this method, do not use the
constructor.
*/
+ @Override
public void initialize(UimaContext context) throws
ResourceInitializationException {
super.initialize(context);
@@ -116,8 +117,8 @@ public class Normalizer extends CasAnnotator_ImplBase {
mLogger = context.getLogger();
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the OpenNLP Normalizer annotator.");
+ if (mLogger.isLoggable(Level.DEBUG)) {
+ mLogger.log(Level.DEBUG, "Initializing the OpenNLP Normalizer annotator.");
}
try {
@@ -141,6 +142,7 @@ public class Normalizer extends CasAnnotator_ImplBase {
* Initializes the type system.
* @param typeSystem type system to initialize
*/
+ @Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
@@ -165,6 +167,7 @@ public class Normalizer extends CasAnnotator_ImplBase {
}
}
+ @Override
public void process(CAS tcas) {
FSIndex<AnnotationFS> sentenceIndex = tcas.getAnnotationIndex(mNameType);
@@ -203,8 +206,8 @@ public class Normalizer extends CasAnnotator_ImplBase {
try {
number = NumberUtil.parse(text, language);
} catch (ParseException e) {
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Invalid number format: " + text);
+ if (mLogger.isLoggable(Level.WARN)) {
+ mLogger.log(Level.WARN, "Invalid number format: " + text);
}
continue;
}
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
index d8a63f60..d930f821 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
@@ -43,6 +43,7 @@ public class StringDictionary {
* @throws IOException Thrown if IO errors occurred.
*/
public StringDictionary(InputStream in) throws IOException {
+ this();
DictionaryEntryPersistor.create(in, entry -> {
String valueString = entry.attributes().getValue("value");
put(entry.tokens(), valueString);
diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
index ad6771f2..652cf249 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
@@ -145,6 +145,7 @@ public class Parser extends CasAnnotator_ImplBase {
/**
* Performs parsing on the given {@link CAS} object.
*/
+ @Override
public void process(CAS cas) {
FSIndex<AnnotationFS> sentences = cas.getAnnotationIndex(mSentenceType);
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
index e959d4fc..6348a893 100644
---
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
+++
b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
@@ -57,8 +57,8 @@ public abstract class AbstractSentenceDetector extends
CasAnnotator_ImplBase {
logger = context.getLogger();
- if (logger.isLoggable(Level.INFO)) {
- logger.log(Level.INFO, "Initializing the OpenNLP Sentence annotator.");
+ if (logger.isLoggable(Level.DEBUG)) {
+ logger.log(Level.DEBUG, "Initializing the OpenNLP Sentence annotator.");
}
isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter(
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
index acb5c6bb..59bc3e6a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
@@ -76,6 +76,7 @@ public final class SentenceDetector extends
AbstractSentenceDetector {
* <p>
* Note: Do all initialization in this method, do not use the constructor.
*/
+ @Override
public void initialize(UimaContext context)
throws ResourceInitializationException {
@@ -98,14 +99,14 @@ public final class SentenceDetector extends
AbstractSentenceDetector {
/**
* Initializes the type system.
*/
+ @Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
super.typeSystemInit(typeSystem);
probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
- sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
- CAS.TYPE_NAME_DOUBLE);
+ sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
CAS.TYPE_NAME_DOUBLE);
}
@Override
@@ -128,6 +129,7 @@ public final class SentenceDetector extends
AbstractSentenceDetector {
/**
* Releases allocated resources.
*/
+ @Override
public void destroy() {
// dereference model to allow garbage collection
sentenceDetector = null;
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
index f41b7db7..4545f8ec 100644
---
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
+++
b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
@@ -26,6 +26,7 @@ import opennlp.uima.util.AbstractModelResource;
public class SentenceModelResourceImpl extends
AbstractModelResource<SentenceModel>
implements SentenceModelResource {
+ @Override
public SentenceModel getModel() {
return model;
}
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
index b1f7abcb..3c658a0a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
@@ -91,9 +91,8 @@ public abstract class AbstractTokenizer extends
CasAnnotator_ImplBase {
UimaUtil.TOKEN_TYPE_PARAMETER);
}
- protected void postProcessAnnotations(Span[] tokens,
- AnnotationFS[] tokenAnnotations) {
- }
+ protected abstract void postProcessAnnotations(Span[] tokens,
+ AnnotationFS[] tokenAnnotations);
protected abstract Span[] tokenize(CAS cas, AnnotationFS sentence);
@@ -116,8 +115,7 @@ public abstract class AbstractTokenizer extends
CasAnnotator_ImplBase {
AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenSpans.length];
for (int i = 0; i < tokenSpans.length; i++) {
- tokenAnnotations[i] = cas
- .createAnnotation(tokenType,
+ tokenAnnotations[i] = cas.createAnnotation(tokenType,
sentenceOffset + tokenSpans[i].getStart(), sentenceOffset
+ tokenSpans[i].getEnd());
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
index a62d531c..7991582f 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
@@ -57,4 +57,9 @@ public final class SimpleTokenizer extends AbstractTokenizer {
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
return tokenizer.tokenizePos(sentence.getCoveredText());
}
+
+ @Override
+ protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[]
nameAnnotations) {
+ // nothing to do
+ }
}
diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
index e2af1eb9..38d2b343 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
@@ -80,6 +80,7 @@ public final class Tokenizer extends AbstractTokenizer {
* <p>
* Note: Do all initialization in this method, do not use the constructor.
*/
+ @Override
public void initialize(UimaContext context)
throws ResourceInitializationException {
@@ -102,6 +103,7 @@ public final class Tokenizer extends AbstractTokenizer {
/**
* Initializes the type system.
*/
+ @Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
@@ -135,6 +137,7 @@ public final class Tokenizer extends AbstractTokenizer {
/**
* Releases allocated resources.
*/
+ @Override
public void destroy() {
// dereference model to allow garbage collection
tokenizer = null;
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
index 0151b755..bc8c6527 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
@@ -52,4 +52,10 @@ public final class WhitespaceTokenizer extends
AbstractTokenizer {
return opennlp.tools.tokenize.WhitespaceTokenizer.INSTANCE.
tokenizePos(sentence.getCoveredText());
}
+
+ @Override
+ protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[]
nameAnnotations) {
+ // nothing to do
+ }
+
}
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java
b/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java
index 21405a7f..3124592a 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java
@@ -42,10 +42,8 @@ public abstract class AbstractModelResource<T> implements
SharedResourceObject {
try {
model = loadModel(resource.getInputStream());
} catch (IOException e) {
- throw new ResourceInitializationException(
- ExceptionMessages.MESSAGE_CATALOG,
- ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {
- e.getMessage()}, e);
+ throw new ResourceInitializationException(ExceptionMessages.MESSAGE_CATALOG,
+ ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {e.getMessage()}, e);
}
}
}
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
index 59a2290d..30061d95 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
@@ -24,8 +24,7 @@ import org.apache.uima.cas.text.AnnotationFS;
/**
* Checks two {@link AnnotationFS annotations} for equality.
*/
-public class AnnotationComparator implements Comparator<AnnotationFS>
-{
+public class AnnotationComparator implements Comparator<AnnotationFS> {
/**
* Compares the start indexes of the annotations.
@@ -35,6 +34,7 @@ public class AnnotationComparator implements
Comparator<AnnotationFS>
*
* @return 0 if equals, < 0 if before and > 0 if after
*/
+ @Override
public int compare(AnnotationFS a, AnnotationFS b) {
return a.getBegin() - b.getBegin();
}
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
index 17227270..d2685efe 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
@@ -33,7 +33,7 @@ import org.apache.uima.util.Logger;
import opennlp.tools.dictionary.Dictionary;
/**
- * This is a utility class for Annotators.
+ * Provides utility methods for Annotators.
*/
public final class AnnotatorUtil {
@@ -48,10 +48,14 @@ public final class AnnotatorUtil {
* @param name The name of the type to retrieve.
*
* @return The {@link Type} for the {@code name}.
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
* @throws OpenNlpAnnotatorProcessException Thrown if no {@link Type} could
be found.
*/
public static Type getType(TypeSystem typeSystem, String name)
throws AnalysisEngineProcessException {
+ if (typeSystem == null) {
+ throw new IllegalArgumentException("Parameter 'typeSystem' must not be null");
+ }
Type type = typeSystem.getType(name);
if (type == null) {
@@ -82,7 +86,6 @@ public final class AnnotatorUtil {
}
}
-
/**
* Retrieves a {@link Feature} for a specified type and {@code featureName},
* otherwise an exception is thrown.
@@ -91,13 +94,16 @@ public final class AnnotatorUtil {
* @param featureName The name of the feature to retrieve.
*
* @return The {@link Feature} if found.
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
* @throws OpenNlpAnnotatorProcessException Thrown if no {@link Feature} did
match.
*/
public static Feature getRequiredFeature(Type type, String featureName)
throws AnalysisEngineProcessException {
+ if (type == null) {
+ throw new IllegalArgumentException("Parameter 'type' must not be null");
+ }
Feature feature = type.getFeatureByBaseName(featureName);
-
if (feature == null) {
throw new OpenNlpAnnotatorProcessException(
ExceptionMessages.FEATURE_NOT_FOUND, new Object[] {type.getName(),
featureName});
@@ -114,16 +120,13 @@ public final class AnnotatorUtil {
* @param rangeType The expected range type.
*
* @return The {@link Feature} if found.
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
* @throws OpenNlpAnnotatorProcessException Thrown if no {@link Feature} did
match.
*/
- public static Feature getRequiredFeature(Type type, String featureName,
- String rangeType)
+ public static Feature getRequiredFeature(Type type, String featureName,
String rangeType)
throws AnalysisEngineProcessException {
-
Feature feature = getRequiredFeature(type, featureName);
-
checkFeatureType(feature, rangeType);
-
return feature;
}
@@ -142,13 +145,11 @@ public final class AnnotatorUtil {
throws AnalysisEngineProcessException {
String featureName;
-
try {
featureName = getRequiredStringParameter(context, featureNameParameter);
} catch (ResourceInitializationException e) {
throw new OpenNlpAnnotatorProcessException(e);
}
-
return getRequiredFeature(type, featureName);
}
@@ -174,7 +175,6 @@ public final class AnnotatorUtil {
} catch (ResourceInitializationException e) {
throw new OpenNlpAnnotatorProcessException(e);
}
-
return getRequiredFeature(type, featureName, rangeTypeName);
}
@@ -193,13 +193,11 @@ public final class AnnotatorUtil {
throws AnalysisEngineProcessException {
String typeName;
-
try {
typeName = getRequiredStringParameter(context, parameter);
} catch (ResourceInitializationException e) {
throw new OpenNlpAnnotatorProcessException(e);
}
-
return getType(typeSystem, typeName);
}
@@ -216,9 +214,7 @@ public final class AnnotatorUtil {
throws ResourceInitializationException {
String value = getOptionalStringParameter(context, parameter);
-
checkForNull(value, parameter);
-
return value;
}
@@ -236,9 +232,7 @@ public final class AnnotatorUtil {
throws ResourceInitializationException {
Integer value = getOptionalIntegerParameter(context, parameter);
-
checkForNull(value, parameter);
-
return value;
}
@@ -256,9 +250,7 @@ public final class AnnotatorUtil {
throws ResourceInitializationException {
Float value = getOptionalFloatParameter(context, parameter);
-
checkForNull(value, parameter);
-
return value;
}
@@ -276,9 +268,7 @@ public final class AnnotatorUtil {
throws ResourceInitializationException {
Boolean value = getOptionalBooleanParameter(context, parameter);
-
checkForNull(value, parameter);
-
return value;
}
@@ -509,18 +499,15 @@ public final class AnnotatorUtil {
* @param parameter The name of the parameter to retrieve.
*
* @return The {@link Object parameter} or {@code null} if not set.
- * @throws ResourceInitializationException Thrown if the parameter type was
not of the expected type.
*/
private static Object getOptionalParameter(UimaContext context,
- String parameter)
- throws ResourceInitializationException {
+ String parameter) {
Object value = context.getConfigParameterValue(parameter);
-
Logger logger = context.getLogger();
- if (logger.isLoggable(Level.INFO)) {
- logger.log(Level.INFO, parameter + " = " +
+ if (logger.isLoggable(Level.DEBUG)) {
+ logger.log(Level.DEBUG, parameter + " = " +
(value != null ? value.toString() : "not set"));
}
@@ -557,8 +544,7 @@ public final class AnnotatorUtil {
* @return A valid, open {@link InputStream}.
* @throws ResourceInitializationException Thrown if the resource could not
be found.
*/
- public static InputStream getOptionalResourceAsStream(UimaContext context,
- String name)
+ public static InputStream getOptionalResourceAsStream(UimaContext context,
String name)
throws ResourceInitializationException {
final InputStream inResource;
@@ -581,30 +567,20 @@ public final class AnnotatorUtil {
* @return A valid {@link Dictionary} or {@code null} if IO errors occurred.
* @throws ResourceInitializationException Thrown if the resource could not
be found.
*/
- public static Dictionary createOptionalDictionary(UimaContext context,
- String dictionaryParameter)
+ public static Dictionary createOptionalDictionary(UimaContext context,
String dictionaryParameter)
throws ResourceInitializationException {
- String dictionaryName = AnnotatorUtil.getOptionalStringParameter(context,
- dictionaryParameter);
+ String dictionaryName = AnnotatorUtil.getOptionalStringParameter(context,
dictionaryParameter);
Dictionary dictionary = null;
-
if (dictionaryName != null) {
-
Logger logger = context.getLogger();
-
- try (InputStream dictIn =
AnnotatorUtil.getOptionalResourceAsStream(context,
- dictionaryName)) {
-
+ try (InputStream dictIn =
AnnotatorUtil.getOptionalResourceAsStream(context, dictionaryName)) {
if (dictIn == null) {
- String message = "The dictionary file " + dictionaryName
- + " does not exist!";
-
+ String message = "The dictionary file " + dictionaryName + " does
not exist!";
if (logger.isLoggable(Level.WARNING)) {
logger.log(Level.WARNING, message);
}
-
return null;
}
diff --git
a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
index c7433b17..93cf7055 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
@@ -17,6 +17,7 @@
package opennlp.uima.util;
+import java.io.Serial;
import java.util.Collection;
import java.util.LinkedList;
@@ -28,6 +29,7 @@ import org.apache.uima.cas.text.AnnotationFS;
* Checks if an {@link AnnotationFS} is contained by the given AnnotationFS.
*/
public final class ContainingConstraint implements FSMatchConstraint {
+ @Serial
private static final long serialVersionUID = 8393109549729168545L;
private final Collection<AnnotationFS> mContainingAnnotations = new
LinkedList<>();
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
index 30c039ec..43ae0590 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java
@@ -17,6 +17,7 @@
package opennlp.uima.util;
+import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -33,7 +34,7 @@ import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.BaseModel;
/**
- * This class contains util methods for the maxent library.
+ * Provides utility methods for OpenNLP's maxent library.
*/
final public class OpennlpUtil {
@@ -49,8 +50,13 @@ final public class OpennlpUtil {
* @param modelFile The {@link File} to serialize into.
* @throws IOException Thrown if IO errors occurred.
*/
- public static void serialize(BaseModel model, File modelFile)
- throws IOException {
+ public static void serialize(BaseModel model, File modelFile) throws
IOException {
+ if (model == null) {
+ throw new IllegalArgumentException("Parameter 'model' must not be null");
+ }
+ if (modelFile == null) {
+ throw new IllegalArgumentException("Parameter 'modelFile' must not be
null");
+ }
try (OutputStream fileOut = new FileOutputStream(modelFile);
OutputStream modelOut = new BufferedOutputStream(fileOut)) {
model.serialize(modelOut);
@@ -80,34 +86,36 @@ final public class OpennlpUtil {
}
/**
- * Loads data from a given {@link File}.
+ * Loads data from the specified training parameters {@link File file}.
*
- * @param inFileValue The name of the {@link File} to read bytes from.
+ * @param trainingFilePath The path to the {@link File} to load the training
parameters from.
* @param isSequenceTrainingAllowed Whether the {@link
TrainerFactory.TrainerType#SEQUENCE_TRAINER}
* method is allowed or not.
* @return The {@link TrainingParameters} that have been read.
*
- * @throws ResourceInitializationException Thrown if IO errors occurred or
the {@code inFileValue}
+ * @throws ResourceInitializationException Thrown if IO errors occurred or
the {@code trainingFilePath}
* does not reference a valid
training parameters file.
*/
- public static TrainingParameters loadTrainingParams(String inFileValue,
+ public static TrainingParameters loadTrainingParams(String trainingFilePath,
boolean isSequenceTrainingAllowed) throws
ResourceInitializationException {
TrainingParameters params;
- if (inFileValue != null) {
- try (InputStream paramsIn = new FileInputStream(inFileValue)) {
+ if (trainingFilePath != null) {
+ try (InputStream paramsIn = new BufferedInputStream(new
FileInputStream(trainingFilePath))) {
params = new opennlp.tools.util.TrainingParameters(paramsIn);
} catch (IOException e) {
throw new ResourceInitializationException(e);
}
if (!TrainerFactory.isValid(params)) {
- throw new ResourceInitializationException(new Exception("Training
parameters file is invalid!"));
+ throw new ResourceInitializationException(
+ new RuntimeException("Training parameters file is invalid!"));
}
TrainerFactory.TrainerType trainerType =
TrainerFactory.getTrainerType(params);
if (!isSequenceTrainingAllowed &&
TrainerFactory.TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
- throw new ResourceInitializationException(new Exception("Sequence
training is not supported!"));
+ throw new ResourceInitializationException(
+ new RuntimeException("Sequence training is not supported!"));
}
}
else {
diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
index 4b2526eb..070d4c10 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java
@@ -27,7 +27,7 @@ import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
/**
- * This is a util class for uima operations.
+ * Defines constants and provides utility methods for uima operations.
*/
public final class UimaUtil {
@@ -88,13 +88,21 @@ public final class UimaUtil {
*
* @param cas The {@link CAS} to use.
* @param containerAnnotation The {@link AnnotationFS} of the container.
- * @param removeAnnotationType The {@link Type type} to remove annotations
for.
+ * @param type The {@link Type type} to remove annotations for.
+ *
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public static void removeAnnotations(CAS cas,
- AnnotationFS containerAnnotation, Type removeAnnotationType) {
+ AnnotationFS containerAnnotation, Type type) {
+
+ if (cas == null) {
+ throw new IllegalArgumentException("Parameter 'cas' must not be null");
+ }
+ if (type == null) {
+ throw new IllegalArgumentException("Parameter 'type' must not be null");
+ }
- FSIndex<AnnotationFS> allRemoveAnnotations = cas
- .getAnnotationIndex(removeAnnotationType);
+ FSIndex<AnnotationFS> allRemoveAnnotations = cas.getAnnotationIndex(type);
ContainingConstraint containingConstraint = new ContainingConstraint(
containerAnnotation);
diff --git a/opennlp-uima/src/test/java/opennlp/uima/AbstractIT.java
b/opennlp-uima/src/test/java/opennlp/uima/AbstractIT.java
new file mode 100644
index 00000000..1295dead
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/AbstractIT.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.FloatArrayFS;
+import org.apache.uima.cas.IntArrayFS;
+import org.apache.uima.cas.StringArrayFS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.BeforeAll;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.DownloadUtil;
+
+abstract class AbstractIT extends AbstractUimaTest {
+
+ private static final Logger logger =
LoggerFactory.getLogger(AbstractIT.class);
+
+ protected static final String BIN = ".bin";
+
+ private static final String BASE_URL_MODELS_V15 =
"https://opennlp.sourceforge.net/models-1.5/";
+
+  /**
+   * Ensures that every model referenced by the integration tests is present
+   * in the download home before the first test of a class runs.
+   *
+   * @throws IOException Thrown if a model could not be downloaded.
+   */
+  @BeforeAll
+  public static void initEnv() throws IOException {
+    // UD models, obtained via DownloadUtil
+    DownloadUtil.downloadModel("en", DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class);
+    DownloadUtil.downloadModel("en", DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
+    DownloadUtil.downloadModel("en", DownloadUtil.ModelType.POS, POSModel.class);
+
+    // classic model files, obtained from BASE_URL_MODELS_V15
+    final List<String> classicModels = List.of(
+        "en-ner-organization", "en-ner-location", "en-ner-person",
+        "en-ner-date", "en-ner-time", "en-ner-percentage", "en-ner-money",
+        "en-chunker", "en-parser-chunking");
+    for (String classicModel : classicModels) {
+      downloadVersion15Model(classicModel + BIN);
+    }
+  }
+
+ /**
+ * Downloads a single model file, resolved against {@code BASE_URL_MODELS_V15}.
+ *
+ * @param modelName The file name of the model, including its extension.
+ * @throws IOException Thrown if the resulting URL is malformed or the download failed.
+ */
+ private static void downloadVersion15Model(String modelName) throws
IOException {
+ downloadModel(new URL(BASE_URL_MODELS_V15 + modelName));
+ }
+
+  /**
+   * Downloads the resource at {@code url} into {@code OPENNLP_DIR}, unless a file
+   * of the same name already exists there.
+   * <p>
+   * The content is written to a {@code .part} sibling first and only moved to its
+   * final name once the transfer completed. An aborted download therefore cannot
+   * leave a truncated file behind that later runs would treat as a valid model.
+   *
+   * @param url The {@link URL} to download from. The part right of the last '/'
+   *            is used as the local file name.
+   * @throws IOException Thrown if the directory could not be created or the
+   *                     download failed.
+   */
+  private static void downloadModel(URL url) throws IOException {
+    if (!Files.isDirectory(OPENNLP_DIR)) {
+      // unlike File#mkdir(), this creates missing parents and reports failures
+      Files.createDirectories(OPENNLP_DIR);
+    }
+    final String location = url.toString();
+    final String filename = location.substring(location.lastIndexOf("/") + 1);
+    final Path localFile = Paths.get(OPENNLP_DIR.toString(), filename);
+
+    if (!Files.exists(localFile)) {
+      logger.debug("Downloading model from {} to {}.", url, localFile);
+      final Path partFile = localFile.resolveSibling(filename + ".part");
+      try (final InputStream in = new BufferedInputStream(url.openStream())) {
+        Files.copy(in, partFile, StandardCopyOption.REPLACE_EXISTING);
+      } catch (IOException e) {
+        // do not keep a half-written file around
+        Files.deleteIfExists(partFile);
+        throw e;
+      }
+      Files.move(partFile, localFile, StandardCopyOption.REPLACE_EXISTING);
+      logger.debug("Download complete.");
+    }
+  }
+
+ /**
+ * Prints every {@link Annotation} selected from the given CAS to a PrintStream.
+ * Each annotation is written via {@code printFS}, starting at nesting level 0.
+ *
+ * @param aCAS
+ * the CAS containing the FeatureStructures to print
+ * @param aOut
+ * the PrintStream to which output will be written
+ */
+ public static void printAnnotations(CAS aCAS, PrintStream aOut) {
+
+ // uses the select API with Stream support (UIMA version 3)
+ aCAS.select(Annotation.class).forEach(fs -> printFS(fs, aCAS, 0, aOut));
+ }
+
+  /**
+   * Prints a FeatureStructure to a PrintStream: its type name, the covered text
+   * (if it is an annotation) and all of its features.
+   * <p>
+   * Covered text and string values longer than 64 characters are cut off and
+   * marked with a trailing {@code ...}. A feature whose value is another
+   * FeatureStructure is printed recursively with an increased nesting level.
+   *
+   * @param aFS
+   *          the FeatureStructure to print
+   * @param aCAS
+   *          the CAS containing the FeatureStructure
+   * @param aNestingLevel
+   *          number of tabs to print before each line
+   * @param aOut
+   *          the PrintStream to which output will be written
+   */
+  public static void printFS(FeatureStructure aFS, CAS aCAS, int aNestingLevel, PrintStream aOut) {
+    Type stringType = aCAS.getTypeSystem().getType(CAS.TYPE_NAME_STRING);
+
+    printTabs(aNestingLevel, aOut);
+    aOut.println(aFS.getType().getName());
+
+    // if it's an annotation, print the first 64 chars of its covered text
+    if (aFS instanceof AnnotationFS annot) {
+      String coveredText = annot.getCoveredText();
+      printTabs(aNestingLevel + 1, aOut);
+      aOut.print("\"");
+      if (coveredText.length() <= 64) {
+        aOut.print(coveredText);
+      } else {
+        // print(), not println(): the closing quote must stay on the same line
+        aOut.print(coveredText.substring(0, 64) + "...");
+      }
+      aOut.println("\"");
+    }
+
+    // print all features
+    List<Feature> aFeatures = aFS.getType().getFeatures();
+    for (Feature feat : aFeatures) {
+      printTabs(aNestingLevel + 1, aOut);
+      // print feature name
+      aOut.print(feat.getShortName());
+      aOut.print(" = ");
+      // print feature value (how we get this depends on feature's range type)
+      String rangeTypeName = feat.getRange().getName();
+      if (aCAS.getTypeSystem().subsumes(stringType, feat.getRange())) {
+        // subsumes() is used so that subtypes of string are covered as well
+        String str = aFS.getStringValue(feat);
+        if (str == null) {
+          aOut.println("null");
+        } else {
+          aOut.print("\"");
+          if (str.length() > 64) {
+            str = str.substring(0, 64) + "...";
+          }
+          aOut.print(str);
+          aOut.println("\"");
+        }
+      } else if (CAS.TYPE_NAME_INTEGER.equals(rangeTypeName)) {
+        aOut.println(aFS.getIntValue(feat));
+      } else if (CAS.TYPE_NAME_FLOAT.equals(rangeTypeName)) {
+        aOut.println(aFS.getFloatValue(feat));
+      } else if (CAS.TYPE_NAME_STRING_ARRAY.equals(rangeTypeName)) {
+        StringArrayFS arrayFS = (StringArrayFS) aFS.getFeatureValue(feat);
+        if (arrayFS == null) {
+          aOut.println("null");
+        } else {
+          String[] vals = arrayFS.toArray();
+          aOut.print("[");
+          for (int i = 0; i < vals.length - 1; i++) {
+            aOut.print(vals[i]);
+            aOut.print(',');
+          }
+          if (vals.length > 0) {
+            aOut.print(vals[vals.length - 1]);
+          }
+          // arrays are enclosed in brackets only, no quote is open at this point
+          aOut.println("]");
+        }
+      } else if (CAS.TYPE_NAME_INTEGER_ARRAY.equals(rangeTypeName)) {
+        IntArrayFS arrayFS = (IntArrayFS) aFS.getFeatureValue(feat);
+        if (arrayFS == null) {
+          aOut.println("null");
+        } else {
+          int[] vals = arrayFS.toArray();
+          aOut.print("[");
+          for (int i = 0; i < vals.length - 1; i++) {
+            aOut.print(vals[i]);
+            aOut.print(',');
+          }
+          if (vals.length > 0) {
+            aOut.print(vals[vals.length - 1]);
+          }
+          aOut.println("]");
+        }
+      } else if (CAS.TYPE_NAME_FLOAT_ARRAY.equals(rangeTypeName)) {
+        FloatArrayFS arrayFS = (FloatArrayFS) aFS.getFeatureValue(feat);
+        if (arrayFS == null) {
+          aOut.println("null");
+        } else {
+          float[] vals = arrayFS.toArray();
+          aOut.print("[");
+          for (int i = 0; i < vals.length - 1; i++) {
+            aOut.print(vals[i]);
+            aOut.print(',');
+          }
+          if (vals.length > 0) {
+            aOut.print(vals[vals.length - 1]);
+          }
+          aOut.println("]");
+        }
+      } else {
+        // non-primitive type
+        FeatureStructure val = aFS.getFeatureValue(feat);
+        if (val == null) {
+          aOut.println("null");
+        } else {
+          printFS(val, aCAS, aNestingLevel + 1, aOut);
+        }
+      }
+    }
+  }
+
+  /**
+   * Writes the given number of tab characters to a PrintStream.
+   * Nothing is written if the number is zero or negative.
+   *
+   * @param aNumTabs
+   *          number of tabs to print
+   * @param aOut
+   *          the PrintStream to which output will be written
+   */
+  private static void printTabs(int aNumTabs, PrintStream aOut) {
+    int remaining = aNumTabs;
+    while (remaining > 0) {
+      aOut.print("\t");
+      remaining--;
+    }
+  }
+}
diff --git a/opennlp-uima/src/test/java/opennlp/uima/AbstractTest.java
b/opennlp-uima/src/test/java/opennlp/uima/AbstractTest.java
new file mode 100644
index 00000000..7d427c47
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/AbstractTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima;
+
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Base class for tests that need to locate the test descriptors
+ * and the local model directory.
+ */
+public abstract class AbstractTest {
+
+  /** Name of the attribute that holds the file URL of a resource. */
+  protected static final String FILE_URL = "fileUrl";
+
+  /** Location of the code source this class was loaded from. */
+  protected static final String TARGET_DIR = locateCodeSource();
+
+  /** The 'test-descriptors' directory below {@link #TARGET_DIR}. */
+  protected static final String PATH_DESCRIPTORS =
+      Paths.get(TARGET_DIR, "test-descriptors/").toString();
+
+  /**
+   * The '.opennlp' directory below the path given by the system property
+   * 'OPENNLP_DOWNLOAD_HOME', or below 'user.home' if that property is not set.
+   */
+  protected static final Path OPENNLP_DIR =
+      Paths.get(System.getProperty("OPENNLP_DOWNLOAD_HOME",
+          System.getProperty("user.home"))).resolve(".opennlp");
+
+  /*
+   * Resolves the code source location of this class to a file system path.
+   * A URISyntaxException is rethrown as RuntimeException.
+   */
+  private static String locateCodeSource() {
+    try {
+      return Path.of(AbstractTest.class.getProtectionDomain().
+          getCodeSource().getLocation().toURI()).toString();
+    } catch (URISyntaxException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+}
diff --git a/opennlp-uima/src/test/java/opennlp/uima/AbstractUimaTest.java
b/opennlp-uima/src/test/java/opennlp/uima/AbstractUimaTest.java
new file mode 100644
index 00000000..709a7f81
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/AbstractUimaTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.file.Paths;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.resource.ExternalResourceDescription;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.resource.metadata.ResourceManagerConfiguration;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+
+/**
+ * Base class for tests that produce an {@link AnalysisEngine}
+ * from one of the xml test descriptors.
+ */
+public abstract class AbstractUimaTest extends AbstractTest {
+
+  /**
+   * Parses the descriptor with the given name from {@code PATH_DESCRIPTORS},
+   * adapts the file URLs of its external resources to the test environment
+   * and produces an {@link AnalysisEngine} from it.
+   *
+   * @param descName The file name of the xml descriptor.
+   * @return The {@link AnalysisEngine} produced by the UIMA framework.
+   *
+   * @throws IOException Thrown if the descriptor could not be read.
+   * @throws InvalidXMLException Thrown if the descriptor could not be parsed.
+   * @throws ResourceInitializationException Thrown if the engine could not be produced.
+   */
+  protected AnalysisEngine produceAE(String descName)
+      throws IOException, InvalidXMLException, ResourceInitializationException {
+    File descFile = new File(PATH_DESCRIPTORS + "/" + descName);
+    XMLInputSource in = new XMLInputSource(descFile);
+    final ResourceSpecifier specifier;
+    try {
+      specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+    } finally {
+      // release the descriptor file, even if parsing failed
+      in.close();
+    }
+    adaptModelURL(specifier);
+    return UIMAFramework.produceAnalysisEngine(specifier);
+  }
+
+  /*
+   * Dynamically resolves the model URL for the test environment
+   * and reconfigures the resource specification accordingly.
+   *
+   * Note:
+   * In the xml test-descriptors files only stub urls exist.
+   * Therefore, the actual 'url' has to be set at runtime
+   * and used to compose a valid 'file' URL for the resource
+   * specification object ('resourceSpec').
+   *
+   * Descriptors without external resources, and resources without
+   * a 'fileUrl' attribute, are left untouched.
+   */
+  private void adaptModelURL(ResourceSpecifier specifier) {
+    ResourceManagerConfiguration config = (ResourceManagerConfiguration)
+        specifier.getAttributeValue("resourceManagerConfiguration");
+    if (config == null) {
+      return;
+    }
+    ExternalResourceDescription[] resources = config.getExternalResources();
+    if (resources == null) {
+      return;
+    }
+    for (ExternalResourceDescription modelDesc : resources) {
+      ResourceSpecifier resourceSpec = modelDesc.getResourceSpecifier();
+      Object genericValue = resourceSpec.getAttributeValue(FILE_URL);
+      if (genericValue == null) {
+        continue;
+      }
+      String stubUrl = genericValue.toString();
+      // everything right of the first ':', that is: right of the 'file:' scheme
+      String modelName = stubUrl.substring(stubUrl.indexOf(':') + 1);
+      try {
+        // the dictionary resides next to the test classes, models in the download home
+        URL fileURL = ("dictionary.dic".equals(modelName)
+            ? Paths.get(TARGET_DIR, modelName)
+            : OPENNLP_DIR.resolve(modelName)).toUri().toURL();
+        resourceSpec.setAttributeValue(FILE_URL, fileURL.toExternalForm());
+      } catch (MalformedURLException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+}
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/AnnotatorsInitializationTest.java
b/opennlp-uima/src/test/java/opennlp/uima/AnnotatorsInitializationTest.java
deleted file mode 100644
index 44519d2c..00000000
--- a/opennlp-uima/src/test/java/opennlp/uima/AnnotatorsInitializationTest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.uima;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.uima.UIMAFramework;
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.pear.util.FileUtil;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceSpecifier;
-import org.apache.uima.util.InvalidXMLException;
-import org.apache.uima.util.XMLInputSource;
-import org.junit.jupiter.api.Assertions;
-
-/**
- * Test for initialization of the opennlp.uima Annotators
- */
-public class AnnotatorsInitializationTest {
-
- private static final String PATHNAME =
"src/test/resources/test-descriptors/";
-
- // TODO: This test requires the SourceForge models, or other models to run,
- // but they are missing due to license issues since the project was migrated
to Apache
- //@Test
- public void testInitializationExecutionAndReconfigure() {
- File f = new File(PATHNAME);
- for (String descName : f.list(new FileUtil.ExtFilenameFilter("xml"))) {
- if (!descName.equals("TypeSystem.xml")) {
- try {
- AnalysisEngine ae = produceAE(descName);
- CAS cas = ae.newCAS();
- cas.setDocumentText("this is a dummy document text for
initialization and reconfiguration");
- ae.process(cas);
- ae.reconfigure();
- } catch (Exception e) {
- Assertions.fail(e.getLocalizedMessage() + " for desc " + descName);
- }
- }
- }
- }
-
- private AnalysisEngine produceAE(String descName)
- throws IOException, InvalidXMLException, ResourceInitializationException
{
- File descFile = new File(PATHNAME + descName);
- XMLInputSource in = new XMLInputSource(descFile);
- ResourceSpecifier specifier =
UIMAFramework.getXMLParser().parseResourceSpecifier(in);
- return UIMAFramework.produceAnalysisEngine(specifier);
- }
-}
diff --git a/opennlp-uima/src/test/java/opennlp/uima/FullAnnotatorsFlowIT.java
b/opennlp-uima/src/test/java/opennlp/uima/FullAnnotatorsFlowIT.java
new file mode 100644
index 00000000..84e3e969
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/FullAnnotatorsFlowIT.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.uima;
+
+import java.io.IOException;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import opennlp.tools.EnabledWhenCDNAvailable;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Integration test that initializes, executes and reconfigures
+ * the aggregate opennlp.uima annotator descriptor.
+ */
+@EnabledWhenCDNAvailable(hostname = "opennlp.sourceforge.net")
+public class FullAnnotatorsFlowIT extends AbstractIT {
+
+  // every sentence ends with a blank, so that sentences are separated from each other
+  private static final String DOCUMENT_TEXT =
+      "This document was written by Martin for initialization and reconfiguration. " +
+      "The text was changed in February 2025 by Apache Software Foundation. " +
+      "It is at least 80% in line with the annotation guidelines.";
+
+  /**
+   * Produces the engine for the given descriptor, processes {@code DOCUMENT_TEXT}
+   * with it and reconfigures it afterwards. Any exception fails the test.
+   *
+   * @param descName The file name of the xml descriptor to test.
+   */
+  @ParameterizedTest
+  @ValueSource(strings = {"OpenNlpTextAnalyzer.xml"})
+  public void testInitializationExecutionAndReconfigure(String descName) {
+    AnalysisEngine ae = null;
+    try {
+      ae = produceAE(descName);
+      assertNotNull(ae);
+      CAS cas = ae.newCAS();
+      cas.setDocumentLanguage("en");
+      cas.setDocumentText(DOCUMENT_TEXT);
+      ae.process(cas);
+      ae.reconfigure();
+    } catch (IOException | InvalidXMLException | AnalysisEngineProcessException |
+             ResourceConfigurationException | ResourceInitializationException e) {
+      // not every exception carries a cause, e.g. a plain IOException does not
+      final Throwable cause = e.getCause();
+      fail(e.getLocalizedMessage() + " for desc " + descName +
+          (cause != null ? ", cause: " + cause.getLocalizedMessage() : ""), e);
+    } finally {
+      if (ae != null) {
+        ae.destroy();
+      }
+    }
+  }
+}
diff --git a/opennlp-uima/src/test/java/opennlp/uima/SingleAnnotatorIT.java
b/opennlp-uima/src/test/java/opennlp/uima/SingleAnnotatorIT.java
new file mode 100644
index 00000000..9abea968
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/SingleAnnotatorIT.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.uima;
+
+import java.io.IOException;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import opennlp.tools.EnabledWhenCDNAvailable;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Integration test that initializes, executes and reconfigures
+ * each single opennlp.uima annotator descriptor.
+ */
+@EnabledWhenCDNAvailable(hostname = "opennlp.sourceforge.net")
+public class SingleAnnotatorIT extends AbstractIT {
+
+  private static final String DOCUMENT_TEXT =
+      "This is a dummy document text for initialization and reconfiguration.";
+
+  /**
+   * Produces the engine for the given descriptor, processes {@code DOCUMENT_TEXT}
+   * with it and reconfigures it afterwards. Any exception fails the test.
+   *
+   * @param descName The file name of the xml descriptor to test.
+   */
+  @ParameterizedTest
+  @ValueSource(strings = {
+      "Chunker.xml", "DateNameFinder.xml", "DictionaryNameFinder.xml",
+      "LocationNameFinder.xml", "MoneyNameFinder.xml", "OrganizationNameFinder.xml",
+      "Parser.xml", "PercentageNameFinder.xml", "PersonNameFinder.xml", "PosTagger.xml",
+      "SentenceDetector.xml", "SimpleTokenizer.xml", "Tokenizer.xml", "TimeNameFinder.xml",
+      "WhitespaceTokenizer.xml"
+  })
+  public void testInitializationExecutionAndReconfigure(String descName) {
+    AnalysisEngine ae = null;
+    try {
+      ae = produceAE(descName);
+      assertNotNull(ae);
+      CAS cas = ae.newCAS();
+      cas.setDocumentLanguage("en");
+      cas.setDocumentText(DOCUMENT_TEXT);
+      ae.process(cas);
+      ae.reconfigure();
+      /*
+      CasIterator casIterator = ae.processAndOutputNewCASes(cas);
+      while (casIterator.hasNext()) {
+        CAS outCas = casIterator.next();
+
+        //dump the document text and annotations for this segment
+        System.out.println("********* NEW SEGMENT *********");
+        System.out.println(outCas.getDocumentText());
+        printAnnotations(outCas, System.out);
+        //release the CAS (important)
+        outCas.release();
+      }
+      */
+    } catch (IOException | InvalidXMLException | AnalysisEngineProcessException |
+             ResourceConfigurationException | ResourceInitializationException e) {
+      // not every exception carries a cause, e.g. a plain IOException does not
+      final Throwable cause = e.getCause();
+      fail(e.getLocalizedMessage() + " for desc " + descName +
+          (cause != null ? ", cause: " + cause.getLocalizedMessage() : ""), e);
+    } finally {
+      if (ae != null) {
+        ae.destroy();
+      }
+    }
+  }
+}
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
b/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
index 11b0e980..9f1804b0 100644
---
a/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
+++
b/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
@@ -39,11 +39,10 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import opennlp.tools.util.StringList;
+import opennlp.uima.AbstractTest;
import opennlp.uima.util.CasUtil;
-public class DictionaryResourceTest {
-
- private static final String PATHNAME =
"src/test/resources/test-descriptors/";
+public class DictionaryResourceTest extends AbstractTest {
private static AnalysisEngine AE;
@@ -59,7 +58,7 @@ public class DictionaryResourceTest {
private static AnalysisEngine produceAE(String descName)
throws IOException, InvalidXMLException, ResourceInitializationException
{
- File descFile = new File(PATHNAME + descName);
+ File descFile = new File(PATH_DESCRIPTORS + "/" + descName);
XMLInputSource in = new XMLInputSource(descFile);
ResourceSpecifier specifier = UIMAFramework.getXMLParser()
.parseResourceSpecifier(in);
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java
b/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java
index 7ff18a7d..4491fa74 100644
--- a/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java
+++ b/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java
@@ -38,7 +38,6 @@ class NumberUtilTest {
Assertions.assertFalse(NumberUtil.isLanguageSupported(INVALID_LANGUAGE_CODE));
}
-
@Test
void parse_long() throws ParseException {
String numberStr = " 1 2 3 4 5 6 7 8 9 1 0 ";
@@ -59,8 +58,8 @@ class NumberUtilTest {
void parse_double_with_exception() throws ParseException {
String numberStr = " 12 3456.78 910 ";
Double doubleValue = 123456.78910;
- IllegalArgumentException thrown =
Assertions.assertThrows(IllegalArgumentException.class , () -> {
- Number result = NumberUtil.parse(numberStr , INVALID_LANGUAGE_CODE);
+ Assertions.assertThrows(IllegalArgumentException.class , () -> {
+ NumberUtil.parse(numberStr , INVALID_LANGUAGE_CODE);
} , "java.lang.IllegalArgumentException: Language INVALID is not
supported!");
}
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/normalizer/StringDictionaryTest.java
b/opennlp-uima/src/test/java/opennlp/uima/normalizer/StringDictionaryTest.java
new file mode 100644
index 00000000..1c6a78fc
--- /dev/null
+++
b/opennlp-uima/src/test/java/opennlp/uima/normalizer/StringDictionaryTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.normalizer;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.util.StringList;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class StringDictionaryTest {
+
+ // SUT
+ private StringDictionary dictionary;
+
+ @Test
+ void testInitEmptyDictionary() {
+ dictionary = new StringDictionary();
+ Iterator<StringList> it = dictionary.iterator();
+ assertFalse(it.hasNext());
+ }
+
+ @Test
+ void testPutAndGetEntry() {
+ // prepare
+ dictionary = new StringDictionary();
+ StringList sl = new StringList("foo", "bar");
+ // test
+ dictionary.put(sl, "foo bar");
+ Iterator<StringList> it = dictionary.iterator();
+ assertTrue(it.hasNext());
+ assertEquals("foo bar", dictionary.get(sl));
+ }
+
+ @Test
+ void testSerialization() throws IOException {
+ // prepare
+ dictionary = new StringDictionary();
+ StringList sl = new StringList("foo", "bar");
+ dictionary.put(sl, "foo bar");
+ byte[] serialized;
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+ dictionary.serialize(baos);
+ baos.flush();
+ serialized = baos.toByteArray();
+ }
+ try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized)) {
+ StringDictionary read = new StringDictionary(bais);
+ // test
+ Iterator<StringList> it = read.iterator();
+ assertTrue(it.hasNext());
+ assertEquals("foo bar", read.get(sl));
+ }
+
+ }
+}
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
index e3204fe8..fb58e2a8 100644
---
a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
+++
b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java
@@ -58,11 +58,9 @@ public class AnnotationComboIteratorTest {
List<List<String>> tokensBySentence = new ArrayList<>();
for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
-
final List<String> tokens = new ArrayList<>();
- for (AnnotationFS tokenAnnotation : annotationIteratorPair
- .getSubIterator()) {
+ for (AnnotationFS tokenAnnotation :
annotationIteratorPair.getSubIterator()) {
tokens.add(tokenAnnotation.getCoveredText());
}
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComparatorTest.java
b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComparatorTest.java
new file mode 100644
index 00000000..bcab5d01
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComparatorTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.IOException;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.EnabledWhenCDNAvailable;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.DownloadUtil;
+import opennlp.uima.AbstractUimaTest;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.fail;
+
+@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
+class AnnotationComparatorTest extends AbstractUimaTest {
+
+ private static final String DOCUMENT_TEXT =
+ "This is a dummy document text for initialization and
reconfiguration.";
+
+ private AnalysisEngine ae;
+
+ private Type type;
+
+ // SUT
+ private CAS cas;
+
+ @BeforeAll
+ public static void initEnv() throws IOException {
+ // ensure referenced UD models are present in download home
+ DownloadUtil.downloadModel("en", DownloadUtil.ModelType.SENTENCE_DETECTOR,
SentenceModel.class);
+ }
+
+ @BeforeEach
+ public void setUp() {
+ String descName = "SentenceDetector.xml";
+ try {
+ ae = produceAE(descName);
+ assertNotNull(ae);
+ cas = ae.newCAS();
+ cas.setDocumentLanguage("en");
+ cas.setDocumentText(DOCUMENT_TEXT);
+ ae.process(cas);
+ // type that matches the descriptor's topic: sentences
+ type = AnnotatorUtil.getType(cas.getTypeSystem(),
"opennlp.uima.Sentence");
+ } catch (IOException | InvalidXMLException |
ResourceInitializationException |
+ AnalysisEngineProcessException e) {
+ // e.getCause() may be null: hand the exception itself to fail(..) instead
+ fail(e.getLocalizedMessage() + " for desc " + descName, e);
+ }
+ }
+
+ @AfterEach
+ public void tearDown() {
+ if (ae != null) {
+ ae.destroy();
+ }
+ }
+
+ @Test
+ void testCompareEquality() {
+ // prepare
+ AnnotationFS fa1 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length());
+ AnnotationFS fa2 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length());
+ AnnotationComparator comparator = new AnnotationComparator();
+ // test
+ assertEquals(0, comparator.compare(fa1, fa2));
+ }
+
+ @Test
+ void testCompareDifference1() {
+ // prepare
+ AnnotationFS fa1 = cas.createAnnotation(type, 1, DOCUMENT_TEXT.length());
+ AnnotationFS fa2 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length());
+ AnnotationComparator comparator = new AnnotationComparator();
+ // test
+ assertEquals(1, comparator.compare(fa1, fa2));
+ }
+
+ @Test
+ void testCompareDifference2() {
+ // prepare
+ AnnotationFS fa1 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length());
+ AnnotationFS fa2 = cas.createAnnotation(type, 1, DOCUMENT_TEXT.length());
+ AnnotationComparator comparator = new AnnotationComparator();
+ // test
+ assertEquals(-1, comparator.compare(fa1, fa2));
+ }
+
+}
diff --git
a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotatorUtilTest.java
b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotatorUtilTest.java
new file mode 100644
index 00000000..0aeba0b6
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotatorUtilTest.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.NullAndEmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import opennlp.tools.EnabledWhenCDNAvailable;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.DownloadUtil;
+import opennlp.uima.AbstractUimaTest;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.fail;
+
+@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
+class AnnotatorUtilTest extends AbstractUimaTest {
+
+ private static final String DOCUMENT_TEXT =
+ "This is a dummy document text for initialization and
reconfiguration.";
+
+ private AnalysisEngine ae;
+
+ // SUT
+ private CAS cas;
+
+ @BeforeAll
+ public static void initEnv() throws IOException {
+ // ensure referenced UD models are present in download home
+ DownloadUtil.downloadModel("en", DownloadUtil.ModelType.SENTENCE_DETECTOR,
SentenceModel.class);
+ }
+
+ @BeforeEach
+ public void setUp() {
+ String descName = "SentenceDetector.xml";
+ try {
+ ae = produceAE(descName);
+ assertNotNull(ae);
+ cas = ae.newCAS();
+ cas.setDocumentLanguage("en");
+ cas.setDocumentText(DOCUMENT_TEXT);
+ // note: no actual need to process the CAS here!
+
+ } catch (IOException | InvalidXMLException |
ResourceInitializationException e) {
+ // e.getCause() may be null: hand the exception itself to fail(..) instead
+ fail(e.getLocalizedMessage() + " for desc " + descName, e);
+ }
+ }
+
+ @AfterEach
+ public void tearDown() {
+ if (ae != null) {
+ ae.destroy();
+ }
+ }
+
+ @Test
+ void testGetType() {
+ try {
+ Type t = AnnotatorUtil.getType(cas.getTypeSystem(),
"opennlp.uima.Sentence");
+ assertNotNull(t);
+ } catch (AnalysisEngineProcessException e) {
+ fail(e.getLocalizedMessage(), e); // the cause may be null, never dereference it
+ }
+ }
+
+ @Test
+ void testGetTypeWithInvalidTypeSystem() {
+ assertThrows(IllegalArgumentException.class, () ->
+ AnnotatorUtil.getType(null, "opennlp.uima.Sentence"));
+ }
+
+ @ParameterizedTest
+ @NullAndEmptySource
+ @ValueSource(strings = {" ", "\t", "\n"})
+ void testGetTypeWithEmptyTypeName(String typeName) {
+ assertThrows(OpenNlpAnnotatorProcessException.class, () ->
+ AnnotatorUtil.getType(cas.getTypeSystem(), typeName));
+ }
+
+ @Test
+ void testGetRequiredFeature() {
+ try {
+ final Type t =
AnnotatorUtil.getRequiredTypeParameter(ae.getUimaContext(),
+ cas.getTypeSystem(), UimaUtil.SENTENCE_TYPE_PARAMETER);
+ Feature f = AnnotatorUtil.getRequiredFeature(t, "sofa");
+ assertNotNull(f);
+ assertEquals("sofa", f.getShortName());
+ } catch (AnalysisEngineProcessException e) {
+ fail(e.getLocalizedMessage());
+ }
+ }
+
+ @Test
+ void testGetFeatureWithInvalidType() {
+ assertThrows(IllegalArgumentException.class, () ->
+ AnnotatorUtil.getRequiredFeature(null, "opennlp.uima.Sentence"));
+ }
+
+ @Test
+ void testGetRequiredFeatureWithInvalidFeatureName() throws
AnalysisEngineProcessException {
+ final Type t = AnnotatorUtil.getRequiredTypeParameter(ae.getUimaContext(),
+ cas.getTypeSystem(), UimaUtil.SENTENCE_TYPE_PARAMETER);
+ assertThrows(OpenNlpAnnotatorProcessException.class, () ->
+ AnnotatorUtil.getRequiredFeature(t, "xyz"));
+ }
+
+ @Test
+ void testGetOptionalFeatureParameter() {
+ UimaContext ctx = ae.getUimaContext();
+ try {
+ final Type t = AnnotatorUtil.getRequiredTypeParameter(ctx,
cas.getTypeSystem(),
+ UimaUtil.SENTENCE_TYPE_PARAMETER);
+ Feature f = AnnotatorUtil.getOptionalFeatureParameter(ctx, t,
+ UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
+ assertNotNull(f);
+ assertEquals("prob", f.getShortName());
+ } catch (AnalysisEngineProcessException e) {
+ fail(e.getLocalizedMessage());
+ }
+ }
+
+ @Test
+ void testGetOptionalFeatureParameterWithInvalidFeatureName() {
+ UimaContext ctx = ae.getUimaContext();
+ try {
+ final Type t = AnnotatorUtil.getRequiredTypeParameter(ctx,
cas.getTypeSystem(),
+ UimaUtil.SENTENCE_TYPE_PARAMETER);
+ Feature f = AnnotatorUtil.getOptionalFeatureParameter(ctx, t,
+ "xyz", CAS.TYPE_NAME_DOUBLE);
+ assertNull(f);
+ } catch (AnalysisEngineProcessException e) {
+ fail(e.getLocalizedMessage());
+ }
+ }
+
+ @Test
+ void testGetOptionalBooleanParameterWithMismatchingName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getOptionalBooleanParameter(
+ ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER));
+ }
+
+ @Test
+ void testGetOptionalFloatParameterWithMismatchingName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getOptionalFloatParameter( // optional variant, as the test name states
+ ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER));
+ }
+
+ @Test
+ void testGetOptionalIntegerParameterWithMismatchingName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getOptionalIntegerParameter(
+ ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER));
+ }
+
+ @Test
+ void testGetOptionalStringArrayParameterWithMismatchingName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getOptionalStringArrayParameter(
+ ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER));
+ }
+
+ @Test
+ void testGetRequiredBooleanParameterWithInvalidName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getRequiredBooleanParameter(
+ ae.getUimaContext(), "xyz"));
+ }
+
+ @Test
+ void testGetRequiredFloatParameterWithInvalidName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getRequiredFloatParameter(
+ ae.getUimaContext(), "xyz"));
+ }
+
+ @Test
+ void testGetRequiredIntegerParameterWithInvalidName() {
+ assertThrows(ResourceInitializationException.class, () ->
+ AnnotatorUtil.getRequiredIntegerParameter(
+ ae.getUimaContext(), "xyz"));
+ }
+
+ /*
+ * This test won't pass as OpenNLP's resource-like classes do not implement:
+ * 'org.apache.uima.resource.DataSource', conflict:
ResourceManager_impl.class -> line 517
+ */
+ @Test
+ @Disabled
+ void testGetOptionalResourceAsStream() {
+ try (InputStream in = AnnotatorUtil.getOptionalResourceAsStream(
+ ae.getUimaContext(), "opennlp.uima.ModelName")) {
+ assertNotNull(in);
+ } catch (ResourceInitializationException | IOException e) {
+ fail(e.getLocalizedMessage());
+ }
+ }
+}
diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
b/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
index f909afc3..5e2d029b 100644
--- a/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
+++ b/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java
@@ -41,7 +41,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
-public class CasUtil {
+public final class CasUtil {
private static final Logger logger = LoggerFactory.getLogger(CasUtil.class);
diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/OpennlpUtilTest.java
b/opennlp-uima/src/test/java/opennlp/uima/util/OpennlpUtilTest.java
new file mode 100644
index 00000000..332784a1
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/util/OpennlpUtilTest.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import opennlp.tools.EnabledWhenCDNAvailable;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.DownloadUtil;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.uima.AbstractTest;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
+public class OpennlpUtilTest extends AbstractTest {
+
+ @TempDir
+ private Path tmp;
+
+ private static SentenceModel sentModel;
+
+ @BeforeAll
+ public static void initEnv() throws IOException {
+ sentModel = DownloadUtil.downloadModel(
+ "en", DownloadUtil.ModelType.SENTENCE_DETECTOR,
SentenceModel.class);
+ }
+
+ @Test
+ void testSerialize() throws IOException {
+ // prepare
+ final File outModel = tmp.resolve("sent-detect-model.bin").toFile();
+ outModel.deleteOnExit();
+ assertFalse(outModel.exists());
+ // test
+ OpennlpUtil.serialize(sentModel, outModel);
+ assertTrue(outModel.exists());
+ }
+
+ @Test
+ void testSerializeInvalid1() {
+ final File outModel = tmp.resolve("sent-detect-model.bin").toFile();
+ outModel.deleteOnExit();
+ assertFalse(outModel.exists());
+ assertThrows(IllegalArgumentException.class, () ->
OpennlpUtil.serialize(null, outModel));
+ }
+
+ @Test
+ void testSerializeInvalid2() {
+ assertThrows(IllegalArgumentException.class, () ->
OpennlpUtil.serialize(sentModel, null));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin"})
+ void testLoadBytes(String file) {
+ try {
+ byte[] data = OpennlpUtil.loadBytes(OPENNLP_DIR.resolve(file).toFile());
+ assertNotNull(data);
+ assertTrue(data.length > 0);
+ } catch (IOException e) {
+ fail(e.getLocalizedMessage());
+ }
+ }
+
+ @ParameterizedTest
+ @ValueSource(booleans = {true, false})
+ void testLoadTrainingParams(boolean seqTrainingAllowed) {
+ final String trainingParamsFile = "training-params-test.conf";
+ final String trainingParamsPath =
Path.of(TARGET_DIR).resolve(trainingParamsFile).
+ toAbsolutePath().toString();
+ try {
+ TrainingParameters params =
OpennlpUtil.loadTrainingParams(trainingParamsPath, seqTrainingAllowed);
+ assertNotNull(params);
+ assertEquals("MAXENT", params.getStringParameter("Algorithm", "?"));
+ assertEquals(150, params.getIntParameter("Iterations", 1));
+ assertEquals(5, params.getIntParameter("Cutoff", 1));
+ assertEquals(4, params.getIntParameter("Threads", 1));
+ } catch (ResourceInitializationException e) {
+ fail(e.getLocalizedMessage(), e); // the cause may be null, never dereference it
+ }
+ }
+
+ @Test
+ void testLoadTrainingParamsWithInvalidFileContent() {
+ final String trainingParamsFile = "training-params-invalid.conf";
+ final String trainingParamsPath =
Path.of(TARGET_DIR).resolve(trainingParamsFile).
+ toAbsolutePath().toString();
+ assertThrows(ResourceInitializationException.class, () ->
+ OpennlpUtil.loadTrainingParams(trainingParamsPath, false));
+ }
+
+ @Test
+ void testLoadTrainingParamsNullYieldsDefaultParams() {
+ try {
+ TrainingParameters params = OpennlpUtil.loadTrainingParams(null, true);
+ assertNotNull(params);
+ assertEquals("MAXENT", params.getStringParameter("Algorithm", "?"));
+ assertEquals(100, params.getIntParameter("Iterations", 1));
+ assertEquals(5, params.getIntParameter("Cutoff", 1));
+ assertEquals(1, params.getIntParameter("Threads", 1));
+ } catch (ResourceInitializationException e) {
+ fail(e.getLocalizedMessage(), e); // the cause may be null, never dereference it
+ }
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {" ", "\t", "\n"})
+ void testLoadTrainingParamsInvalid(String fileName) {
+ assertThrows(ResourceInitializationException.class, () ->
+ OpennlpUtil.loadTrainingParams(fileName, false));
+ }
+
+}
diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/UimaUtilTest.java
b/opennlp-uima/src/test/java/opennlp/uima/util/UimaUtilTest.java
new file mode 100644
index 00000000..d101a76b
--- /dev/null
+++ b/opennlp-uima/src/test/java/opennlp/uima/util/UimaUtilTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.util;
+
+import java.io.IOException;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import opennlp.tools.EnabledWhenCDNAvailable;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.DownloadUtil;
+import opennlp.uima.AbstractUimaTest;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.fail;
+
+@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
+class UimaUtilTest extends AbstractUimaTest {
+
+ private static final String DOCUMENT_TEXT =
+ "This is a dummy document text for initialization and
reconfiguration.";
+
+ private AnalysisEngine ae;
+
+ private AnnotationFS featureAnnotation;
+ private Type type;
+
+ // SUT
+ private CAS cas;
+
+ @BeforeAll
+ public static void initEnv() throws IOException {
+ // ensure referenced UD models are present in download home
+ DownloadUtil.downloadModel("en", DownloadUtil.ModelType.SENTENCE_DETECTOR,
SentenceModel.class);
+ }
+
+ @BeforeEach
+ public void setUp() {
+ String descName = "SentenceDetector.xml";
+ try {
+ ae = produceAE(descName);
+ assertNotNull(ae);
+ cas = ae.newCAS();
+ cas.setDocumentLanguage("en");
+ cas.setDocumentText(DOCUMENT_TEXT);
+ ae.process(cas);
+ // type that matches the descriptor's topic: sentences
+ type = AnnotatorUtil.getType(cas.getTypeSystem(),
"opennlp.uima.Sentence");
+ featureAnnotation = cas.createAnnotation(type, 0,
DOCUMENT_TEXT.length());
+ } catch (IOException | InvalidXMLException |
ResourceInitializationException |
+ AnalysisEngineProcessException e) {
+ // e.getCause() may be null: hand the exception itself to fail(..) instead
+ fail(e.getLocalizedMessage() + " for desc " + descName, e);
+ }
+ }
+
+ @AfterEach
+ public void tearDown() {
+ if (ae != null) {
+ ae.destroy();
+ }
+ }
+
+ @Test
+ void testRemoveAnnotations() {
+ // prepare
+ AnnotationIndex<AnnotationFS> annotationIndex =
cas.getAnnotationIndex(type);
+ assertNotNull(annotationIndex);
+ assertEquals(1, annotationIndex.size());
+ // test
+ UimaUtil.removeAnnotations(cas, featureAnnotation, type);
+ annotationIndex = cas.getAnnotationIndex(type);
+ assertNotNull(annotationIndex);
+ assertEquals(0, annotationIndex.size());
+ }
+
+ @Test
+ void testRemoveAnnotationsNoAnnotationsInvalidCas() {
+ assertThrows(IllegalArgumentException.class, () ->
+ UimaUtil.removeAnnotations(null, featureAnnotation, type));
+ }
+
+ @Test
+ void testRemoveAnnotationsNoAnnotationsInvalidType() {
+ assertThrows(IllegalArgumentException.class, () ->
+ UimaUtil.removeAnnotations(cas, featureAnnotation, null));
+ }
+
+}
diff --git a/opennlp-uima/src/test/resources/simplelogger.properties
b/opennlp-uima/src/test/resources/simplelogger.properties
new file mode 100644
index 00000000..eea25a61
--- /dev/null
+++ b/opennlp-uima/src/test/resources/simplelogger.properties
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+org.slf4j.simpleLogger.defaultLogLevel=warn
diff --git a/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml
b/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml
index ecca12e4..4aede3d9 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml
@@ -25,7 +25,8 @@
<annotatorImplementationName>opennlp.uima.chunker.Chunker</annotatorImplementationName>
<analysisEngineMetaData>
<name>Chunker</name>
- <version>1.5.2-incubating</version>
+ <description/>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -131,7 +132,7 @@
<externalResource>
<name>ChunkerModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-chunker.bin</fileUrl>
+ <fileUrl>file:en-chunker.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.chunker.ChunkerModelResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml
index d6191608..164ac991 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Date Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -104,7 +104,7 @@
<externalResource>
<name>DateModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-date.bin</fileUrl>
+ <fileUrl>file:en-ner-date.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
index 97e19b24..a4c91067 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.DictionaryNameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Dictionary Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -108,7 +108,7 @@
<name>NameFinderDictionary</name>
<description/>
<fileResourceSpecifier>
- <fileUrl>file:src/test/resources/dictionary.dic</fileUrl>
+ <fileUrl>file:dictionary.dic</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.dictionary.DictionaryResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml
index 51f5079d..82385b6a 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Location Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -104,7 +104,7 @@
<externalResource>
<name>LocationModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-location.bin</fileUrl>
+ <fileUrl>file:en-ner-location.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml
index 8df918dc..fb76b23b 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Money Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -104,7 +104,7 @@
<externalResource>
<name>MoneyModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-money.bin</fileUrl>
+ <fileUrl>file:en-ner-money.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/OpenNlpTextAnalyzer.xml
b/opennlp-uima/src/test/resources/test-descriptors/OpenNlpTextAnalyzer.xml
new file mode 100644
index 00000000..9c38a0b4
--- /dev/null
+++ b/opennlp-uima/src/test/resources/test-descriptors/OpenNlpTextAnalyzer.xml
@@ -0,0 +1,266 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>false</primitive>
+
+ <delegateAnalysisEngineSpecifiers>
+ <delegateAnalysisEngine key="SentenceDetector">
+ <import location="SentenceDetector.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="Tokenizer">
+ <import location="Tokenizer.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="DateFinder">
+ <import location="DateNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="PersonFinder">
+ <import location="PersonNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="OrganizationFinder">
+ <import location="OrganizationNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="MoneyFinder">
+ <import location="MoneyNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="LocationFinder">
+ <import location="LocationNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="PercentageFinder">
+ <import location="PercentageNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="TimeFinder">
+ <import location="TimeNameFinder.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="PosTagger">
+ <import location="PosTagger.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="Chunker">
+ <import location="Chunker.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="Parser">
+ <import location="Parser.xml" />
+ </delegateAnalysisEngine>
+ </delegateAnalysisEngineSpecifiers>
+
+ <analysisEngineMetaData>
+ <name>OpenNlpTextAnalyzer</name>
+ <description />
+ <version>${project.version}</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters />
+ <configurationParameterSettings />
+ <flowConstraints>
+ <fixedFlow>
+ <node>SentenceDetector</node>
+ <node>Tokenizer</node>
+ <node>PersonFinder</node>
+ <node>OrganizationFinder</node>
+ <node>MoneyFinder</node>
+ <node>DateFinder</node>
+ <node>LocationFinder</node>
+ <node>PercentageFinder</node>
+ <node>TimeFinder</node>
+ <node>PosTagger</node>
+ <node>Chunker</node>
+ <node>Parser</node>
+ </fixedFlow>
+ </flowConstraints>
+ <capabilities>
+ <capability>
+ <inputs />
+ <outputs />
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+
<multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+
+ <resourceManagerConfiguration>
+
+ <externalResources>
+ <externalResource>
+ <name>SentenceModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.sentdetect.SentenceModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>TokenModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:opennlp-en-ud-ewt-tokens-1.2-2.5.0.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>PersonModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:en-ner-person.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>PercentageModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:en-ner-percentage.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>OrganizationModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:en-ner-organization.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>MoneyModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:en-ner-money.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>DateModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:en-ner-date.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>LocationModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:en-ner-location.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>TimeModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:en-ner-time.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>PosModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:opennlp-en-ud-ewt-pos-1.2-2.5.0.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.postag.POSModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>ChunkerModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:en-chunker.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.chunker.ChunkerModelResourceImpl</implementationName>
+ </externalResource>
+ <externalResource>
+ <name>ParserModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:en-parser-chunking.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.parser.ParserModelResourceImpl</implementationName>
+ </externalResource>
+ </externalResources>
+
+ <externalResourceBindings>
+ <externalResourceBinding>
+
<key>SentenceDetector/opennlp.uima.ModelName</key>
+ <resourceName>SentenceModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>Tokenizer/opennlp.uima.ModelName</key>
+ <resourceName>TokenModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>PersonFinder/opennlp.uima.ModelName</key>
+ <resourceName>PersonModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+
<key>PercentageFinder/opennlp.uima.ModelName</key>
+ <resourceName>PercentageModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+
<key>OrganizationFinder/opennlp.uima.ModelName</key>
+ <resourceName>OrganizationModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>MoneyFinder/opennlp.uima.ModelName</key>
+ <resourceName>MoneyModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>DateFinder/opennlp.uima.ModelName</key>
+ <resourceName>DateModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>LocationFinder/opennlp.uima.ModelName</key>
+ <resourceName>LocationModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>TimeFinder/opennlp.uima.ModelName</key>
+ <resourceName>TimeModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>PosTagger/opennlp.uima.ModelName</key>
+ <resourceName>PosModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>Chunker/opennlp.uima.ModelName</key>
+ <resourceName>ChunkerModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>Parser/opennlp.uima.ModelName</key>
+ <resourceName>ParserModel</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+
+ </resourceManagerConfiguration>
+</analysisEngineDescription>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml
index 15d9cb52..280e17f4 100644
---
a/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml
+++
b/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Organization Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -104,7 +104,7 @@
<externalResource>
<name>OrganizationModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-organization.bin</fileUrl>
+ <fileUrl>file:en-ner-organization.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
b/opennlp-uima/src/test/resources/test-descriptors/Parser.xml
similarity index 59%
copy from opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
copy to opennlp-uima/src/test/resources/test-descriptors/Parser.xml
index c10dad06..36709ad3 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/Parser.xml
@@ -22,9 +22,10 @@
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
-
<annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName>
+
<annotatorImplementationName>opennlp.uima.parser.Parser</annotatorImplementationName>
<analysisEngineMetaData>
- <name>Tokenizer</name>
+ <name>Parser</name>
+ <description/>
<version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -41,14 +42,36 @@
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.ParseType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.TypeFeature</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
<configurationParameter>
-
<name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name>
+ <name>opennlp.uima.ChildrenFeature</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.ProbabilityFeature</name>
<type>String</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
</configurationParameters>
+
<configurationParameterSettings>
<nameValuePair>
<name>opennlp.uima.TokenType</name>
@@ -59,7 +82,31 @@
<nameValuePair>
<name>opennlp.uima.SentenceType</name>
<value>
-
<string>uima.tcas.DocumentAnnotation</string>
+ <string>opennlp.uima.Sentence</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.ParseType</name>
+ <value>
+ <string>opennlp.uima.Parse</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.TypeFeature</name>
+ <value>
+ <string>parseType</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.ChildrenFeature</name>
+ <value>
+ <string>children</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <value>
+ <string>prob</string>
</value>
</nameValuePair>
</configurationParameterSettings>
@@ -85,28 +132,29 @@
</operationalProperties>
</analysisEngineMetaData>
+ <resourceManagerConfiguration>
+ <externalResources>
+ <externalResource>
+ <name>ParserModel</name>
+ <fileResourceSpecifier>
+
<fileUrl>file:en-parser-chunking.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.parser.ParserModelResourceImpl</implementationName>
+ </externalResource>
+ </externalResources>
+
+ <externalResourceBindings>
+ <externalResourceBinding>
+ <key>opennlp.uima.ModelName</key>
+ <resourceName>ParserModel</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+ </resourceManagerConfiguration>
+
<externalResourceDependencies>
<externalResourceDependency>
<key>opennlp.uima.ModelName</key>
-
<interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName>
+
<interfaceName>opennlp.uima.parser.ParserModelResource</interfaceName>
</externalResourceDependency>
</externalResourceDependencies>
-
- <resourceManagerConfiguration>
- <externalResources>
- <externalResource>
- <name>TokenModel</name>
- <fileResourceSpecifier>
-
<fileUrl>file:test-models/en-token.bin</fileUrl>
- </fileResourceSpecifier>
-
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
- </externalResource>
- </externalResources>
- <externalResourceBindings>
- <externalResourceBinding>
- <key>opennlp.uima.ModelName</key>
- <resourceName>TokenModel</resourceName>
- </externalResourceBinding>
- </externalResourceBindings>
- </resourceManagerConfiguration>
</analysisEngineDescription>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml
index 695d58e2..34c481d9 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Percentage Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -104,7 +104,7 @@
<externalResource>
<name>PercentageModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-percentage.bin</fileUrl>
+ <fileUrl>file:en-ner-percentage.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml
index 250629e7..1f5d2d15 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Person Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -49,6 +49,13 @@
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
@@ -73,6 +80,13 @@
<string>opennlp.uima.Person</string>
</value>
</nameValuePair>
+
+ <nameValuePair>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <value>
+ <string>prob</string>
+ </value>
+ </nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
@@ -104,7 +118,7 @@
<externalResource>
<name>PersonModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-person.bin</fileUrl>
+ <fileUrl>file:en-ner-person.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git a/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml
b/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml
index e3eba91f..fab54f87 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.postag.POSTagger</annotatorImplementationName>
<analysisEngineMetaData>
<name>POS Tagger</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -49,6 +49,13 @@
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
@@ -72,6 +79,13 @@
<string>pos</string>
</value>
</nameValuePair>
+
+ <nameValuePair>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <value>
+ <string>prob</string>
+ </value>
+ </nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
@@ -103,7 +117,7 @@
<externalResource>
<name>PosModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-pos-maxent.bin</fileUrl>
+ <fileUrl>file:opennlp-en-ud-ewt-pos-1.2-2.5.0.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.postag.POSModelResourceImpl</implementationName>
</externalResource>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml
b/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml
index 2a020341..3b901664 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml
@@ -26,7 +26,7 @@
<annotatorImplementationName>opennlp.uima.sentdetect.SentenceDetector</annotatorImplementationName>
<analysisEngineMetaData>
<name>Sentence Detector</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
<configurationParameter>
@@ -41,8 +41,13 @@
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
+ <configurationParameter>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
</configurationParameters>
-
<configurationParameterSettings>
<nameValuePair>
@@ -51,6 +56,12 @@
<string>opennlp.uima.Sentence</string>
</value>
</nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <value>
+ <string>prob</string>
+ </value>
+ </nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
@@ -82,7 +93,7 @@
<externalResource>
<name>SentenceModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-sent.bin</fileUrl>
+ <fileUrl>file:opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.sentdetect.SentenceModelResourceImpl</implementationName>
</externalResource>
diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
b/opennlp-uima/src/test/resources/test-descriptors/SimpleTokenizer.xml
similarity index 68%
copy from opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
copy to opennlp-uima/src/test/resources/test-descriptors/SimpleTokenizer.xml
index c10dad06..1279922a 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/SimpleTokenizer.xml
@@ -22,7 +22,7 @@
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
-
<annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName>
+
<annotatorImplementationName>opennlp.uima.tokenize.SimpleTokenizer</annotatorImplementationName>
<analysisEngineMetaData>
<name>Tokenizer</name>
<version>${project.version}</version>
@@ -41,13 +41,6 @@
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
-
- <configurationParameter>
-
<name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
@@ -84,29 +77,7 @@
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
</operationalProperties>
</analysisEngineMetaData>
-
- <externalResourceDependencies>
- <externalResourceDependency>
- <key>opennlp.uima.ModelName</key>
-
<interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName>
- </externalResourceDependency>
- </externalResourceDependencies>
- <resourceManagerConfiguration>
- <externalResources>
- <externalResource>
- <name>TokenModel</name>
- <fileResourceSpecifier>
-
<fileUrl>file:test-models/en-token.bin</fileUrl>
- </fileResourceSpecifier>
-
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
- </externalResource>
- </externalResources>
- <externalResourceBindings>
- <externalResourceBinding>
- <key>opennlp.uima.ModelName</key>
- <resourceName>TokenModel</resourceName>
- </externalResourceBinding>
- </externalResourceBindings>
- </resourceManagerConfiguration>
+ <resourceManagerConfiguration/>
+
</analysisEngineDescription>
diff --git
a/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml
b/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml
index 846b5a85..f447f078 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml
@@ -25,7 +25,7 @@
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
<analysisEngineMetaData>
<name>Time Name Finder</name>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<configurationParameters>
@@ -105,7 +105,7 @@
<externalResource>
<name>TimeModel</name>
<fileResourceSpecifier>
- <fileUrl>file:test-models/en-ner-time.bin</fileUrl>
+ <fileUrl>file:en-ner-time.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
</externalResource>
diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
b/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
index c10dad06..fd916542 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
@@ -48,6 +48,13 @@
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
@@ -62,6 +69,12 @@
<string>uima.tcas.DocumentAnnotation</string>
</value>
</nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.ProbabilityFeature</name>
+ <value>
+ <string>prob</string>
+ </value>
+ </nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
@@ -97,7 +110,7 @@
<externalResource>
<name>TokenModel</name>
<fileResourceSpecifier>
-
<fileUrl>file:test-models/en-token.bin</fileUrl>
+
<fileUrl>file:opennlp-en-ud-ewt-tokens-1.2-2.5.0.bin</fileUrl>
</fileResourceSpecifier>
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
</externalResource>
diff --git a/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml
b/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml
index d1994e0d..1d76fd51 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml
@@ -20,19 +20,26 @@
-->
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
- <name>OpenNLP TypeSystem</name>
+ <name>Apache OpenNLP TypeSystem</name>
<description>
This is the default OpenNLP type system. All the sample
descriptors reference the types in this type system. To replace
it against
a custom type system change the mapping in the descriptors to
the
custom types and reference the custom type system.
</description>
- <version>1.5.2-incubating</version>
+ <version>${project.version}</version>
<vendor>Apache Software Foundation</vendor>
<types>
<typeDescription>
<name>opennlp.uima.Sentence</name>
<supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>prob</name>
+ <description>Probability</description>
+
<rangeTypeName>uima.cas.Double</rangeTypeName>
+ </featureDescription>
+ </features>
</typeDescription>
<typeDescription>
@@ -45,6 +52,11 @@
<description>Part of
speech</description>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
+ <featureDescription>
+ <name>prob</name>
+ <description>Probability</description>
+
<rangeTypeName>uima.cas.Double</rangeTypeName>
+ </featureDescription>
</features>
</typeDescription>
@@ -63,6 +75,13 @@
<typeDescription>
<name>opennlp.uima.Person</name>
<supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>prob</name>
+ <description>Probability</description>
+
<rangeTypeName>uima.cas.Double</rangeTypeName>
+ </featureDescription>
+ </features>
</typeDescription>
<typeDescription>
@@ -94,5 +113,27 @@
<name>opennlp.uima.Percentage</name>
<supertypeName>uima.tcas.Annotation</supertypeName>
</typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Parse</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>parseType</name>
+ <description>Type of the parse
node</description>
+
<rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>children</name>
+ <description>Leaf nodes</description>
+
<rangeTypeName>uima.cas.FSArray</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>prob</name>
+ <description>Probability</description>
+
<rangeTypeName>uima.cas.Double</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
</types>
</typeSystemDescription>
\ No newline at end of file
diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
b/opennlp-uima/src/test/resources/test-descriptors/WhitespaceTokenizer.xml
similarity index 68%
copy from opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
copy to opennlp-uima/src/test/resources/test-descriptors/WhitespaceTokenizer.xml
index c10dad06..3ac16a8f 100644
--- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml
+++ b/opennlp-uima/src/test/resources/test-descriptors/WhitespaceTokenizer.xml
@@ -22,7 +22,7 @@
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
-
<annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName>
+
<annotatorImplementationName>opennlp.uima.tokenize.WhitespaceTokenizer</annotatorImplementationName>
<analysisEngineMetaData>
<name>Tokenizer</name>
<version>${project.version}</version>
@@ -41,13 +41,6 @@
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
-
- <configurationParameter>
-
<name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
@@ -84,29 +77,7 @@
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
</operationalProperties>
</analysisEngineMetaData>
-
- <externalResourceDependencies>
- <externalResourceDependency>
- <key>opennlp.uima.ModelName</key>
-
<interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName>
- </externalResourceDependency>
- </externalResourceDependencies>
- <resourceManagerConfiguration>
- <externalResources>
- <externalResource>
- <name>TokenModel</name>
- <fileResourceSpecifier>
-
<fileUrl>file:test-models/en-token.bin</fileUrl>
- </fileResourceSpecifier>
-
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
- </externalResource>
- </externalResources>
- <externalResourceBindings>
- <externalResourceBinding>
- <key>opennlp.uima.ModelName</key>
- <resourceName>TokenModel</resourceName>
- </externalResourceBinding>
- </externalResourceBindings>
- </resourceManagerConfiguration>
+ <resourceManagerConfiguration/>
+
</analysisEngineDescription>
diff --git a/opennlp-uima/src/test/resources/training-params-invalid.conf
b/opennlp-uima/src/test/resources/training-params-invalid.conf
new file mode 100644
index 00000000..2faff3de
--- /dev/null
+++ b/opennlp-uima/src/test/resources/training-params-invalid.conf
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+Algorithm=XYZ
+Iterations=100
+Cutoff=5
+Threads=1
\ No newline at end of file
diff --git a/opennlp-uima/src/test/resources/training-params-test.conf
b/opennlp-uima/src/test/resources/training-params-test.conf
new file mode 100644
index 00000000..cac2f921
--- /dev/null
+++ b/opennlp-uima/src/test/resources/training-params-test.conf
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+Algorithm=MAXENT
+Iterations=150
+Cutoff=5
+Threads=4
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index f25a07ae..47a2e958 100644
--- a/pom.xml
+++ b/pom.xml
@@ -166,11 +166,18 @@
</dependencyManagement>
<properties>
- <!-- Build Properties -->
+ <!-- Build properties -->
<java.version>17</java.version>
+ <maven.version>3.3.9</maven.version>
<maven.compiler.release>${java.version}</maven.compiler.release>
<maven.compiler.target>${java.version}</maven.compiler.target>
- <maven.version>3.3.9</maven.version>
+
+ <!-- OpenNLP properties -->
+ <opennlp.download.home>${user.home}</opennlp.download.home>
+ <opennlp.forkCount>1.0C</opennlp.forkCount>
+ <opennlp.models.version>1.2.0</opennlp.models.version>
+
+ <!-- Dependency versions -->
<junit.version>5.11.4</junit.version>
<junit5-system-exit.version>2.0.2</junit5-system-exit.version>
<uimaj.version>3.6.0</uimaj.version>
@@ -179,12 +186,10 @@
<slf4j.version>2.0.16</slf4j.version>
<log4j2.version>2.24.3</log4j2.version>
<logcaptor.version>2.10.1</logcaptor.version>
- <jmh.version>1.37</jmh.version>
<classgraph.version>4.8.179</classgraph.version>
+ <jmh.version>1.37</jmh.version>
-
- <opennlp.models.version>1.2.0</opennlp.models.version>
- <opennlp.forkCount>1.0C</opennlp.forkCount>
+ <!-- Plugin versions -->
<coveralls.maven.plugin>4.3.0</coveralls.maven.plugin>
<jacoco.maven.plugin>0.8.12</jacoco.maven.plugin>
<maven.assembly.plugin>3.7.1</maven.assembly.plugin>
@@ -192,7 +197,6 @@
<maven.javadoc.plugin>3.11.2</maven.javadoc.plugin>
<forbiddenapis.plugin>3.8</forbiddenapis.plugin>
<license-maven-plugin.version>2.5.0</license-maven-plugin.version>
-
</properties>
<build>
@@ -251,9 +255,9 @@
<version>${jacoco.maven.plugin}</version>
<configuration>
<excludes>
- <exclude>**/stemmer/*</exclude>
+ <exclude>**/stemmer/*</exclude>
<exclude>**/stemmer/snowball/*</exclude>
- </excludes>
+ </excludes>
</configuration>
<executions>
<execution>
@@ -314,6 +318,7 @@
</execution>
</executions>
<configuration>
+ <argLine>-DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine>
<excludes>
<exclude>**/*Test.java</exclude>
</excludes>
@@ -612,6 +617,13 @@
</build>
</profile>
+ <profile>
+ <id>ci</id>
+ <properties>
+ <opennlp.download.home>${project.build.directory}</opennlp.download.home>
+ </properties>
+ </profile>
+
<profile>
<id>eval-tests</id>
<properties>