This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-385-Add-unit-tests-for-OpenNLP-UIMA-component in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit a1cd7919bd9c836e0e5ff452484ededb233c6af8 Author: Martin Wiesner <[email protected]> AuthorDate: Sun Jan 26 11:13:12 2025 +0100 OPENNLP-385: Add unit tests for OpenNLP UIMA component - adapts and reactivates AnnotatorsInitializationTest by T. Teofili, originally provided in JIRA issue, to execute for all xml descriptors - converts AnnotatorsInitializationTest to several integration tests executed via Maven failsafe plugin - adds Parser.xml to 'test-descriptors' - adds more test classes - configures Maven resource filtering for xml 'test-descriptors' - moves 'ci' profile to upper-level pom.xml for re-use in opennlp-uima component - adds simplelogger.properties to test resources to avoid log spam during build caused by internal UIMA logger config --- opennlp-tools/pom.xml | 15 +- opennlp-uima/pom.xml | 21 +- .../main/java/opennlp/uima/chunker/Chunker.java | 7 +- .../uima/doccat/AbstractDocumentCategorizer.java | 5 +- .../opennlp/uima/doccat/DocumentCategorizer.java | 4 +- .../opennlp/uima/namefind/AbstractNameFinder.java | 14 +- .../uima/namefind/DictionaryNameFinder.java | 19 +- .../java/opennlp/uima/namefind/NameFinder.java | 9 +- .../java/opennlp/uima/normalizer/Normalizer.java | 13 +- .../opennlp/uima/normalizer/StringDictionary.java | 1 + .../src/main/java/opennlp/uima/parser/Parser.java | 1 + .../uima/sentdetect/AbstractSentenceDetector.java | 4 +- .../opennlp/uima/sentdetect/SentenceDetector.java | 6 +- .../uima/sentdetect/SentenceModelResourceImpl.java | 1 + .../opennlp/uima/tokenize/AbstractTokenizer.java | 8 +- .../opennlp/uima/tokenize/SimpleTokenizer.java | 5 + .../main/java/opennlp/uima/tokenize/Tokenizer.java | 3 + .../opennlp/uima/tokenize/WhitespaceTokenizer.java | 6 + .../opennlp/uima/util/AbstractModelResource.java | 6 +- .../opennlp/uima/util/AnnotationComparator.java | 4 +- .../main/java/opennlp/uima/util/AnnotatorUtil.java | 62 ++--- .../opennlp/uima/util/ContainingConstraint.java | 2 + .../main/java/opennlp/uima/util/OpennlpUtil.java | 30 
++- .../src/main/java/opennlp/uima/util/UimaUtil.java | 18 +- .../src/test/java/opennlp/uima/AbstractIT.java | 237 ++++++++++++++++++ .../java/opennlp/uima/AbstractTest.java} | 26 +- .../test/java/opennlp/uima/AbstractUimaTest.java | 77 ++++++ .../opennlp/uima/AnnotatorsInitializationTest.java | 66 ----- .../java/opennlp/uima/FullAnnotatorsFlowIT.java | 65 +++++ .../test/java/opennlp/uima/SingleAnnotatorIT.java | 82 +++++++ .../uima/dictionary/DictionaryResourceTest.java | 7 +- .../opennlp/uima/normalizer/NumberUtilTest.java | 5 +- .../uima/normalizer/StringDictionaryTest.java | 78 ++++++ .../uima/util/AnnotationComboIteratorTest.java | 4 +- .../uima/util/AnnotationComparatorTest.java | 107 +++++++++ .../java/opennlp/uima/util/AnnotatorUtilTest.java | 224 +++++++++++++++++ .../src/test/java/opennlp/uima/util/CasUtil.java | 2 +- .../java/opennlp/uima/util/OpennlpUtilTest.java | 142 +++++++++++ .../test/java/opennlp/uima/util/UimaUtilTest.java | 106 ++++++++ .../src/test/resources/simplelogger.properties | 19 ++ .../test/resources/test-descriptors/Chunker.xml | 5 +- .../resources/test-descriptors/DateNameFinder.xml | 4 +- .../test-descriptors/DictionaryNameFinder.xml | 4 +- .../test-descriptors/LocationNameFinder.xml | 4 +- .../resources/test-descriptors/MoneyNameFinder.xml | 4 +- .../test-descriptors/OpenNlpTextAnalyzer.xml | 266 +++++++++++++++++++++ .../test-descriptors/OrganizationNameFinder.xml | 4 +- .../test-descriptors/{Tokenizer.xml => Parser.xml} | 94 ++++++-- .../test-descriptors/PercentageNameFinder.xml | 4 +- .../test-descriptors/PersonNameFinder.xml | 18 +- .../test/resources/test-descriptors/PosTagger.xml | 18 +- .../test-descriptors/SentenceDetector.xml | 17 +- .../{Tokenizer.xml => SimpleTokenizer.xml} | 35 +-- .../resources/test-descriptors/TimeNameFinder.xml | 4 +- .../test/resources/test-descriptors/Tokenizer.xml | 15 +- .../test/resources/test-descriptors/TypeSystem.xml | 45 +++- .../{Tokenizer.xml => WhitespaceTokenizer.xml} | 35 +-- 
.../test/resources/training-params-invalid.conf | 22 ++ .../src/test/resources/training-params-test.conf | 22 ++ pom.xml | 30 ++- 60 files changed, 1825 insertions(+), 336 deletions(-) diff --git a/opennlp-tools/pom.xml b/opennlp-tools/pom.xml index 39a0ca9f..8023e79c 100644 --- a/opennlp-tools/pom.xml +++ b/opennlp-tools/pom.xml @@ -136,25 +136,12 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-failsafe-plugin</artifactId> - <version>${maven.failsafe.plugin}</version> - <configuration> - <argLine>-DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine> - </configuration> </plugin> + </plugins> </build> - <properties> - <opennlp.download.home>${user.home}</opennlp.download.home> - </properties> - <profiles> - <profile> - <id>ci</id> - <properties> - <opennlp.download.home>${project.build.directory}</opennlp.download.home> - </properties> - </profile> <profile> <id>jmh</id> <dependencies> diff --git a/opennlp-uima/pom.xml b/opennlp-uima/pom.xml index 91627082..2571c457 100644 --- a/opennlp-uima/pom.xml +++ b/opennlp-uima/pom.xml @@ -79,6 +79,12 @@ <scope>test</scope> </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-params</artifactId> + <scope>test</scope> + </dependency> + <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-simple</artifactId> @@ -87,11 +93,22 @@ </dependencies> <build> + <testResources> + <testResource> + <directory>src/test/resources</directory> + <filtering>true</filtering> + </testResource> + </testResources> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-failsafe-plugin</artifactId> + </plugin> + <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> - <version>3.8.1</version> <executions> <execution> <id>copy-dependencies</id> @@ -100,7 +117,7 @@ <goal>copy-dependencies</goal> </goals> <configuration> - <excludeScope>provided</excludeScope> + 
<includeScope>runtime</includeScope> <stripVersion>true</stripVersion> </configuration> </execution> diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java index 213c5abf..d356f4fb 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java +++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java @@ -109,8 +109,8 @@ public final class Chunker extends CasAnnotator_ImplBase { Logger mLogger = context.getLogger(); - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker annotator."); + if (mLogger.isLoggable(Level.DEBUG)) { + mLogger.log(Level.DEBUG, "Initializing the OpenNLP Chunker annotator."); } ChunkerModel model; @@ -172,8 +172,7 @@ public final class Chunker extends CasAnnotator_ImplBase { String[] tokens = new String[tokenAnnotationIndex.size()]; String[] pos = new String[tokenAnnotationIndex.size()]; - AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex - .size()]; + AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex.size()]; int index = 0; diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java index 768738b3..4d374fb6 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java @@ -50,6 +50,7 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase { private Type mTokenType; + @Override public void initialize(UimaContext context) throws ResourceInitializationException { @@ -59,8 +60,8 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase { Logger mLogger = context.getLogger(); - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer."); + if 
(mLogger.isLoggable(Level.DEBUG)) { + mLogger.log(Level.DEBUG, "Initializing the OpenNLP Categorizer."); } DoccatModel model; diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java index 6a3ca174..1071c09a 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java @@ -36,7 +36,7 @@ public class DocumentCategorizer extends AbstractDocumentCategorizer { private Feature mCategoryFeature; - + @Override public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { @@ -55,7 +55,7 @@ public class DocumentCategorizer extends AbstractDocumentCategorizer { AnnotationFS categoryAnnotation; - if (categoryIndex.size() > 0) { + if (!categoryIndex.isEmpty()) { categoryAnnotation = categoryIndex.iterator().next(); } else { categoryAnnotation = tcas.createAnnotation(mCategoryType, 0, diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java index 579a0ca6..3d99d266 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java @@ -77,8 +77,8 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase { mLogger = context.getLogger(); - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the " + name + "."); + if (mLogger.isLoggable(Level.DEBUG)) { + mLogger.log(Level.DEBUG, "Initializing the " + name + "."); } isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter( @@ -133,21 +133,19 @@ abstract class AbstractNameFinder extends CasAnnotator_ImplBase { mNameTypeMapping = Collections.unmodifiableMap(nameTypeMap); } - if (mNameType == null && mNameTypeMapping.size() == 0) { + if (mNameType == null && 
mNameTypeMapping.isEmpty()) { throw new AnalysisEngineProcessException( new Exception("No name type or valid name type mapping configured!")); } } - protected void postProcessAnnotations(Span[] detectedNames, - AnnotationFS[] nameAnnotations) { - } + protected abstract void postProcessAnnotations(Span[] detectedNames, + AnnotationFS[] nameAnnotations); /** * Called if the current document is completely processed. */ - protected void documentDone(CAS cas) { - } + protected abstract void documentDone(CAS cas); protected abstract Span[] find(CAS cas, String[] tokens); diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java index b3196246..f1568609 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java @@ -18,9 +18,9 @@ package opennlp.uima.namefind; import java.io.IOException; -import java.io.InputStream; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.resource.ResourceAccessException; import org.apache.uima.resource.ResourceInitializationException; @@ -66,10 +66,8 @@ public class DictionaryNameFinder extends AbstractNameFinder { String modelName = AnnotatorUtil.getRequiredStringParameter(context, UimaUtil.DICTIONARY_PARAMETER); - InputStream inModel = AnnotatorUtil.getResourceAsStream(context, - modelName); - - nameFinderDictionary = new Dictionary(inModel); + nameFinderDictionary = new Dictionary( + AnnotatorUtil.getResourceAsStream(context, modelName)); } catch (IOException ie) { throw new ResourceInitializationException( @@ -77,13 +75,22 @@ public class DictionaryNameFinder extends AbstractNameFinder { ExceptionMessages.IO_ERROR_DICTIONARY_READING, new Object[] {ie.getMessage()}); } - } mNameFinder = new opennlp.tools.namefind.DictionaryNameFinder( nameFinderDictionary); } + @Override + 
protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[] nameAnnotations) { + // nothing to do + } + + @Override + protected void documentDone(CAS cas) { + // nothing to do + } + @Override protected Span[] find(CAS cas, String[] tokens) { return mNameFinder.find(tokens); diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java index bce0105d..6d6a9186 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java @@ -113,8 +113,7 @@ public final class NameFinder extends AbstractNameFinder { * Note: Do all initialization in this method, do not use the constructor. */ @Override - public void initialize() - throws ResourceInitializationException { + public void initialize() throws ResourceInitializationException { super.initialize(); @@ -136,13 +135,12 @@ public final class NameFinder extends AbstractNameFinder { * Initializes the type system. 
*/ @Override - public void typeSystemInit(TypeSystem typeSystem) - throws AnalysisEngineProcessException { + public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { super.typeSystemInit(typeSystem); probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context, mNameType, - UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE); + UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE); documentConfidenceType = AnnotatorUtil.getOptionalTypeParameter(context, typeSystem, "opennlp.uima.DocumentConfidenceType"); @@ -160,7 +158,6 @@ public final class NameFinder extends AbstractNameFinder { Span[] names = mNameFinder.find(tokens); double[] probs = mNameFinder.probs(); - for (double prob : probs) { documentConfidence.add(prob); } diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java index f90e6fce..5acda276 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java @@ -43,7 +43,7 @@ import opennlp.uima.util.UimaUtil; /** * The Normalizer tries the structure annotations. The structured value - * is than assigned to a field of the annotation. + * is then assigned to a field of the annotation. * <p> * The process depends on the * <p> @@ -108,6 +108,7 @@ public class Normalizer extends CasAnnotator_ImplBase { * * @implNote Do all initialization in this method, do not use the constructor. 
*/ + @Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); @@ -116,8 +117,8 @@ public class Normalizer extends CasAnnotator_ImplBase { mLogger = context.getLogger(); - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Initializing the OpenNLP Normalizer annotator."); + if (mLogger.isLoggable(Level.DEBUG)) { + mLogger.log(Level.DEBUG, "Initializing the OpenNLP Normalizer annotator."); } try { @@ -141,6 +142,7 @@ public class Normalizer extends CasAnnotator_ImplBase { * Initializes the type system. * @param typeSystem type system to initialize */ + @Override public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { @@ -165,6 +167,7 @@ public class Normalizer extends CasAnnotator_ImplBase { } } + @Override public void process(CAS tcas) { FSIndex<AnnotationFS> sentenceIndex = tcas.getAnnotationIndex(mNameType); @@ -203,8 +206,8 @@ public class Normalizer extends CasAnnotator_ImplBase { try { number = NumberUtil.parse(text, language); } catch (ParseException e) { - if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, "Invalid number format: " + text); + if (mLogger.isLoggable(Level.WARN)) { + mLogger.log(Level.WARN, "Invalid number format: " + text); } continue; } diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java index d8a63f60..d930f821 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java +++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java @@ -43,6 +43,7 @@ public class StringDictionary { * @throws IOException Thrown if IO errors occurred. 
*/ public StringDictionary(InputStream in) throws IOException { + this(); DictionaryEntryPersistor.create(in, entry -> { String valueString = entry.attributes().getValue("value"); put(entry.tokens(), valueString); diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java index ad6771f2..652cf249 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java +++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java @@ -145,6 +145,7 @@ public class Parser extends CasAnnotator_ImplBase { /** * Performs parsing on the given {@link CAS} object. */ + @Override public void process(CAS cas) { FSIndex<AnnotationFS> sentences = cas.getAnnotationIndex(mSentenceType); diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java index e959d4fc..6348a893 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java @@ -57,8 +57,8 @@ public abstract class AbstractSentenceDetector extends CasAnnotator_ImplBase { logger = context.getLogger(); - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, "Initializing the OpenNLP Sentence annotator."); + if (logger.isLoggable(Level.DEBUG)) { + logger.log(Level.DEBUG, "Initializing the OpenNLP Sentence annotator."); } isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter( diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java index acb5c6bb..59bc3e6a 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java @@ -76,6 +76,7 @@ public final class SentenceDetector extends 
AbstractSentenceDetector { * <p> * Note: Do all initialization in this method, do not use the constructor. */ + @Override public void initialize(UimaContext context) throws ResourceInitializationException { @@ -98,14 +99,14 @@ public final class SentenceDetector extends AbstractSentenceDetector { /** * Initializes the type system. */ + @Override public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { super.typeSystemInit(typeSystem); probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context, - sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER, - CAS.TYPE_NAME_DOUBLE); + sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE); } @Override @@ -128,6 +129,7 @@ public final class SentenceDetector extends AbstractSentenceDetector { /** * Releases allocated resources. */ + @Override public void destroy() { // dereference model to allow garbage collection sentenceDetector = null; diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java index f41b7db7..4545f8ec 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java @@ -26,6 +26,7 @@ import opennlp.uima.util.AbstractModelResource; public class SentenceModelResourceImpl extends AbstractModelResource<SentenceModel> implements SentenceModelResource { + @Override public SentenceModel getModel() { return model; } diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java index b1f7abcb..3c658a0a 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java @@ -91,9 +91,8 @@ public abstract class AbstractTokenizer 
extends CasAnnotator_ImplBase { UimaUtil.TOKEN_TYPE_PARAMETER); } - protected void postProcessAnnotations(Span[] tokens, - AnnotationFS[] tokenAnnotations) { - } + protected abstract void postProcessAnnotations(Span[] tokens, + AnnotationFS[] tokenAnnotations); protected abstract Span[] tokenize(CAS cas, AnnotationFS sentence); @@ -116,8 +115,7 @@ public abstract class AbstractTokenizer extends CasAnnotator_ImplBase { AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenSpans.length]; for (int i = 0; i < tokenSpans.length; i++) { - tokenAnnotations[i] = cas - .createAnnotation(tokenType, + tokenAnnotations[i] = cas.createAnnotation(tokenType, sentenceOffset + tokenSpans[i].getStart(), sentenceOffset + tokenSpans[i].getEnd()); diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java index a62d531c..7991582f 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java @@ -57,4 +57,9 @@ public final class SimpleTokenizer extends AbstractTokenizer { protected Span[] tokenize(CAS cas, AnnotationFS sentence) { return tokenizer.tokenizePos(sentence.getCoveredText()); } + + @Override + protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[] nameAnnotations) { + // nothing to do + } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java index e2af1eb9..38d2b343 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java @@ -80,6 +80,7 @@ public final class Tokenizer extends AbstractTokenizer { * <p> * Note: Do all initialization in this method, do not use the constructor. 
*/ + @Override public void initialize(UimaContext context) throws ResourceInitializationException { @@ -102,6 +103,7 @@ public final class Tokenizer extends AbstractTokenizer { /** * Initializes the type system. */ + @Override public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { @@ -135,6 +137,7 @@ public final class Tokenizer extends AbstractTokenizer { /** * Releases allocated resources. */ + @Override public void destroy() { // dereference model to allow garbage collection tokenizer = null; diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java index 0151b755..bc8c6527 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java @@ -52,4 +52,10 @@ public final class WhitespaceTokenizer extends AbstractTokenizer { return opennlp.tools.tokenize.WhitespaceTokenizer.INSTANCE. 
tokenizePos(sentence.getCoveredText()); } + + @Override + protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[] nameAnnotations) { + // nothing to do + } + } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java b/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java index 21405a7f..3124592a 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java @@ -42,10 +42,8 @@ public abstract class AbstractModelResource<T> implements SharedResourceObject { try { model = loadModel(resource.getInputStream()); } catch (IOException e) { - throw new ResourceInitializationException( - ExceptionMessages.MESSAGE_CATALOG, - ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] { - e.getMessage()}, e); + throw new ResourceInitializationException(ExceptionMessages.MESSAGE_CATALOG, + ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] {e.getMessage()}, e); } } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java index 59a2290d..30061d95 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java @@ -24,8 +24,7 @@ import org.apache.uima.cas.text.AnnotationFS; /** * Checks two {@link AnnotationFS annotations} for equality. */ -public class AnnotationComparator implements Comparator<AnnotationFS> -{ +public class AnnotationComparator implements Comparator<AnnotationFS> { /** * Compares the start indexes of the annotations. 
@@ -35,6 +34,7 @@ public class AnnotationComparator implements Comparator<AnnotationFS> * * @return 0 if equals, < 0 if before and > 0 if after */ + @Override public int compare(AnnotationFS a, AnnotationFS b) { return a.getBegin() - b.getBegin(); } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java index 17227270..d2685efe 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java @@ -33,7 +33,7 @@ import org.apache.uima.util.Logger; import opennlp.tools.dictionary.Dictionary; /** - * This is a utility class for Annotators. + * Provides utility methods for Annotators. */ public final class AnnotatorUtil { @@ -48,10 +48,14 @@ public final class AnnotatorUtil { * @param name The name of the type to retrieve. * * @return The {@link Type} for the {@code name}. + * @throws IllegalArgumentException Thrown if parameters were invalid. * @throws OpenNlpAnnotatorProcessException Thrown if no {@link Type} could be found. */ public static Type getType(TypeSystem typeSystem, String name) throws AnalysisEngineProcessException { + if (typeSystem == null) { + throw new IllegalArgumentException("Parameter 'typeSystem' must not be null"); + } Type type = typeSystem.getType(name); if (type == null) { @@ -82,7 +86,6 @@ public final class AnnotatorUtil { } } - /** * Retrieves a {@link Feature} for a specified type and {@code featureName}, * otherwise an exception is thrown. @@ -91,13 +94,16 @@ public final class AnnotatorUtil { * @param featureName The name of the feature to retrieve. * * @return The {@link Feature} if found. + * @throws IllegalArgumentException Thrown if parameters were invalid. * @throws OpenNlpAnnotatorProcessException Thrown if no {@link Feature} did match. 
*/ public static Feature getRequiredFeature(Type type, String featureName) throws AnalysisEngineProcessException { + if (type == null) { + throw new IllegalArgumentException("Parameter 'type' must not be null"); + } Feature feature = type.getFeatureByBaseName(featureName); - if (feature == null) { throw new OpenNlpAnnotatorProcessException( ExceptionMessages.FEATURE_NOT_FOUND, new Object[] {type.getName(), featureName}); @@ -114,16 +120,13 @@ public final class AnnotatorUtil { * @param rangeType The expected range type. * * @return The {@link Feature} if found. + * @throws IllegalArgumentException Thrown if parameters were invalid. * @throws OpenNlpAnnotatorProcessException Thrown if no {@link Feature} did match. */ - public static Feature getRequiredFeature(Type type, String featureName, - String rangeType) + public static Feature getRequiredFeature(Type type, String featureName, String rangeType) throws AnalysisEngineProcessException { - Feature feature = getRequiredFeature(type, featureName); - checkFeatureType(feature, rangeType); - return feature; } @@ -142,13 +145,11 @@ public final class AnnotatorUtil { throws AnalysisEngineProcessException { String featureName; - try { featureName = getRequiredStringParameter(context, featureNameParameter); } catch (ResourceInitializationException e) { throw new OpenNlpAnnotatorProcessException(e); } - return getRequiredFeature(type, featureName); } @@ -174,7 +175,6 @@ public final class AnnotatorUtil { } catch (ResourceInitializationException e) { throw new OpenNlpAnnotatorProcessException(e); } - return getRequiredFeature(type, featureName, rangeTypeName); } @@ -193,13 +193,11 @@ public final class AnnotatorUtil { throws AnalysisEngineProcessException { String typeName; - try { typeName = getRequiredStringParameter(context, parameter); } catch (ResourceInitializationException e) { throw new OpenNlpAnnotatorProcessException(e); } - return getType(typeSystem, typeName); } @@ -216,9 +214,7 @@ public final class AnnotatorUtil 
{ throws ResourceInitializationException { String value = getOptionalStringParameter(context, parameter); - checkForNull(value, parameter); - return value; } @@ -236,9 +232,7 @@ public final class AnnotatorUtil { throws ResourceInitializationException { Integer value = getOptionalIntegerParameter(context, parameter); - checkForNull(value, parameter); - return value; } @@ -256,9 +250,7 @@ public final class AnnotatorUtil { throws ResourceInitializationException { Float value = getOptionalFloatParameter(context, parameter); - checkForNull(value, parameter); - return value; } @@ -276,9 +268,7 @@ public final class AnnotatorUtil { throws ResourceInitializationException { Boolean value = getOptionalBooleanParameter(context, parameter); - checkForNull(value, parameter); - return value; } @@ -509,18 +499,15 @@ public final class AnnotatorUtil { * @param parameter The name of the parameter to retrieve. * * @return The {@link Object parameter} or {@code null} if not set. - * @throws ResourceInitializationException Thrown if the parameter type was not of the expected type. */ private static Object getOptionalParameter(UimaContext context, - String parameter) - throws ResourceInitializationException { + String parameter) { Object value = context.getConfigParameterValue(parameter); - Logger logger = context.getLogger(); - if (logger.isLoggable(Level.INFO)) { - logger.log(Level.INFO, parameter + " = " + + if (logger.isLoggable(Level.DEBUG)) { + logger.log(Level.DEBUG, parameter + " = " + (value != null ? value.toString() : "not set")); } @@ -557,8 +544,7 @@ public final class AnnotatorUtil { * @return A valid, open {@link InputStream}. * @throws ResourceInitializationException Thrown if the resource could not be found. 
*/ - public static InputStream getOptionalResourceAsStream(UimaContext context, - String name) + public static InputStream getOptionalResourceAsStream(UimaContext context, String name) throws ResourceInitializationException { final InputStream inResource; @@ -581,30 +567,20 @@ public final class AnnotatorUtil { * @return A valid {@link Dictionary} or {@code null} if IO errors occurred. * @throws ResourceInitializationException Thrown if the resource could not be found. */ - public static Dictionary createOptionalDictionary(UimaContext context, - String dictionaryParameter) + public static Dictionary createOptionalDictionary(UimaContext context, String dictionaryParameter) throws ResourceInitializationException { - String dictionaryName = AnnotatorUtil.getOptionalStringParameter(context, - dictionaryParameter); + String dictionaryName = AnnotatorUtil.getOptionalStringParameter(context, dictionaryParameter); Dictionary dictionary = null; - if (dictionaryName != null) { - Logger logger = context.getLogger(); - - try (InputStream dictIn = AnnotatorUtil.getOptionalResourceAsStream(context, - dictionaryName)) { - + try (InputStream dictIn = AnnotatorUtil.getOptionalResourceAsStream(context, dictionaryName)) { if (dictIn == null) { - String message = "The dictionary file " + dictionaryName - + " does not exist!"; - + String message = "The dictionary file " + dictionaryName + " does not exist!"; if (logger.isLoggable(Level.WARNING)) { logger.log(Level.WARNING, message); } - return null; } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java index c7433b17..93cf7055 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java @@ -17,6 +17,7 @@ package opennlp.uima.util; +import java.io.Serial; import java.util.Collection; import java.util.LinkedList; @@ -28,6 +29,7 @@ 
import org.apache.uima.cas.text.AnnotationFS; * Checks if an {@link AnnotationFS} is contained by the given AnnotationFS. */ public final class ContainingConstraint implements FSMatchConstraint { + @Serial private static final long serialVersionUID = 8393109549729168545L; private final Collection<AnnotationFS> mContainingAnnotations = new LinkedList<>(); diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java index 30c039ec..43ae0590 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java @@ -17,6 +17,7 @@ package opennlp.uima.util; +import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; import java.io.File; @@ -33,7 +34,7 @@ import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.BaseModel; /** - * This class contains util methods for the maxent library. + * Provides utility methods for OpenNLP's maxent library. */ final public class OpennlpUtil { @@ -49,8 +50,13 @@ final public class OpennlpUtil { * @param modelFile The {@link File} to serialize into. * @throws IOException Thrown if IO errors occurred. */ - public static void serialize(BaseModel model, File modelFile) - throws IOException { + public static void serialize(BaseModel model, File modelFile) throws IOException { + if (model == null) { + throw new IllegalArgumentException("Parameter 'model' must not be null"); + } + if (modelFile == null) { + throw new IllegalArgumentException("Parameter 'modelFile' must not be null"); + } try (OutputStream fileOut = new FileOutputStream(modelFile); OutputStream modelOut = new BufferedOutputStream(fileOut)) { model.serialize(modelOut); @@ -80,34 +86,36 @@ final public class OpennlpUtil { } /** - * Loads data from a given {@link File}. + * Loads data from the specified training parameters {@link File file}. 
* - * @param inFileValue The name of the {@link File} to read bytes from. + * @param trainingFilePath The path to the {@link File} to load the training parameters from. * @param isSequenceTrainingAllowed Whether the {@link TrainerFactory.TrainerType#SEQUENCE_TRAINER} * method is allowed or not. * @return The {@link TrainingParameters} that have been read. * - * @throws ResourceInitializationException Thrown if IO errors occurred or the {@code inFileValue} + * @throws ResourceInitializationException Thrown if IO errors occurred or the {@code trainingFilePath} * does not reference a valid training parameters file. */ - public static TrainingParameters loadTrainingParams(String inFileValue, + public static TrainingParameters loadTrainingParams(String trainingFilePath, boolean isSequenceTrainingAllowed) throws ResourceInitializationException { TrainingParameters params; - if (inFileValue != null) { - try (InputStream paramsIn = new FileInputStream(inFileValue)) { + if (trainingFilePath != null) { + try (InputStream paramsIn = new BufferedInputStream(new FileInputStream(trainingFilePath))) { params = new opennlp.tools.util.TrainingParameters(paramsIn); } catch (IOException e) { throw new ResourceInitializationException(e); } if (!TrainerFactory.isValid(params)) { - throw new ResourceInitializationException(new Exception("Training parameters file is invalid!")); + throw new ResourceInitializationException( + new RuntimeException("Training parameters file is invalid!")); } TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params); if (!isSequenceTrainingAllowed && TrainerFactory.TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { - throw new ResourceInitializationException(new Exception("Sequence training is not supported!")); + throw new ResourceInitializationException( + new RuntimeException("Sequence training is not supported!")); } } else { diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java 
b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java index 4b2526eb..070d4c10 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java @@ -27,7 +27,7 @@ import org.apache.uima.cas.Type; import org.apache.uima.cas.text.AnnotationFS; /** - * This is a util class for uima operations. + * Defines constants and provides utility methods for uima operations. */ public final class UimaUtil { @@ -88,13 +88,21 @@ public final class UimaUtil { * * @param cas The {@link CAS} to use. * @param containerAnnotation The {@link AnnotationFS} of the container. - * @param removeAnnotationType The {@link Type type} to remove annotations for. + * @param type The {@link Type type} to remove annotations for. + * + * @throws IllegalArgumentException Thrown if parameters were invalid. */ public static void removeAnnotations(CAS cas, - AnnotationFS containerAnnotation, Type removeAnnotationType) { + AnnotationFS containerAnnotation, Type type) { + + if (cas == null) { + throw new IllegalArgumentException("Parameter 'cas' must not be null"); + } + if (type == null) { + throw new IllegalArgumentException("Parameter 'type' must not be null"); + } - FSIndex<AnnotationFS> allRemoveAnnotations = cas - .getAnnotationIndex(removeAnnotationType); + FSIndex<AnnotationFS> allRemoveAnnotations = cas.getAnnotationIndex(type); ContainingConstraint containingConstraint = new ContainingConstraint( containerAnnotation); diff --git a/opennlp-uima/src/test/java/opennlp/uima/AbstractIT.java b/opennlp-uima/src/test/java/opennlp/uima/AbstractIT.java new file mode 100644 index 00000000..1295dead --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/AbstractIT.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.uima; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.List; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Feature; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.FloatArrayFS; +import org.apache.uima.cas.IntArrayFS; +import org.apache.uima.cas.StringArrayFS; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.jcas.tcas.Annotation; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import opennlp.tools.postag.POSModel; +import opennlp.tools.sentdetect.SentenceModel; +import opennlp.tools.tokenize.TokenizerModel; +import opennlp.tools.util.DownloadUtil; + +abstract class AbstractIT extends AbstractUimaTest { + + private static final Logger logger = LoggerFactory.getLogger(AbstractIT.class); + + protected static final String BIN = ".bin"; + + private static final String BASE_URL_MODELS_V15 = "https://opennlp.sourceforge.net/models-1.5/"; + + @BeforeAll + public static void initEnv() throws IOException { + // ensure referenced UD models are present in download home + 
DownloadUtil.downloadModel("en", DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class); + DownloadUtil.downloadModel("en", DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class); + DownloadUtil.downloadModel("en", DownloadUtil.ModelType.POS, POSModel.class); + + // ensure referenced classic model files are present in download home + for (String modelName: List.of("en-ner-organization", "en-ner-location", "en-ner-person", + "en-ner-date", "en-ner-time", "en-ner-percentage", "en-ner-money", + "en-chunker", "en-parser-chunking")) { + downloadVersion15Model(modelName + BIN); + } + } + + private static void downloadVersion15Model(String modelName) throws IOException { + downloadModel(new URL(BASE_URL_MODELS_V15 + modelName)); + } + + private static void downloadModel(URL url) throws IOException { + if (!Files.isDirectory(OPENNLP_DIR)) { + Files.createDirectories(OPENNLP_DIR); // also creates missing parents and throws an IOException on failure + } + final String filename = url.toString().substring(url.toString().lastIndexOf("/") + 1); + final Path localFile = Paths.get(OPENNLP_DIR.toString(), filename); + + if (!Files.exists(localFile)) { + logger.debug("Downloading model from {} to {}.", url, localFile); + try (final InputStream in = new BufferedInputStream(url.openStream())) { + Files.copy(in, localFile, StandardCopyOption.REPLACE_EXISTING); + } + logger.debug("Download complete."); + } + } + + /** + * Prints all Annotations to a PrintStream. + * + * @param aCAS + * the CAS containing the FeatureStructures to print + * @param aOut + * the PrintStream to which output will be written + */ + public static void printAnnotations(CAS aCAS, PrintStream aOut) { + + // Version 3 using select with Stream support + aCAS.select(Annotation.class).forEach(fs -> printFS(fs, aCAS, 0, aOut)); + } + + /** + * Prints a FeatureStructure to a PrintStream. 
+ * + * @param aFS + * the FeatureStructure to print + * @param aCAS + * the CAS containing the FeatureStructure + * @param aNestingLevel + * number of tabs to print before each line + * @param aOut + * the PrintStream to which output will be written + */ + public static void printFS(FeatureStructure aFS, CAS aCAS, int aNestingLevel, PrintStream aOut) { + Type stringType = aCAS.getTypeSystem().getType(CAS.TYPE_NAME_STRING); + + printTabs(aNestingLevel, aOut); + aOut.println(aFS.getType().getName()); + + // if it's an annotation, print the first 64 chars of its covered text + if (aFS instanceof AnnotationFS annot) { + String coveredText = annot.getCoveredText(); + printTabs(aNestingLevel + 1, aOut); + aOut.print("\""); + if (coveredText.length() <= 64) { + aOut.print(coveredText); + } else { + aOut.print(coveredText.substring(0, 64) + "..."); // print, not println: the closing quote must stay on the same line + } + aOut.println("\""); + } + + // print all features + List<Feature> aFeatures = aFS.getType().getFeatures(); + for (Feature feat : aFeatures) { + printTabs(aNestingLevel + 1, aOut); + // print feature name + aOut.print(feat.getShortName()); + aOut.print(" = "); + // print feature value (how we get this depends on feature's range type) + String rangeTypeName = feat.getRange().getName(); + if (aCAS.getTypeSystem().subsumes(stringType, feat.getRange())) // must check for subtypes of + // string + { + String str = aFS.getStringValue(feat); + if (str == null) { + aOut.println("null"); + } else { + aOut.print("\""); + if (str.length() > 64) { + str = str.substring(0, 64) + "..."; + } + aOut.print(str); + aOut.println("\""); + } + } else if (CAS.TYPE_NAME_INTEGER.equals(rangeTypeName)) { + aOut.println(aFS.getIntValue(feat)); + } else if (CAS.TYPE_NAME_FLOAT.equals(rangeTypeName)) { + aOut.println(aFS.getFloatValue(feat)); + } else if (CAS.TYPE_NAME_STRING_ARRAY.equals(rangeTypeName)) { + StringArrayFS arrayFS = (StringArrayFS) aFS.getFeatureValue(feat); + if (arrayFS == null) { + aOut.println("null"); + } else { + String[] vals 
= arrayFS.toArray(); + aOut.print("["); + for (int i = 0; i < vals.length - 1; i++) { + aOut.print(vals[i]); + aOut.print(','); + } + if (vals.length > 0) { + aOut.print(vals[vals.length - 1]); + } + aOut.println("]"); // arrays are bracketed only; no quote was opened + } + } else if (CAS.TYPE_NAME_INTEGER_ARRAY.equals(rangeTypeName)) { + IntArrayFS arrayFS = (IntArrayFS) aFS.getFeatureValue(feat); + if (arrayFS == null) { + aOut.println("null"); + } else { + int[] vals = arrayFS.toArray(); + aOut.print("["); + for (int i = 0; i < vals.length - 1; i++) { + aOut.print(vals[i]); + aOut.print(','); + } + if (vals.length > 0) { + aOut.print(vals[vals.length - 1]); + } + aOut.println("]"); + } + } else if (CAS.TYPE_NAME_FLOAT_ARRAY.equals(rangeTypeName)) { + FloatArrayFS arrayFS = (FloatArrayFS) aFS.getFeatureValue(feat); + if (arrayFS == null) { + aOut.println("null"); + } else { + float[] vals = arrayFS.toArray(); + aOut.print("["); + for (int i = 0; i < vals.length - 1; i++) { + aOut.print(vals[i]); + aOut.print(','); + } + if (vals.length > 0) { + aOut.print(vals[vals.length - 1]); + } + aOut.println("]"); + } + } else // non-primitive type + { + FeatureStructure val = aFS.getFeatureValue(feat); + if (val == null) { + aOut.println("null"); + } else { + printFS(val, aCAS, aNestingLevel + 1, aOut); + } + } + } + } + + /** + * Prints tabs to a PrintStream. 
+ * + * @param aNumTabs + * number of tabs to print + * @param aOut + * the PrintStream to which output will be written + */ + private static void printTabs(int aNumTabs, PrintStream aOut) { + for (int i = 0; i < aNumTabs; i++) { + aOut.print("\t"); + } + } +} diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java b/opennlp-uima/src/test/java/opennlp/uima/AbstractTest.java similarity index 57% copy from opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java copy to opennlp-uima/src/test/java/opennlp/uima/AbstractTest.java index f41b7db7..1a115185 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java +++ b/opennlp-uima/src/test/java/opennlp/uima/AbstractTest.java @@ -15,23 +15,21 @@ * limitations under the License. */ -package opennlp.uima.sentdetect; +package opennlp.uima; -import java.io.IOException; -import java.io.InputStream; +import java.net.URL; +import java.nio.file.Path; +import java.nio.file.Paths; -import opennlp.tools.sentdetect.SentenceModel; -import opennlp.uima.util.AbstractModelResource; +public abstract class AbstractTest { -public class SentenceModelResourceImpl extends AbstractModelResource<SentenceModel> - implements SentenceModelResource { + protected static final String FILE_URL = "fileUrl"; - public SentenceModel getModel() { - return model; - } + protected static final URL TARGET_DIR = AbstractTest.class.getProtectionDomain(). 
+ getCodeSource().getLocation(); + protected static final String PATHNAME = Paths.get(TARGET_DIR.getPath(), "test-descriptors/").toString(); + + protected static final Path OPENNLP_DIR = Paths.get(System.getProperty("OPENNLP_DOWNLOAD_HOME", + System.getProperty("user.home"))).resolve(".opennlp"); - @Override - protected SentenceModel loadModel(InputStream in) throws IOException { - return new SentenceModel(in); - } } diff --git a/opennlp-uima/src/test/java/opennlp/uima/AbstractUimaTest.java b/opennlp-uima/src/test/java/opennlp/uima/AbstractUimaTest.java new file mode 100644 index 00000000..50b55a05 --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/AbstractUimaTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.uima; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Paths; + +import org.apache.uima.UIMAFramework; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.resource.ExternalResourceDescription; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.resource.ResourceSpecifier; +import org.apache.uima.resource.metadata.ResourceManagerConfiguration; +import org.apache.uima.util.InvalidXMLException; +import org.apache.uima.util.XMLInputSource; + +public abstract class AbstractUimaTest extends AbstractTest { + + protected AnalysisEngine produceAE(String descName) + throws IOException, InvalidXMLException, ResourceInitializationException { + File descFile = new File(PATHNAME + "/" + descName); + XMLInputSource in = new XMLInputSource(descFile); + ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); + adaptModelURL(specifier); + return UIMAFramework.produceAnalysisEngine(specifier); + } + + /* + * Dynamically resolves the model URL for the test environment + * and reconfigures the resource specification accordingly. + * + * Note: + * In the xml test-descriptors files only stub urls exist. + * Therefore, the actual 'url' has to be set at runtime + * and used to compose a valid 'file' URL for the resource + * specification object ('resourceSpec'). 
+ */ + private void adaptModelURL(ResourceSpecifier specifier) { + ResourceManagerConfiguration config = (ResourceManagerConfiguration) + specifier.getAttributeValue("resourceManagerConfiguration"); + ExternalResourceDescription[] resources = config.getExternalResources(); + for (ExternalResourceDescription modelDesc : resources) { + ResourceSpecifier resourceSpec = modelDesc.getResourceSpecifier(); + String genericValue = resourceSpec.getAttributeValue(FILE_URL).toString(); + String modelName = genericValue.split(":")[1]; // always right of 'file:' -> idx 1 + try { + if ("dictionary.dic".equals(modelName)) { + URL fileURL = Paths.get(TARGET_DIR.getPath(), modelName).toUri().toURL(); + resourceSpec.setAttributeValue(FILE_URL, fileURL.toExternalForm()); + } else { + URL modelURL = OPENNLP_DIR.resolve(modelName).toUri().toURL(); + resourceSpec.setAttributeValue(FILE_URL, modelURL.toExternalForm()); + } + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/AnnotatorsInitializationTest.java b/opennlp-uima/src/test/java/opennlp/uima/AnnotatorsInitializationTest.java deleted file mode 100644 index 44519d2c..00000000 --- a/opennlp-uima/src/test/java/opennlp/uima/AnnotatorsInitializationTest.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package opennlp.uima; - -import java.io.File; -import java.io.IOException; - -import org.apache.uima.UIMAFramework; -import org.apache.uima.analysis_engine.AnalysisEngine; -import org.apache.uima.cas.CAS; -import org.apache.uima.pear.util.FileUtil; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceSpecifier; -import org.apache.uima.util.InvalidXMLException; -import org.apache.uima.util.XMLInputSource; -import org.junit.jupiter.api.Assertions; - -/** - * Test for initialization of the opennlp.uima Annotators - */ -public class AnnotatorsInitializationTest { - - private static final String PATHNAME = "src/test/resources/test-descriptors/"; - - // TODO: This test requires the SourceForge models, or other models to run, - // but they are missing due to license issues since the project was migrated to Apache - //@Test - public void testInitializationExecutionAndReconfigure() { - File f = new File(PATHNAME); - for (String descName : f.list(new FileUtil.ExtFilenameFilter("xml"))) { - if (!descName.equals("TypeSystem.xml")) { - try { - AnalysisEngine ae = produceAE(descName); - CAS cas = ae.newCAS(); - cas.setDocumentText("this is a dummy document text for initialization and reconfiguration"); - ae.process(cas); - ae.reconfigure(); - } catch (Exception e) { - Assertions.fail(e.getLocalizedMessage() + " for desc " + descName); - } - } - } - } - - private AnalysisEngine produceAE(String descName) - throws IOException, InvalidXMLException, ResourceInitializationException { - File descFile = new 
File(PATHNAME + descName); - XMLInputSource in = new XMLInputSource(descFile); - ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); - return UIMAFramework.produceAnalysisEngine(specifier); - } -} diff --git a/opennlp-uima/src/test/java/opennlp/uima/FullAnnotatorsFlowIT.java b/opennlp-uima/src/test/java/opennlp/uima/FullAnnotatorsFlowIT.java new file mode 100644 index 00000000..0b78813b --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/FullAnnotatorsFlowIT.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package opennlp.uima; + +import java.io.IOException; + +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.resource.ResourceConfigurationException; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.InvalidXMLException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Integration test that initializes, executes and reconfigures the analysis engine of each listed descriptor. + */ +public class FullAnnotatorsFlowIT extends AbstractIT { + + private static final String DOCUMENT_TEXT = // literals end with a blank so the sentences stay separated + "This document was written by Martin for initialization and reconfiguration. " + + "The text was changed in February 2025 by Apache Software Foundation. " + + "It is at least 80% in line with the annotation guidelines."; + + @ParameterizedTest + @ValueSource(strings = {"OpenNlpTextAnalyzer.xml"}) + public void testInitializationExecutionAndReconfigure(String descName) { + AnalysisEngine ae = null; + try { + ae = produceAE(descName); + assertNotNull(ae); + CAS cas = ae.newCAS(); + cas.setDocumentLanguage("en"); + cas.setDocumentText(DOCUMENT_TEXT); + ae.process(cas); + ae.reconfigure(); + } catch (IOException | InvalidXMLException | AnalysisEngineProcessException | + ResourceConfigurationException | ResourceInitializationException e) { // the cause may be null; 'e' is attached to keep the stack trace + fail(e.getLocalizedMessage() + " for desc " + descName + + ", cause: " + (e.getCause() != null ? e.getCause().getLocalizedMessage() : "n/a"), e); + } finally { + if (ae != null) { + ae.destroy(); + } + } + } +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/SingleAnnotatorIT.java b/opennlp-uima/src/test/java/opennlp/uima/SingleAnnotatorIT.java new file mode 100644 index 00000000..635df38b --- /dev/null +++ 
b/opennlp-uima/src/test/java/opennlp/uima/SingleAnnotatorIT.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.uima; + +import java.io.IOException; + +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.resource.ResourceConfigurationException; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.InvalidXMLException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Test for initialization of the opennlp.uima Annotators classes. 
+ */ +public class SingleAnnotatorIT extends AbstractIT { + + private static final String DOCUMENT_TEXT = + "This is a dummy document text for initialization and reconfiguration."; + + @ParameterizedTest + @ValueSource(strings = { + "Chunker.xml", "DateNameFinder.xml", "DictionaryNameFinder.xml", + "LocationNameFinder.xml", "MoneyNameFinder.xml", "OrganizationNameFinder.xml", + "Parser.xml", "PercentageNameFinder.xml", "PersonNameFinder.xml", "PosTagger.xml", + "SentenceDetector.xml", "SimpleTokenizer.xml", "Tokenizer.xml", "TimeNameFinder.xml", + "WhitespaceTokenizer.xml" + }) + public void testInitializationExecutionAndReconfigure(String descName) { + AnalysisEngine ae = null; + try { + ae = produceAE(descName); + assertNotNull(ae); + CAS cas = ae.newCAS(); + cas.setDocumentLanguage("en"); + cas.setDocumentText(DOCUMENT_TEXT); + ae.process(cas); + ae.reconfigure(); + /* + CasIterator casIterator = ae.processAndOutputNewCASes(cas); + while (casIterator.hasNext()) { + CAS outCas = casIterator.next(); + + //dump the document text and annotations for this segment + System.out.println("********* NEW SEGMENT *********"); + System.out.println(outCas.getDocumentText()); + printAnnotations(outCas, System.out); + //release the CAS (important) + outCas.release(); + } + */ + } catch (IOException | InvalidXMLException | AnalysisEngineProcessException | + ResourceConfigurationException | ResourceInitializationException e) { // the cause may be null; 'e' is attached to keep the stack trace + fail(e.getLocalizedMessage() + " for desc " + descName + + ", cause: " + (e.getCause() != null ? e.getCause().getLocalizedMessage() : "n/a"), e); + } finally { + if (ae != null) { + ae.destroy(); + } + } + } +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java b/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java index 11b0e980..c5419221 100644 --- a/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java +++ b/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java @@ -39,11 
+39,10 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import opennlp.tools.util.StringList; +import opennlp.uima.AbstractTest; import opennlp.uima.util.CasUtil; -public class DictionaryResourceTest { - - private static final String PATHNAME = "src/test/resources/test-descriptors/"; +public class DictionaryResourceTest extends AbstractTest { private static AnalysisEngine AE; @@ -59,7 +58,7 @@ public class DictionaryResourceTest { private static AnalysisEngine produceAE(String descName) throws IOException, InvalidXMLException, ResourceInitializationException { - File descFile = new File(PATHNAME + descName); + File descFile = new File(PATHNAME + "/" + descName); XMLInputSource in = new XMLInputSource(descFile); ResourceSpecifier specifier = UIMAFramework.getXMLParser() .parseResourceSpecifier(in); diff --git a/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java b/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java index 7ff18a7d..4491fa74 100644 --- a/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java +++ b/opennlp-uima/src/test/java/opennlp/uima/normalizer/NumberUtilTest.java @@ -38,7 +38,6 @@ class NumberUtilTest { Assertions.assertFalse(NumberUtil.isLanguageSupported(INVALID_LANGUAGE_CODE)); } - @Test void parse_long() throws ParseException { String numberStr = " 1 2 3 4 5 6 7 8 9 1 0 "; @@ -59,8 +58,8 @@ class NumberUtilTest { void parse_double_with_exception() throws ParseException { String numberStr = " 12 3456.78 910 "; Double doubleValue = 123456.78910; - IllegalArgumentException thrown = Assertions.assertThrows(IllegalArgumentException.class , () -> { - Number result = NumberUtil.parse(numberStr , INVALID_LANGUAGE_CODE); + Assertions.assertThrows(IllegalArgumentException.class , () -> { + NumberUtil.parse(numberStr , INVALID_LANGUAGE_CODE); } , "java.lang.IllegalArgumentException: Language INVALID is not supported!"); } diff --git 
a/opennlp-uima/src/test/java/opennlp/uima/normalizer/StringDictionaryTest.java b/opennlp-uima/src/test/java/opennlp/uima/normalizer/StringDictionaryTest.java new file mode 100644 index 00000000..1c6a78fc --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/normalizer/StringDictionaryTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.uima.normalizer; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Iterator; + +import org.junit.jupiter.api.Test; + +import opennlp.tools.util.StringList; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class StringDictionaryTest { + + // SUT + private StringDictionary dictionary; + + @Test + void testInitEmptyDictionary() { + dictionary = new StringDictionary(); + Iterator<StringList> it = dictionary.iterator(); + assertFalse(it.hasNext()); + } + + @Test + void testPutAndGetEntry() { + // prepare + dictionary = new StringDictionary(); + StringList sl = new StringList("foo", "bar"); + // test + dictionary.put(sl, "foo bar"); + Iterator<StringList> it = dictionary.iterator(); + assertTrue(it.hasNext()); + assertEquals("foo bar", dictionary.get(sl)); + } + + @Test + void testSerialization() throws IOException { + // prepare + dictionary = new StringDictionary(); + StringList sl = new StringList("foo", "bar"); + dictionary.put(sl, "foo bar"); + byte[] serialized; + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + dictionary.serialize(baos); + baos.flush(); + serialized = baos.toByteArray(); + } + try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized)) { + StringDictionary read = new StringDictionary(bais); + // test + Iterator<StringList> it = read.iterator(); + assertTrue(it.hasNext()); + assertEquals("foo bar", read.get(sl)); + } + + } +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java index e3204fe8..fb58e2a8 100644 --- a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java +++ 
b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComboIteratorTest.java @@ -58,11 +58,9 @@ public class AnnotationComboIteratorTest { List<List<String>> tokensBySentence = new ArrayList<>(); for (AnnotationIteratorPair annotationIteratorPair : comboIterator) { - final List<String> tokens = new ArrayList<>(); - for (AnnotationFS tokenAnnotation : annotationIteratorPair - .getSubIterator()) { + for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) { tokens.add(tokenAnnotation.getCoveredText()); } diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComparatorTest.java b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComparatorTest.java new file mode 100644 index 00000000..fe1a7589 --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotationComparatorTest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.uima.util; + +import java.io.IOException; + +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.InvalidXMLException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import opennlp.uima.AbstractUimaTest; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.fail; +/** Tests {@link AnnotationComparator} with annotations created in a CAS of the SentenceDetector.xml engine. */ +class AnnotationComparatorTest extends AbstractUimaTest { + + private static final String DOCUMENT_TEXT = + "This is a dummy document text for initialization and reconfiguration."; + + private AnalysisEngine ae; + + private Type type; + + // SUT + private CAS cas; + // Builds the engine from SentenceDetector.xml, processes DOCUMENT_TEXT and resolves the sentence type. + @BeforeEach + public void setUp() { + String descName = "SentenceDetector.xml"; + try { + ae = produceAE(descName); + assertNotNull(ae); + cas = ae.newCAS(); + cas.setDocumentLanguage("en"); + cas.setDocumentText(DOCUMENT_TEXT); + ae.process(cas); + // type that matches the descriptor's topic: sentences + type = AnnotatorUtil.getType(cas.getTypeSystem(), "opennlp.uima.Sentence"); + } catch (IOException | InvalidXMLException | ResourceInitializationException | + AnalysisEngineProcessException e) { + // null-safe: getCause() may be null, so it is only concatenated; e is handed to fail() to keep the trace + fail(e.getLocalizedMessage() + " for desc " + descName + ", cause: " + e.getCause(), e); + } + } + // Releases the engine, if setUp managed to create one. + @AfterEach + public void tearDown() { + if (ae != null) { + ae.destroy(); + } + } + // Two annotations with identical begin and end offsets must compare as 0. + @Test + void testCompareEquality() { + // prepare + AnnotationFS fa1 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length()); + AnnotationFS fa2 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length()); + AnnotationComparator comparator = new 
AnnotationComparator(); + // test + assertEquals(0, comparator.compare(fa1, fa2)); + } + // fa1 begins one character after fa2: the expected result is 1. + @Test + void testCompareDifference1() { + // prepare + AnnotationFS fa1 = cas.createAnnotation(type, 1, DOCUMENT_TEXT.length()); + AnnotationFS fa2 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length()); + AnnotationComparator comparator = new AnnotationComparator(); + // test + assertEquals(1, comparator.compare(fa1, fa2)); + } + // fa1 begins one character before fa2: the expected result is -1. + @Test + void testCompareDifference2() { + // prepare + AnnotationFS fa1 = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length()); + AnnotationFS fa2 = cas.createAnnotation(type, 1, DOCUMENT_TEXT.length()); + AnnotationComparator comparator = new AnnotationComparator(); + // test + assertEquals(-1, comparator.compare(fa1, fa2)); + } + +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/AnnotatorUtilTest.java b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotatorUtilTest.java new file mode 100644 index 00000000..04ed13b2 --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/util/AnnotatorUtilTest.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.uima.util; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Feature; +import org.apache.uima.cas.Type; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.InvalidXMLException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullAndEmptySource; +import org.junit.jupiter.params.provider.ValueSource; + +import opennlp.uima.AbstractUimaTest; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.fail; +/** Tests the static helpers of {@link AnnotatorUtil} against the engine built from SentenceDetector.xml. */ +class AnnotatorUtilTest extends AbstractUimaTest { + + private static final String DOCUMENT_TEXT = + "This is a dummy document text for initialization and reconfiguration."; + + private AnalysisEngine ae; + + // SUT + private CAS cas; + // Builds the engine from SentenceDetector.xml and a CAS holding DOCUMENT_TEXT (left unprocessed). + @BeforeEach + public void setUp() { + String descName = "SentenceDetector.xml"; + try { + ae = produceAE(descName); + assertNotNull(ae); + cas = ae.newCAS(); + cas.setDocumentLanguage("en"); + cas.setDocumentText(DOCUMENT_TEXT); + // note: no actual need to process the CAS here! 
+ + } catch (IOException | InvalidXMLException | ResourceInitializationException e) { + // null-safe: getCause() may be null, so it is only concatenated; e is handed to fail() to keep the trace + fail(e.getLocalizedMessage() + " for desc " + descName + ", cause: " + e.getCause(), e); + } + } + // Releases the engine, if setUp managed to create one. + @AfterEach + public void tearDown() { + if (ae != null) { + ae.destroy(); + } + } + // The sentence type name must resolve to a non-null Type in the CAS type system. + @Test + void testGetType() { + try { + Type t = AnnotatorUtil.getType(cas.getTypeSystem(), "opennlp.uima.Sentence"); + assertNotNull(t); + } catch (AnalysisEngineProcessException e) { + fail(e.getLocalizedMessage(), e); + } + } + + @Test + void testGetTypeWithInvalidTypeSystem() { + assertThrows(IllegalArgumentException.class, () -> + AnnotatorUtil.getType(null, "opennlp.uima.Sentence")); + } + + @ParameterizedTest + @NullAndEmptySource + @ValueSource(strings = {" ", "\t", "\n"}) + void testGetTypeWithEmptyTypeName(String typeName) { + assertThrows(OpenNlpAnnotatorProcessException.class, () -> + AnnotatorUtil.getType(cas.getTypeSystem(), typeName)); + } + + @Test + void testGetRequiredFeature() { + try { + final Type t = AnnotatorUtil.getRequiredTypeParameter(ae.getUimaContext(), + cas.getTypeSystem(), UimaUtil.SENTENCE_TYPE_PARAMETER); + Feature f = AnnotatorUtil.getRequiredFeature(t, "sofa"); + assertNotNull(f); + assertEquals("sofa", f.getShortName()); + } catch (AnalysisEngineProcessException e) { + fail(e.getLocalizedMessage()); + } + } + + @Test + void testGetFeatureWithInvalidType() { + assertThrows(IllegalArgumentException.class, () -> + AnnotatorUtil.getRequiredFeature(null, "opennlp.uima.Sentence")); + } + + @Test + void testGetRequiredFeatureWithInvalidFeatureName() throws AnalysisEngineProcessException { + final Type t = AnnotatorUtil.getRequiredTypeParameter(ae.getUimaContext(), + cas.getTypeSystem(), UimaUtil.SENTENCE_TYPE_PARAMETER); + assertThrows(OpenNlpAnnotatorProcessException.class, () -> + AnnotatorUtil.getRequiredFeature(t, "xyz")); + } + + @Test + void testGetOptionalFeatureParameter() { + UimaContext ctx = ae.getUimaContext(); + try { + final 
Type t = AnnotatorUtil.getRequiredTypeParameter(ctx, cas.getTypeSystem(), + UimaUtil.SENTENCE_TYPE_PARAMETER); + Feature f = AnnotatorUtil.getOptionalFeatureParameter(ctx, t, + UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE); + assertNotNull(f); + assertEquals("prob", f.getShortName()); + } catch (AnalysisEngineProcessException e) { + fail(e.getLocalizedMessage()); + } + } + + @Test + void testGetOptionalFeatureParameterWithInvalidFeatureName() { + UimaContext ctx = ae.getUimaContext(); + try { + final Type t = AnnotatorUtil.getRequiredTypeParameter(ctx, cas.getTypeSystem(), + UimaUtil.SENTENCE_TYPE_PARAMETER); + Feature f = AnnotatorUtil.getOptionalFeatureParameter(ctx, t, + "xyz", CAS.TYPE_NAME_DOUBLE); + assertNull(f); + } catch (AnalysisEngineProcessException e) { + fail(e.getLocalizedMessage()); + } + } + + @Test + void testGetOptionalBooleanParameterWithMismatchingName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getOptionalBooleanParameter( + ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER)); + } + // Exercises the *optional* float accessor, as the test name states (was getRequiredFloatParameter). + @Test + void testGetOptionalFloatParameterWithMismatchingName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getOptionalFloatParameter( + ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER)); + } + + @Test + void testGetOptionalIntegerParameterWithMismatchingName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getOptionalIntegerParameter( + ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER)); + } + + @Test + void testGetOptionalStringArrayParameterWithMismatchingName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getOptionalStringArrayParameter( + ae.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER)); + } + + @Test + void testGetRequiredBooleanParameterWithInvalidName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getRequiredBooleanParameter( + ae.getUimaContext(), 
"xyz")); + } + + @Test + void testGetRequiredFloatParameterWithInvalidName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getRequiredFloatParameter( + ae.getUimaContext(), "xyz")); + } + + @Test + void testGetRequiredIntegerParameterWithInvalidName() { + assertThrows(ResourceInitializationException.class, () -> + AnnotatorUtil.getRequiredIntegerParameter( + ae.getUimaContext(), "xyz")); + } + + /* + * This test won't pass as OpenNLP's resource-like classes do not implement: + * 'org.apache.uima.resource.DataSource', conflict: ResourceManager_impl.class -> line 517 + */ + @Test + @Disabled + void testGetOptionalResourceAsStream() { + try (InputStream in = AnnotatorUtil.getOptionalResourceAsStream( + ae.getUimaContext(), "opennlp.uima.ModelName")) { + assertNotNull(in); + } catch (ResourceInitializationException | IOException e) { + fail(e.getLocalizedMessage()); + } + } +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java b/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java index f909afc3..5e2d029b 100644 --- a/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java +++ b/opennlp-uima/src/test/java/opennlp/uima/util/CasUtil.java @@ -41,7 +41,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; -public class CasUtil { +public final class CasUtil { private static final Logger logger = LoggerFactory.getLogger(CasUtil.class); diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/OpennlpUtilTest.java b/opennlp-uima/src/test/java/opennlp/uima/util/OpennlpUtilTest.java new file mode 100644 index 00000000..27e1483e --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/util/OpennlpUtilTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.uima.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +import org.apache.uima.resource.ResourceInitializationException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EmptySource; +import org.junit.jupiter.params.provider.ValueSource; + +import opennlp.tools.sentdetect.SentenceModel; +import opennlp.tools.util.DownloadUtil; +import opennlp.tools.util.TrainingParameters; +import opennlp.uima.AbstractTest; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +/** Tests {@link OpennlpUtil}: model serialization, byte loading and loading of training parameters. */ +public class OpennlpUtilTest extends AbstractTest { + + @TempDir + private Path tmp; + + private static SentenceModel sentModel; + // Obtains the English sentence detector model once for all tests via DownloadUtil. + @BeforeAll + public static void initEnv() throws IOException { + sentModel = DownloadUtil.downloadModel( + "en", DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class); + } + + @Test + void testSerialize() throws IOException { + // prepare + final File outModel = 
tmp.resolve("sent-detect-model.bin").toFile(); + outModel.deleteOnExit(); + assertFalse(outModel.exists()); + // test + OpennlpUtil.serialize(sentModel, outModel); + assertTrue(outModel.exists()); + } + + @Test + void testSerializeInvalid1() { + final File outModel = tmp.resolve("sent-detect-model.bin").toFile(); + outModel.deleteOnExit(); + assertFalse(outModel.exists()); + assertThrows(IllegalArgumentException.class, () -> OpennlpUtil.serialize(null, outModel)); + } + + @Test + void testSerializeInvalid2() { + assertThrows(IllegalArgumentException.class, () -> OpennlpUtil.serialize(sentModel, null)); + } + + @ParameterizedTest + @ValueSource(strings = {"opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin"}) + void testLoadBytes(String file) { + try { + byte[] data = OpennlpUtil.loadBytes(OPENNLP_DIR.resolve(file).toFile()); + assertNotNull(data); + assertTrue(data.length > 0); + } catch (IOException e) { + fail(e.getLocalizedMessage()); + } + } + // On failure the exception itself is passed to fail(): its cause may be null and must not be dereferenced. + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testLoadTrainingParams(boolean seqTrainingAllowed) { + final String trainingParamsFile = "training-params-test.conf"; + final String trainingParamsPath = Path.of(TARGET_DIR.getPath()). + resolve(trainingParamsFile).toAbsolutePath().toString(); + try { + TrainingParameters params = OpennlpUtil.loadTrainingParams(trainingParamsPath, seqTrainingAllowed); + assertNotNull(params); + assertEquals("MAXENT", params.getStringParameter("Algorithm", "?")); + assertEquals(150, params.getIntParameter("Iterations", 1)); + assertEquals(5, params.getIntParameter("Cutoff", 1)); + assertEquals(4, params.getIntParameter("Threads", 1)); + } catch (ResourceInitializationException e) { + fail(e.getLocalizedMessage(), e); + } + } + + @Test + void testLoadTrainingParamsWithInvalidFileContent() { + final String trainingParamsFile = "training-params-invalid.conf"; + final String trainingParamsPath = Path.of(TARGET_DIR.getPath()). 
+ resolve(trainingParamsFile).toAbsolutePath().toString(); + assertThrows(ResourceInitializationException.class, () -> + OpennlpUtil.loadTrainingParams(trainingParamsPath, false)); + } + // A null path is expected to yield the defaults asserted below; failures keep the original exception. + @Test + void testLoadTrainingParamsNullYieldsDefaultParams() { + try { + TrainingParameters params = OpennlpUtil.loadTrainingParams(null, true); + assertNotNull(params); + assertEquals("MAXENT", params.getStringParameter("Algorithm", "?")); + assertEquals(100, params.getIntParameter("Iterations", 1)); + assertEquals(5, params.getIntParameter("Cutoff", 1)); + assertEquals(1, params.getIntParameter("Threads", 1)); + } catch (ResourceInitializationException e) { + fail(e.getLocalizedMessage(), e); + } + } + + @ParameterizedTest + @EmptySource + @ValueSource(strings = {" ", "\t", "\n"}) + void testLoadTrainingParamsInvalid(String fileName) { + assertThrows(ResourceInitializationException.class, () -> + OpennlpUtil.loadTrainingParams(fileName, false)); + } + +} diff --git a/opennlp-uima/src/test/java/opennlp/uima/util/UimaUtilTest.java b/opennlp-uima/src/test/java/opennlp/uima/util/UimaUtilTest.java new file mode 100644 index 00000000..212de28b --- /dev/null +++ b/opennlp-uima/src/test/java/opennlp/uima/util/UimaUtilTest.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.uima.util; + +import java.io.IOException; + +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.cas.text.AnnotationIndex; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.InvalidXMLException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import opennlp.uima.AbstractUimaTest; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.fail; +/** Tests {@link UimaUtil#removeAnnotations} on a CAS processed by the SentenceDetector.xml engine. */ +class UimaUtilTest extends AbstractUimaTest { + + private static final String DOCUMENT_TEXT = + "This is a dummy document text for initialization and reconfiguration."; + + private AnalysisEngine ae; + + private AnnotationFS featureAnnotation; + private Type type; + + // SUT + private CAS cas; + // Processes DOCUMENT_TEXT, resolves the sentence type and creates an annotation spanning the whole text. + @BeforeEach + public void setUp() { + String descName = "SentenceDetector.xml"; + try { + ae = produceAE(descName); + assertNotNull(ae); + cas = ae.newCAS(); + cas.setDocumentLanguage("en"); + cas.setDocumentText(DOCUMENT_TEXT); + ae.process(cas); + // type that matches the descriptor's topic: sentences + type = AnnotatorUtil.getType(cas.getTypeSystem(), "opennlp.uima.Sentence"); + featureAnnotation = cas.createAnnotation(type, 0, DOCUMENT_TEXT.length()); + } catch (IOException | InvalidXMLException | ResourceInitializationException | + AnalysisEngineProcessException e) { + // null-safe: getCause() may be null, so it is only concatenated; e is handed to fail() to keep the trace + fail(e.getLocalizedMessage() + " for desc " + descName + ", cause: " + e.getCause(), e); + } + } + 
@AfterEach + public void tearDown() { + if (ae != null) { + ae.destroy(); + } + } + // The processed CAS is expected to index exactly one annotation of the type; removal must leave none. + @Test + void testRemoveAnnotations() { + // prepare + AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex(type); + assertNotNull(annotationIndex); + assertEquals(1, annotationIndex.size()); + // test + UimaUtil.removeAnnotations(cas, featureAnnotation, type); + annotationIndex = cas.getAnnotationIndex(type); + assertNotNull(annotationIndex); + assertEquals(0, annotationIndex.size()); + } + + @Test + void testRemoveAnnotationsNoAnnotationsInvalidCas() { + assertThrows(IllegalArgumentException.class, () -> + UimaUtil.removeAnnotations(null, featureAnnotation, type)); + } + + @Test + void testRemoveAnnotationsNoAnnotationsInvalidType() { + assertThrows(IllegalArgumentException.class, () -> + UimaUtil.removeAnnotations(cas, featureAnnotation, null)); + } + +} diff --git a/opennlp-uima/src/test/resources/simplelogger.properties b/opennlp-uima/src/test/resources/simplelogger.properties new file mode 100644 index 00000000..eea25a61 --- /dev/null +++ b/opennlp-uima/src/test/resources/simplelogger.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +org.slf4j.simpleLogger.defaultLogLevel=warn diff --git a/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml b/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml index ecca12e4..4aede3d9 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/Chunker.xml @@ -25,7 +25,8 @@ <annotatorImplementationName>opennlp.uima.chunker.Chunker</annotatorImplementationName> <analysisEngineMetaData> <name>Chunker</name> - <version>1.5.2-incubating</version> + <description/> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -131,7 +132,7 @@ <externalResource> <name>ChunkerModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-chunker.bin</fileUrl> + <fileUrl>file:en-chunker.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.chunker.ChunkerModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml index d6191608..164ac991 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/DateNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Date Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -104,7 +104,7 @@ <externalResource> <name>DateModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-date.bin</fileUrl> + <fileUrl>file:en-ner-date.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> diff --git 
a/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml index 97e19b24..a4c91067 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.DictionaryNameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Dictionary Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -108,7 +108,7 @@ <name>NameFinderDictionary</name> <description/> <fileResourceSpecifier> - <fileUrl>file:src/test/resources/dictionary.dic</fileUrl> + <fileUrl>file:dictionary.dic</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.dictionary.DictionaryResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml index 51f5079d..82385b6a 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/LocationNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Location Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -104,7 +104,7 @@ <externalResource> <name>LocationModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-location.bin</fileUrl> + <fileUrl>file:en-ner-location.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> 
diff --git a/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml index 8df918dc..fb76b23b 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/MoneyNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Money Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -104,7 +104,7 @@ <externalResource> <name>MoneyModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-money.bin</fileUrl> + <fileUrl>file:en-ner-money.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/OpenNlpTextAnalyzer.xml b/opennlp-uima/src/test/resources/test-descriptors/OpenNlpTextAnalyzer.xml new file mode 100644 index 00000000..9c38a0b4 --- /dev/null +++ b/opennlp-uima/src/test/resources/test-descriptors/OpenNlpTextAnalyzer.xml @@ -0,0 +1,266 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> + <frameworkImplementation>org.apache.uima.java</frameworkImplementation> + <primitive>false</primitive> + + <delegateAnalysisEngineSpecifiers> + <delegateAnalysisEngine key="SentenceDetector"> + <import location="SentenceDetector.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="Tokenizer"> + <import location="Tokenizer.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="DateFinder"> + <import location="DateNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="PersonFinder"> + <import location="PersonNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="OrganizationFinder"> + <import location="OrganizationNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="MoneyFinder"> + <import location="MoneyNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="LocationFinder"> + <import location="LocationNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="PercentageFinder"> + <import location="PercentageNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="TimeFinder"> + <import location="TimeNameFinder.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="PosTagger"> + <import location="PosTagger.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="Chunker"> + <import location="Chunker.xml" /> + </delegateAnalysisEngine> + + <delegateAnalysisEngine key="Parser"> + 
<import location="Parser.xml" /> + </delegateAnalysisEngine> + </delegateAnalysisEngineSpecifiers> + + <analysisEngineMetaData> + <name>OpenNlpTextAnalyzer</name> + <description /> + <version>${project.version}</version> + <vendor>Apache Software Foundation</vendor> + <configurationParameters /> + <configurationParameterSettings /> + <flowConstraints> + <fixedFlow> + <node>SentenceDetector</node> + <node>Tokenizer</node> + <node>PersonFinder</node> + <node>OrganizationFinder</node> + <node>MoneyFinder</node> + <node>DateFinder</node> + <node>LocationFinder</node> + <node>PercentageFinder</node> + <node>TimeFinder</node> + <node>PosTagger</node> + <node>Chunker</node> + <node>Parser</node> + </fixedFlow> + </flowConstraints> + <capabilities> + <capability> + <inputs /> + <outputs /> + <languagesSupported> + <language>en</language> + </languagesSupported> + </capability> + </capabilities> + <operationalProperties> + <modifiesCas>true</modifiesCas> + <multipleDeploymentAllowed>false</multipleDeploymentAllowed> + <outputsNewCASes>false</outputsNewCASes> + </operationalProperties> + </analysisEngineMetaData> + + <resourceManagerConfiguration> + + <externalResources> + <externalResource> + <name>SentenceModel</name> + <fileResourceSpecifier> + <fileUrl>file:opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.sentdetect.SentenceModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>TokenModel</name> + <fileResourceSpecifier> + <fileUrl>file:opennlp-en-ud-ewt-tokens-1.2-2.5.0.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>PersonModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-person.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + 
</externalResource> + + <externalResource> + <name>PercentageModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-percentage.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>OrganizationModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-organization.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>MoneyModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-money.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>DateModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-date.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>LocationModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-location.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>TimeModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-ner-time.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>PosModel</name> + <fileResourceSpecifier> + <fileUrl>file:opennlp-en-ud-ewt-pos-1.2-2.5.0.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.postag.POSModelResourceImpl</implementationName> + </externalResource> + + <externalResource> + <name>ChunkerModel</name> + <fileResourceSpecifier> + 
<fileUrl>file:en-chunker.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.chunker.ChunkerModelResourceImpl</implementationName> + </externalResource> + <externalResource> + <name>ParserModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-parser-chunking.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.parser.ParserModelResourceImpl</implementationName> + </externalResource> + </externalResources> + + <externalResourceBindings> + <externalResourceBinding> + <key>SentenceDetector/opennlp.uima.ModelName</key> + <resourceName>SentenceModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>Tokenizer/opennlp.uima.ModelName</key> + <resourceName>TokenModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>PersonFinder/opennlp.uima.ModelName</key> + <resourceName>PersonModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>PercentageFinder/opennlp.uima.ModelName</key> + <resourceName>PercentageModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>OrganizationFinder/opennlp.uima.ModelName</key> + <resourceName>OrganizationModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>MoneyFinder/opennlp.uima.ModelName</key> + <resourceName>MoneyModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>DateFinder/opennlp.uima.ModelName</key> + <resourceName>DateModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>LocationFinder/opennlp.uima.ModelName</key> + <resourceName>LocationModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>TimeFinder/opennlp.uima.ModelName</key> + <resourceName>TimeModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>PosTagger/opennlp.uima.ModelName</key> + <resourceName>PosModel</resourceName> + </externalResourceBinding> + 
<externalResourceBinding> + <key>Chunker/opennlp.uima.ModelName</key> + <resourceName>ChunkerModel</resourceName> + </externalResourceBinding> + <externalResourceBinding> + <key>Parser/opennlp.uima.ModelName</key> + <resourceName>ParserModel</resourceName> + </externalResourceBinding> + </externalResourceBindings> + + </resourceManagerConfiguration> +</analysisEngineDescription> diff --git a/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml index 15d9cb52..280e17f4 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/OrganizationNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Organization Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -104,7 +104,7 @@ <externalResource> <name>OrganizationModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-organization.bin</fileUrl> + <fileUrl>file:en-ner-organization.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml b/opennlp-uima/src/test/resources/test-descriptors/Parser.xml similarity index 59% copy from opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml copy to opennlp-uima/src/test/resources/test-descriptors/Parser.xml index c10dad06..36709ad3 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/Parser.xml @@ -22,9 +22,10 @@ <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> 
<frameworkImplementation>org.apache.uima.java</frameworkImplementation> <primitive>true</primitive> - <annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName> + <annotatorImplementationName>opennlp.uima.parser.Parser</annotatorImplementationName> <analysisEngineMetaData> - <name>Tokenizer</name> + <name>Parser</name> + <description/> <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -41,14 +42,36 @@ <multiValued>false</multiValued> <mandatory>true</mandatory> </configurationParameter> + + <configurationParameter> + <name>opennlp.uima.ParseType</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>true</mandatory> + </configurationParameter> + + <configurationParameter> + <name>opennlp.uima.TypeFeature</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>true</mandatory> + </configurationParameter> <configurationParameter> - <name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name> + <name>opennlp.uima.ChildrenFeature</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>true</mandatory> + </configurationParameter> + + <configurationParameter> + <name>opennlp.uima.ProbabilityFeature</name> <type>String</type> <multiValued>false</multiValued> <mandatory>false</mandatory> </configurationParameter> </configurationParameters> + <configurationParameterSettings> <nameValuePair> <name>opennlp.uima.TokenType</name> @@ -59,7 +82,31 @@ <nameValuePair> <name>opennlp.uima.SentenceType</name> <value> - <string>uima.tcas.DocumentAnnotation</string> + <string>opennlp.uima.Sentence</string> + </value> + </nameValuePair> + <nameValuePair> + <name>opennlp.uima.ParseType</name> + <value> + <string>opennlp.uima.Parse</string> + </value> + </nameValuePair> + <nameValuePair> + <name>opennlp.uima.TypeFeature</name> + <value> + <string>parseType</string> + </value> + </nameValuePair> + <nameValuePair> + 
<name>opennlp.uima.ChildrenFeature</name> + <value> + <string>children</string> + </value> + </nameValuePair> + <nameValuePair> + <name>opennlp.uima.ProbabilityFeature</name> + <value> + <string>prob</string> </value> </nameValuePair> </configurationParameterSettings> @@ -85,28 +132,29 @@ </operationalProperties> </analysisEngineMetaData> + <resourceManagerConfiguration> + <externalResources> + <externalResource> + <name>ParserModel</name> + <fileResourceSpecifier> + <fileUrl>file:en-parser-chunking.bin</fileUrl> + </fileResourceSpecifier> + <implementationName>opennlp.uima.parser.ParserModelResourceImpl</implementationName> + </externalResource> + </externalResources> + + <externalResourceBindings> + <externalResourceBinding> + <key>opennlp.uima.ModelName</key> + <resourceName>ParserModel</resourceName> + </externalResourceBinding> + </externalResourceBindings> + </resourceManagerConfiguration> + <externalResourceDependencies> <externalResourceDependency> <key>opennlp.uima.ModelName</key> - <interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName> + <interfaceName>opennlp.uima.parser.ParserModelResource</interfaceName> </externalResourceDependency> </externalResourceDependencies> - - <resourceManagerConfiguration> - <externalResources> - <externalResource> - <name>TokenModel</name> - <fileResourceSpecifier> - <fileUrl>file:test-models/en-token.bin</fileUrl> - </fileResourceSpecifier> - <implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName> - </externalResource> - </externalResources> - <externalResourceBindings> - <externalResourceBinding> - <key>opennlp.uima.ModelName</key> - <resourceName>TokenModel</resourceName> - </externalResourceBinding> - </externalResourceBindings> - </resourceManagerConfiguration> </analysisEngineDescription> diff --git a/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml index 
695d58e2..34c481d9 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/PercentageNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Percentage Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -104,7 +104,7 @@ <externalResource> <name>PercentageModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-percentage.bin</fileUrl> + <fileUrl>file:en-ner-percentage.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml index 250629e7..1f5d2d15 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/PersonNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Person Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -49,6 +49,13 @@ <multiValued>false</multiValued> <mandatory>true</mandatory> </configurationParameter> + + <configurationParameter> + <name>opennlp.uima.ProbabilityFeature</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>false</mandatory> + </configurationParameter> </configurationParameters> <configurationParameterSettings> @@ -73,6 +80,13 @@ <string>opennlp.uima.Person</string> </value> </nameValuePair> + + <nameValuePair> + 
<name>opennlp.uima.ProbabilityFeature</name> + <value> + <string>prob</string> + </value> + </nameValuePair> </configurationParameterSettings> <typeSystemDescription> @@ -104,7 +118,7 @@ <externalResource> <name>PersonModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-person.bin</fileUrl> + <fileUrl>file:en-ner-person.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml b/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml index e3eba91f..fab54f87 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/PosTagger.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.postag.POSTagger</annotatorImplementationName> <analysisEngineMetaData> <name>POS Tagger</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -49,6 +49,13 @@ <multiValued>false</multiValued> <mandatory>true</mandatory> </configurationParameter> + + <configurationParameter> + <name>opennlp.uima.ProbabilityFeature</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>false</mandatory> + </configurationParameter> </configurationParameters> <configurationParameterSettings> @@ -72,6 +79,13 @@ <string>pos</string> </value> </nameValuePair> + + <nameValuePair> + <name>opennlp.uima.ProbabilityFeature</name> + <value> + <string>prob</string> + </value> + </nameValuePair> </configurationParameterSettings> <typeSystemDescription> @@ -103,7 +117,7 @@ <externalResource> <name>PosModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-pos-maxent.bin</fileUrl> + <fileUrl>file:opennlp-en-ud-ewt-pos-1.2-2.5.0.bin</fileUrl> </fileResourceSpecifier> 
<implementationName>opennlp.uima.postag.POSModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml b/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml index 2a020341..3b901664 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/SentenceDetector.xml @@ -26,7 +26,7 @@ <annotatorImplementationName>opennlp.uima.sentdetect.SentenceDetector</annotatorImplementationName> <analysisEngineMetaData> <name>Sentence Detector</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> <configurationParameter> @@ -41,8 +41,13 @@ <multiValued>false</multiValued> <mandatory>false</mandatory> </configurationParameter> + <configurationParameter> + <name>opennlp.uima.ProbabilityFeature</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>false</mandatory> + </configurationParameter> </configurationParameters> - <configurationParameterSettings> <nameValuePair> @@ -51,6 +56,12 @@ <string>opennlp.uima.Sentence</string> </value> </nameValuePair> + <nameValuePair> + <name>opennlp.uima.ProbabilityFeature</name> + <value> + <string>prob</string> + </value> + </nameValuePair> </configurationParameterSettings> <typeSystemDescription> @@ -82,7 +93,7 @@ <externalResource> <name>SentenceModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-sent.bin</fileUrl> + <fileUrl>file:opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.sentdetect.SentenceModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml b/opennlp-uima/src/test/resources/test-descriptors/SimpleTokenizer.xml similarity index 68% copy from 
opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml copy to opennlp-uima/src/test/resources/test-descriptors/SimpleTokenizer.xml index c10dad06..1279922a 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/SimpleTokenizer.xml @@ -22,7 +22,7 @@ <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> <frameworkImplementation>org.apache.uima.java</frameworkImplementation> <primitive>true</primitive> - <annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName> + <annotatorImplementationName>opennlp.uima.tokenize.SimpleTokenizer</annotatorImplementationName> <analysisEngineMetaData> <name>Tokenizer</name> <version>${project.version}</version> @@ -41,13 +41,6 @@ <multiValued>false</multiValued> <mandatory>true</mandatory> </configurationParameter> - - <configurationParameter> - <name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name> - <type>String</type> - <multiValued>false</multiValued> - <mandatory>false</mandatory> - </configurationParameter> </configurationParameters> <configurationParameterSettings> <nameValuePair> @@ -84,29 +77,7 @@ <multipleDeploymentAllowed>true</multipleDeploymentAllowed> </operationalProperties> </analysisEngineMetaData> - - <externalResourceDependencies> - <externalResourceDependency> - <key>opennlp.uima.ModelName</key> - <interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName> - </externalResourceDependency> - </externalResourceDependencies> - <resourceManagerConfiguration> - <externalResources> - <externalResource> - <name>TokenModel</name> - <fileResourceSpecifier> - <fileUrl>file:test-models/en-token.bin</fileUrl> - </fileResourceSpecifier> - <implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName> - </externalResource> - </externalResources> - <externalResourceBindings> - <externalResourceBinding> - <key>opennlp.uima.ModelName</key> - 
<resourceName>TokenModel</resourceName> - </externalResourceBinding> - </externalResourceBindings> - </resourceManagerConfiguration> + <resourceManagerConfiguration/> + </analysisEngineDescription> diff --git a/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml b/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml index 846b5a85..f447f078 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/TimeNameFinder.xml @@ -25,7 +25,7 @@ <annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName> <analysisEngineMetaData> <name>Time Name Finder</name> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <configurationParameters> @@ -105,7 +105,7 @@ <externalResource> <name>TimeModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-ner-time.bin</fileUrl> + <fileUrl>file:en-ner-time.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml b/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml index c10dad06..fd916542 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml @@ -48,6 +48,13 @@ <multiValued>false</multiValued> <mandatory>false</mandatory> </configurationParameter> + + <configurationParameter> + <name>opennlp.uima.ProbabilityFeature</name> + <type>String</type> + <multiValued>false</multiValued> + <mandatory>false</mandatory> + </configurationParameter> </configurationParameters> <configurationParameterSettings> <nameValuePair> @@ -62,6 +69,12 @@ <string>uima.tcas.DocumentAnnotation</string> </value> </nameValuePair> + <nameValuePair> + 
<name>opennlp.uima.ProbabilityFeature</name> + <value> + <string>prob</string> + </value> + </nameValuePair> </configurationParameterSettings> <typeSystemDescription> @@ -97,7 +110,7 @@ <externalResource> <name>TokenModel</name> <fileResourceSpecifier> - <fileUrl>file:test-models/en-token.bin</fileUrl> + <fileUrl>file:opennlp-en-ud-ewt-tokens-1.2-2.5.0.bin</fileUrl> </fileResourceSpecifier> <implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName> </externalResource> diff --git a/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml b/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml index d1994e0d..1d76fd51 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/TypeSystem.xml @@ -20,19 +20,26 @@ --> <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier"> - <name>OpenNLP TypeSystem</name> + <name>Apache OpenNLP TypeSystem</name> <description> This is the default OpenNLP type system. All the sample descriptors reference the types in this type system. To replace it against a custom type system change the mapping in the descriptors to the custom types and reference the custom type system. 
</description> - <version>1.5.2-incubating</version> + <version>${project.version}</version> <vendor>Apache Software Foundation</vendor> <types> <typeDescription> <name>opennlp.uima.Sentence</name> <supertypeName>uima.tcas.Annotation</supertypeName> + <features> + <featureDescription> + <name>prob</name> + <description>Probability</description> + <rangeTypeName>uima.cas.Double</rangeTypeName> + </featureDescription> + </features> </typeDescription> <typeDescription> @@ -45,6 +52,11 @@ <description>Part of speech</description> <rangeTypeName>uima.cas.String</rangeTypeName> </featureDescription> + <featureDescription> + <name>prob</name> + <description>Probability</description> + <rangeTypeName>uima.cas.Double</rangeTypeName> + </featureDescription> </features> </typeDescription> @@ -63,6 +75,13 @@ <typeDescription> <name>opennlp.uima.Person</name> <supertypeName>uima.tcas.Annotation</supertypeName> + <features> + <featureDescription> + <name>prob</name> + <description>Probability</description> + <rangeTypeName>uima.cas.Double</rangeTypeName> + </featureDescription> + </features> </typeDescription> <typeDescription> @@ -94,5 +113,27 @@ <name>opennlp.uima.Percentage</name> <supertypeName>uima.tcas.Annotation</supertypeName> </typeDescription> + + <typeDescription> + <name>opennlp.uima.Parse</name> + <supertypeName>uima.tcas.Annotation</supertypeName> + <features> + <featureDescription> + <name>parseType</name> + <description>Type of the parse node</description> + <rangeTypeName>uima.cas.String</rangeTypeName> + </featureDescription> + <featureDescription> + <name>children</name> + <description>Leaf nodes</description> + <rangeTypeName>uima.cas.FSArray</rangeTypeName> + </featureDescription> + <featureDescription> + <name>prob</name> + <description>Probability</description> + <rangeTypeName>uima.cas.Double</rangeTypeName> + </featureDescription> + </features> + </typeDescription> </types> </typeSystemDescription> \ No newline at end of file diff --git
a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml b/opennlp-uima/src/test/resources/test-descriptors/WhitespaceTokenizer.xml similarity index 68% copy from opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml copy to opennlp-uima/src/test/resources/test-descriptors/WhitespaceTokenizer.xml index c10dad06..3ac16a8f 100644 --- a/opennlp-uima/src/test/resources/test-descriptors/Tokenizer.xml +++ b/opennlp-uima/src/test/resources/test-descriptors/WhitespaceTokenizer.xml @@ -22,7 +22,7 @@ <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> <frameworkImplementation>org.apache.uima.java</frameworkImplementation> <primitive>true</primitive> - <annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName> + <annotatorImplementationName>opennlp.uima.tokenize.WhitespaceTokenizer</annotatorImplementationName> <analysisEngineMetaData> <name>Tokenizer</name> <version>${project.version}</version> @@ -41,13 +41,6 @@ <multiValued>false</multiValued> <mandatory>true</mandatory> </configurationParameter> - - <configurationParameter> - <name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name> - <type>String</type> - <multiValued>false</multiValued> - <mandatory>false</mandatory> - </configurationParameter> </configurationParameters> <configurationParameterSettings> <nameValuePair> @@ -84,29 +77,7 @@ <multipleDeploymentAllowed>true</multipleDeploymentAllowed> </operationalProperties> </analysisEngineMetaData> - - <externalResourceDependencies> - <externalResourceDependency> - <key>opennlp.uima.ModelName</key> - <interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName> - </externalResourceDependency> - </externalResourceDependencies> - <resourceManagerConfiguration> - <externalResources> - <externalResource> - <name>TokenModel</name> - <fileResourceSpecifier> - <fileUrl>file:test-models/en-token.bin</fileUrl> - </fileResourceSpecifier> - 
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName> - </externalResource> - </externalResources> - <externalResourceBindings> - <externalResourceBinding> - <key>opennlp.uima.ModelName</key> - <resourceName>TokenModel</resourceName> - </externalResourceBinding> - </externalResourceBindings> - </resourceManagerConfiguration> + <resourceManagerConfiguration/> + </analysisEngineDescription> diff --git a/opennlp-uima/src/test/resources/training-params-invalid.conf b/opennlp-uima/src/test/resources/training-params-invalid.conf new file mode 100644 index 00000000..2faff3de --- /dev/null +++ b/opennlp-uima/src/test/resources/training-params-invalid.conf @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Algorithm=XYZ +Iterations=100 +Cutoff=5 +Threads=1 \ No newline at end of file diff --git a/opennlp-uima/src/test/resources/training-params-test.conf b/opennlp-uima/src/test/resources/training-params-test.conf new file mode 100644 index 00000000..cac2f921 --- /dev/null +++ b/opennlp-uima/src/test/resources/training-params-test.conf @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Algorithm=MAXENT +Iterations=150 +Cutoff=5 +Threads=4 \ No newline at end of file diff --git a/pom.xml b/pom.xml index f25a07ae..47a2e958 100644 --- a/pom.xml +++ b/pom.xml @@ -166,11 +166,18 @@ </dependencyManagement> <properties> - <!-- Build Properties --> + <!-- Build properties --> <java.version>17</java.version> + <maven.version>3.3.9</maven.version> <maven.compiler.release>${java.version}</maven.compiler.release> <maven.compiler.target>${java.version}</maven.compiler.target> - <maven.version>3.3.9</maven.version> + + <!-- OpenNLP properties --> + <opennlp.download.home>${user.home}</opennlp.download.home> + <opennlp.forkCount>1.0C</opennlp.forkCount> + <opennlp.models.version>1.2.0</opennlp.models.version> + + <!-- Dependency versions --> <junit.version>5.11.4</junit.version> <junit5-system-exit.version>2.0.2</junit5-system-exit.version> <uimaj.version>3.6.0</uimaj.version> @@ -179,12 +186,10 @@ <slf4j.version>2.0.16</slf4j.version> <log4j2.version>2.24.3</log4j2.version> <logcaptor.version>2.10.1</logcaptor.version> - <jmh.version>1.37</jmh.version> <classgraph.version>4.8.179</classgraph.version> + <jmh.version>1.37</jmh.version> - - <opennlp.models.version>1.2.0</opennlp.models.version> - <opennlp.forkCount>1.0C</opennlp.forkCount> + <!-- Plugin versions --> 
<coveralls.maven.plugin>4.3.0</coveralls.maven.plugin> <jacoco.maven.plugin>0.8.12</jacoco.maven.plugin> <maven.assembly.plugin>3.7.1</maven.assembly.plugin> @@ -192,7 +197,6 @@ <maven.javadoc.plugin>3.11.2</maven.javadoc.plugin> <forbiddenapis.plugin>3.8</forbiddenapis.plugin> <license-maven-plugin.version>2.5.0</license-maven-plugin.version> - </properties> <build> @@ -251,9 +255,9 @@ <version>${jacoco.maven.plugin}</version> <configuration> <excludes> - <exclude>**/stemmer/*</exclude> + <exclude>**/stemmer/*</exclude> <exclude>**/stemmer/snowball/*</exclude> - </excludes> + </excludes> </configuration> <executions> <execution> @@ -314,6 +318,7 @@ </execution> </executions> <configuration> + <argLine>-DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine> <excludes> <exclude>**/*Test.java</exclude> </excludes> @@ -612,6 +617,13 @@ </build> </profile> + <profile> + <id>ci</id> + <properties> + <opennlp.download.home>${project.build.directory}</opennlp.download.home> + </properties> + </profile> + <profile> <id>eval-tests</id> <properties>
