Author: ogrisel
Date: Tue Feb 22 16:58:15 2011
New Revision: 1073406
URL: http://svn.apache.org/viewvc?rev=1073406&view=rev
Log:
STANBOL-13 / STANBOL-90: upgrade the NER engine to use the OpenNLP 1.5 API and
models packaged in the new defaultdata artifact
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
incubator/stanbol/trunk/enhancer/launchers/full/src/main/bundles/list.xml
incubator/stanbol/trunk/enhancer/launchers/lite/src/main/bundles/list.xml
incubator/stanbol/trunk/enhancer/parent/pom.xml
Modified: incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml?rev=1073406&r1=1073405&r2=1073406&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml Tue Feb 22
16:58:15 2011
@@ -49,23 +49,25 @@
<dependencies>
<!-- obviously we need opennlp -->
<dependency>
- <groupId>org.clojars.pjt</groupId>
+ <groupId>org.clojars.zaxtax</groupId>
<artifactId>opennlp-tools</artifactId>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>org.clojars.zaxtax</groupId>
+ <artifactId>maxent</artifactId>
+ <scope>compile</scope>
+ </dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
</dependency>
- <!--
- TODO: remove dependency after the openNLP Models are no
longer loaded
- via the AutotaggingProvider
- -->
+
<dependency>
<groupId>org.apache.stanbol</groupId>
-
<artifactId>org.apache.stanbol.enhancer.engines.autotagging</artifactId>
- </dependency>
+ <artifactId>org.apache.stanbol.defaultdata</artifactId>
+ </dependency>
<dependency>
<groupId>org.apache.clerezza</groupId>
@@ -119,16 +121,13 @@
<Private-Package>
org.apache.stanbol.enhancer.engines.opennlp.impl.*
</Private-Package>
-
<Embed-Dependency>opennlp-tools,opennlp-maxent,trove</Embed-Dependency>
+
<Embed-Dependency>opennlp-tools,maxent</Embed-Dependency>
<Embed-Transitive>true</Embed-Transitive>
<Import-Package>
+
org.apache.stanbol.defaultdata.opennlp,
!net.didion.*,
- !gnu.getopt,
- !org.apache.log.*,
- !junit.framework.*,
-
!org.apache.avalon.framework.*,
*
- </Import-Package>
+ </Import-Package>
</instructions>
</configuration>
</plugin>
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java?rev=1073406&r1=1073405&r2=1073406&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
Tue Feb 22 16:58:15 2011
@@ -16,11 +16,18 @@
*/
package org.apache.stanbol.enhancer.engines.opennlp.impl;
-import java.io.DataInputStream;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -29,14 +36,13 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.zip.GZIPInputStream;
-import opennlp.maxent.GISModel;
-import opennlp.maxent.io.BinaryGISModelReader;
import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.tokenize.SimpleTokenizer;
+import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.Span;
import org.apache.clerezza.rdf.core.LiteralFactory;
@@ -49,9 +55,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Property;
-import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
-import org.apache.stanbol.enhancer.engines.autotagging.AutotaggerProvider;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -62,18 +66,13 @@ import org.apache.stanbol.enhancer.servi
import org.osgi.framework.BundleContext;
import org.osgi.service.component.ComponentContext;
-
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
-
/**
- * Apache Stanbol Enhancer Named Entity Recognition enhancement engine based
on
- * opennlp's Maximum Entropy models and a DBpedia index for optionally
matching
- * them to well know DBpedia entities.
+ * Apache Stanbol Enhancer Named Entity Recognition enhancement engine based
on opennlp's Maximum Entropy
+ * models and a DBpedia index for optionally matching them to well known
DBpedia entities.
*/
@Component(immediate = true, metatype = true)
@Service
-public class NamedEntityExtractionEnhancementEngine implements
- EnhancementEngine, ServiceProperties {
+public class NamedEntityExtractionEnhancementEngine implements
EnhancementEngine, ServiceProperties {
/**
* The default value for the Execution of this Engine. Currently set to
@@ -88,42 +87,35 @@ public class NamedEntityExtractionEnhanc
public static final Log log =
LogFactory.getLog(NamedEntityExtractionEnhancementEngine.class);
- protected GISModel sentenceModel;
+ protected SentenceModel sentenceModel;
- protected GISModel personNameModel;
+ protected TokenNameFinderModel personNameModel;
- protected GISModel locationNameModel;
+ protected TokenNameFinderModel locationNameModel;
- protected GISModel organizationNameModel;
+ protected TokenNameFinderModel organizationNameModel;
- protected Map<String, Object[]> entityTypes = new HashMap<String,
Object[]>();
+ protected Map<String,Object[]> entityTypes = new
HashMap<String,Object[]>();
protected BundleContext bundleContext;
- @Reference
- protected AutotaggerProvider autotaggerProvider;
-
- // @Activate
@SuppressWarnings("unchecked")
protected void activate(ComponentContext ce) throws IOException {
bundleContext = ce.getBundleContext();
String directoryPath = null;
if (ce != null) {
- Dictionary<String, String> properties = ce.getProperties();
+ Dictionary<String,String> properties = ce.getProperties();
directoryPath = properties.get(MODELS_PATH);
}
- sentenceModel = loadModel(directoryPath,
- "english/sentdetect/EnglishSD.bin.gz");
+ sentenceModel = new SentenceModel(lookupModelStream(directoryPath,
"en-sent.bin"));
- personNameModel = buildNameModel(directoryPath, "person",
- OntologicalClasses.DBPEDIA_PERSON);
+ personNameModel = buildNameModel(directoryPath, "person",
OntologicalClasses.DBPEDIA_PERSON);
- locationNameModel = buildNameModel(directoryPath, "location",
- OntologicalClasses.DBPEDIA_PLACE);
+ locationNameModel = buildNameModel(directoryPath, "location",
OntologicalClasses.DBPEDIA_PLACE);
organizationNameModel = buildNameModel(directoryPath, "organization",
- OntologicalClasses.DBPEDIA_ORGANISATION);
+ OntologicalClasses.DBPEDIA_ORGANISATION);
}
// @Deactivate
@@ -134,47 +126,30 @@ public class NamedEntityExtractionEnhanc
organizationNameModel = null;
}
- protected GISModel loadModel(String directoryPath, String
modelRelativePath)
- throws IOException {
+ protected InputStream lookupModelStream(String directoryPath, String
modelRelativePath) throws IOException {
ClassLoader loader = this.getClass().getClassLoader();
if (directoryPath != null && directoryPath.length() > 0) {
// load custom models from the provided FS directory
- File modelData = new File(new File(directoryPath),
- modelRelativePath);
- return new BinaryGISModelReader(modelData).getModel();
+ File modelData = new File(new File(directoryPath),
modelRelativePath);
+ return new FileInputStream(modelData);
} else {
- // load default OpenNLP models from jars
- String resourcePath = "opennlp/" + modelRelativePath;
- InputStream in = null;
- if (autotaggerProvider != null) {
- // Lookup the OSGI bundle of the autotagger that embeds the
- // default opennlp models data: this is hackish, the
- // iks-autotagging project should be refactored to do all of
- // this by it-self
- URL entry =
autotaggerProvider.getBundleContext().getBundle().getEntry(
- resourcePath);
- in = entry != null ? entry.openStream() : null;
- } else {
- // regular classloading for the tests
- in = loader.getResourceAsStream(resourcePath);
- }
+ // load default OpenNLP models from classpath (embedded in the
defaultdata bundle)
+ String resourcePath = "org/apache/stanbol/defaultdata/opennlp/" +
modelRelativePath;
+ InputStream in = loader.getResourceAsStream(resourcePath);
if (in == null) {
- throw new IOException("coult not find resource: "
- + resourcePath);
+                throw new IOException("Could not find resource from the
classpath: " + resourcePath);
}
- return new BinaryGISModelReader(new DataInputStream(
- new GZIPInputStream(in))).getModel();
+ return in;
}
}
- protected GISModel buildNameModel(String directoryPath, String name,
- UriRef typeUri) throws IOException {
- String modelRelativePath = String.format("english/namefind/%s.bin.gz",
- name);
- GISModel model = loadModel(directoryPath, modelRelativePath);
+ protected TokenNameFinderModel buildNameModel(String directoryPath, String
name, UriRef typeUri) throws IOException {
+ String modelRelativePath = String.format("en-ner-%s.bin", name);
+ TokenNameFinderModel model = new
TokenNameFinderModel(lookupModelStream(directoryPath,
+ modelRelativePath));
// register the name finder instances for matching owl class
- entityTypes.put(name, new Object[] { typeUri, model });
+ entityTypes.put(name, new Object[] {typeUri, model});
return model;
}
@@ -194,11 +169,11 @@ public class NamedEntityExtractionEnhanc
}
try {
- for (Map.Entry<String, Object[]> type : entityTypes.entrySet()) {
+ for (Map.Entry<String,Object[]> type : entityTypes.entrySet()) {
String typeLabel = type.getKey();
Object[] typeInfo = type.getValue();
UriRef typeUri = (UriRef) typeInfo[0];
- GISModel nameFinderModel = (GISModel) typeInfo[1];
+ TokenNameFinderModel nameFinderModel = (TokenNameFinderModel)
typeInfo[1];
findNamedEntities(ci, text, typeUri, typeLabel,
nameFinderModel);
}
        } catch (Exception e) { // TODO: does it make sense to catch Exception
here?
@@ -206,26 +181,25 @@ public class NamedEntityExtractionEnhanc
}
}
- protected void findNamedEntities(final ContentItem ci, final String text,
- final UriRef typeUri, final String typeLabel,
- final GISModel nameFinderModel) {
+ protected void findNamedEntities(final ContentItem ci,
+ final String text,
+ final UriRef typeUri,
+ final String typeLabel,
+ final TokenNameFinderModel
nameFinderModel) {
if (ci == null) {
- throw new IllegalArgumentException(
- "Parsed ContentItem MUST NOT be NULL");
+ throw new IllegalArgumentException("Parsed ContentItem MUST NOT be
NULL");
}
if (text == null) {
- log.warn("NULL was parsed as text for content item " + ci.getId()
- + "! -> call ignored");
+ log.warn("NULL was parsed as text for content item " + ci.getId()
+ "! -> call ignored");
return;
}
LiteralFactory literalFactory = LiteralFactory.getInstance();
MGraph g = ci.getMetadata();
- Map<String, List<NameOccurrence>> entityNames = extractNameOccurrences(
- nameFinderModel, text);
+ Map<String,List<NameOccurrence>> entityNames =
extractNameOccurrences(nameFinderModel, text);
- Map<String, UriRef> previousAnnotations = new LinkedHashMap<String,
UriRef>();
- for (Map.Entry<String, List<NameOccurrence>> nameInContext :
entityNames.entrySet()) {
+ Map<String,UriRef> previousAnnotations = new
LinkedHashMap<String,UriRef>();
+ for (Map.Entry<String,List<NameOccurrence>> nameInContext :
entityNames.entrySet()) {
String name = nameInContext.getKey();
List<NameOccurrence> occurrences = nameInContext.getValue();
@@ -233,25 +207,19 @@ public class NamedEntityExtractionEnhanc
UriRef firstOccurrenceAnnotation = null;
for (NameOccurrence occurrence : occurrences) {
- UriRef textAnnotation =
EnhancementEngineHelper.createTextEnhancement(
- ci, this);
- g.add(new TripleImpl(textAnnotation,
- ENHANCER_SELECTED_TEXT,
- literalFactory.createTypedLiteral(name)));
- g.add(new TripleImpl(textAnnotation,
- ENHANCER_SELECTION_CONTEXT,
-
literalFactory.createTypedLiteral(occurrence.context)));
- g.add(new TripleImpl(textAnnotation, DC_TYPE,
- typeUri));
- g.add(new TripleImpl(
- textAnnotation,
- ENHANCER_CONFIDENCE,
-
literalFactory.createTypedLiteral(occurrence.confidence)));
+ UriRef textAnnotation =
EnhancementEngineHelper.createTextEnhancement(ci, this);
+ g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
literalFactory
+ .createTypedLiteral(name)));
+ g.add(new TripleImpl(textAnnotation,
ENHANCER_SELECTION_CONTEXT, literalFactory
+ .createTypedLiteral(occurrence.context)));
+ g.add(new TripleImpl(textAnnotation, DC_TYPE, typeUri));
+ g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE,
literalFactory
+ .createTypedLiteral(occurrence.confidence)));
if (occurrence.start != null && occurrence.end != null) {
- g.add(new TripleImpl(textAnnotation, ENHANCER_START,
-
literalFactory.createTypedLiteral(occurrence.start)));
- g.add(new TripleImpl(textAnnotation, ENHANCER_END,
-
literalFactory.createTypedLiteral(occurrence.end)));
+ g.add(new TripleImpl(textAnnotation, ENHANCER_START,
literalFactory
+ .createTypedLiteral(occurrence.start)));
+ g.add(new TripleImpl(textAnnotation, ENHANCER_END,
literalFactory
+ .createTypedLiteral(occurrence.end)));
}
// add the subsumption relationship among occurrences of the
same
@@ -259,14 +227,12 @@ public class NamedEntityExtractionEnhanc
if (firstOccurrenceAnnotation == null) {
// check already extracted annotations to find a first most
// specific occurrence
- for (Map.Entry<String, UriRef> entry :
previousAnnotations.entrySet()) {
+ for (Map.Entry<String,UriRef> entry :
previousAnnotations.entrySet()) {
if (entry.getKey().contains(name)) {
// we have found a most specific previous
// occurrence, use it as subsumption target
firstOccurrenceAnnotation = entry.getValue();
- g.add(new TripleImpl(textAnnotation,
- DC_RELATION,
- firstOccurrenceAnnotation));
+ g.add(new TripleImpl(textAnnotation, DC_RELATION,
firstOccurrenceAnnotation));
break;
}
}
@@ -279,8 +245,7 @@ public class NamedEntityExtractionEnhanc
} else {
// I am referring to a most specific first occurrence of
the
// same name
- g.add(new TripleImpl(textAnnotation,
- DC_RELATION, firstOccurrenceAnnotation));
+ g.add(new TripleImpl(textAnnotation, DC_RELATION,
firstOccurrenceAnnotation));
}
}
}
@@ -298,77 +263,52 @@ public class NamedEntityExtractionEnhanc
return extractNames(organizationNameModel, text);
}
- public Map<String, List<NameOccurrence>> extractPersonNameOccurrences(
- String text) {
+ public Map<String,List<NameOccurrence>>
extractPersonNameOccurrences(String text) {
return extractNameOccurrences(personNameModel, text);
}
- public Map<String, List<NameOccurrence>> extractLocationNameOccurrences(
- String text) {
+ public Map<String,List<NameOccurrence>>
extractLocationNameOccurrences(String text) {
return extractNameOccurrences(locationNameModel, text);
}
- public Map<String, List<NameOccurrence>>
extractOrganizationNameOccurrences(
- String text) {
+ public Map<String,List<NameOccurrence>>
extractOrganizationNameOccurrences(String text) {
return extractNameOccurrences(organizationNameModel, text);
}
- protected Collection<String> extractNames(GISModel nameFinderModel,
- String text) {
+ protected Collection<String> extractNames(TokenNameFinderModel
nameFinderModel, String text) {
return extractNameOccurrences(nameFinderModel, text).keySet();
}
- protected Map<String, List<NameOccurrence>> extractNameOccurrences(
- GISModel nameFinderModel, String text) {
+ protected Map<String,List<NameOccurrence>>
extractNameOccurrences(TokenNameFinderModel nameFinderModel,
+ String
text) {
// version with explicit sentence endings to reflect heading /
paragraph
// structure of an HTML or PDF document converted to text
String textWithDots = text.replaceAll("\\n\\n", ".\n");
- SentenceDetectorME sentenceDetector = new SentenceDetectorME(
- sentenceModel);
+ SentenceDetectorME sentenceDetector = new
SentenceDetectorME(sentenceModel);
- int[] sentenceEndings = sentenceDetector.sentPosDetect(textWithDots);
- int[] sentencePositions = new int[sentenceEndings.length + 1];
- sentencePositions[0] = 0;
- System.arraycopy(sentenceEndings, 0, sentencePositions, 1,
- sentenceEndings.length);
- String[] sentences;
- if(sentenceEndings.length<1){
- //STANBOL-60: if no sentence is detected treat the whole text as
- //one sentence.
- log.debug("No sentence detected -> use whole text as one element");
- sentences = new String[] {text};
- } else {
- sentences = new String[sentenceEndings.length];
- for (int i = 0; i < sentences.length; i++) {
- log.debug(String.format("Sentence %d from char %d to %d", i,
- sentencePositions[i], sentencePositions[i + 1]));
- sentences[i] = text.substring(sentencePositions[i],
- sentencePositions[i + 1]);
- log.debug("Sentence: " + sentences[i]);
- }
- }
+ Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);
NameFinderME finder = new NameFinderME(nameFinderModel);
- Map<String, List<NameOccurrence>> nameOccurrences = new
LinkedHashMap<String, List<NameOccurrence>>();
- Tokenizer tokenizer = new SimpleTokenizer();
- for (int i = 0; i < sentences.length; i++) {
- String sentence = sentences[i];
+ Map<String,List<NameOccurrence>> nameOccurrences = new
LinkedHashMap<String,List<NameOccurrence>>();
+ Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
+ for (int i = 0; i < sentenceSpans.length; i++) {
+ String sentence =
sentenceSpans[i].getCoveredText(text).toString().trim();
// build a context by concatenating three sentences to be used for
// similarity ranking / disambiguation + contextual snippet in the
// extraction structure
List<String> contextElements = new ArrayList<String>();
- if (i - 1 > 0) {
- String previousSentence = sentences[i - 1];
- contextElements.add(previousSentence.trim());
+ if (i > 0) {
+ CharSequence previousSentence = sentenceSpans[i -
1].getCoveredText(text);
+ contextElements.add(previousSentence.toString().trim());
}
- contextElements.add(sentence.trim());
- if (i + 1 < sentences.length) {
- String nextSentence = sentences[i + 1];
- contextElements.add(nextSentence.trim());
+ contextElements.add(sentence.toString().trim());
+ if (i + 1 < sentenceSpans.length) {
+ CharSequence nextSentence = sentenceSpans[i +
1].getCoveredText(text);
+ contextElements.add(nextSentence.toString().trim());
}
String context = StringUtils.join(contextElements, " ");
@@ -390,19 +330,17 @@ public class NamedEntityExtractionEnhanc
Integer absoluteEnd = null;
if (start != -1) {
/*
- * NOTE (rw, issue 19, 20100615) Here we need to set the
new
- * start position, by adding the current start to the
- * lastStartPosion. we need also to use the
- * lastStartPosition to calculate the start of the element.
- * The old code had not worked if names contains more than
a
- * single element!
+ * NOTE (rw, issue 19, 20100615) Here we need to set the
new start position, by adding the
+                     * current start to the lastStartPosition. We also need to
use the lastStartPosition to
+ * calculate the start of the element. The old code had
not worked if names contains more
+ * than a single element!
*/
lastStartPosition += start;
- absoluteStart = sentencePositions[i] + lastStartPosition;
+ absoluteStart = sentenceSpans[i].getStart() +
lastStartPosition;
absoluteEnd = absoluteStart + name.length();
}
- NameOccurrence occurrence = new NameOccurrence(name,
- absoluteStart, absoluteEnd, context, confidence);
+ NameOccurrence occurrence = new NameOccurrence(name,
absoluteStart, absoluteEnd, context,
+ confidence);
List<NameOccurrence> occurrences = nameOccurrences.get(name);
if (occurrences == null) {
@@ -416,16 +354,15 @@ public class NamedEntityExtractionEnhanc
if (log.isDebugEnabled()) {
for (List<NameOccurrence> occurrences : nameOccurrences.values()) {
- log.debug("Occurrences found: "
- + StringUtils.join(occurrences, ", "));
+ log.debug("Occurrences found: " +
StringUtils.join(occurrences, ", "));
}
}
return nameOccurrences;
}
public int canEnhance(ContentItem ci) {
- //in case text/pain;charSet=UTF8 is parsed
- String mimeType = ci.getMimeType().split(";",2)[0];
+        // in case text/plain;charSet=UTF8 is parsed
+ String mimeType = ci.getMimeType().split(";", 2)[0];
if (TEXT_PLAIN_MIMETYPE.equalsIgnoreCase(mimeType)) {
return ENHANCE_SYNCHRONOUS;
}
@@ -433,10 +370,9 @@ public class NamedEntityExtractionEnhanc
}
@Override
- public Map<String, Object> getServiceProperties() {
- return Collections.unmodifiableMap(Collections.singletonMap(
- ENHANCEMENT_ENGINE_ORDERING,
- (Object) defaultOrder));
+ public Map<String,Object> getServiceProperties() {
+ return
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
+ (Object) defaultOrder));
}
}
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1073406&r1=1073405&r2=1073406&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
Tue Feb 22 16:58:15 2011
@@ -34,7 +34,6 @@ import org.apache.clerezza.rdf.core.Trip
import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
-import
org.apache.stanbol.enhancer.engines.autotagging.impl.ConfiguredAutotaggerProvider;
import org.apache.stanbol.enhancer.engines.opennlp.impl.NameOccurrence;
import
org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
@@ -50,7 +49,7 @@ import static org.apache.stanbol.enhance
public class TestNamedEntityExtractionEnhancementEngine extends Assert {
- public static final String SINGLE_SENTENCE = "Dr. Patrick Marshall (1869 -
November 1950) was a"
+ public static final String SINGLE_SENTENCE = "Dr Patrick Marshall (1869 -
November 1950) was a"
+ " geologist who lived in New Zealand and worked at the
University of Otago.";
public static final String MULTI_SENTENCES = "The life of Patrick
Marshall\n\n"
@@ -65,7 +64,6 @@ public class TestNamedEntityExtractionEn
@BeforeClass
public static void setUpServices() throws IOException {
Dictionary<String, Object> properties = new Hashtable<String,
Object>();
- properties.put(ConfiguredAutotaggerProvider.LUCENE_INDEX_PATH, "");
MockComponentContext context = new MockComponentContext(properties);
nerEngine.activate(context);
}
@@ -119,13 +117,13 @@ public class TestNamedEntityExtractionEn
assertEquals("Patrick Marshall", firstOccurrence.name);
assertEquals(12, firstOccurrence.start.intValue());
assertEquals(28, firstOccurrence.end.intValue());
- assertEquals(0.98, firstOccurrence.confidence, 0.005);
+ assertEquals(0.998, firstOccurrence.confidence, 0.005);
NameOccurrence secondOccurrence = pmOccurrences.get(1);
assertEquals("Patrick Marshall", secondOccurrence.name);
assertEquals(33, secondOccurrence.start.intValue());
assertEquals(49, secondOccurrence.end.intValue());
- assertEquals(0.97, secondOccurrence.confidence, 0.005);
+ assertEquals(0.85, secondOccurrence.confidence, 0.005);
}
@Test
Modified:
incubator/stanbol/trunk/enhancer/launchers/full/src/main/bundles/list.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/launchers/full/src/main/bundles/list.xml?rev=1073406&r1=1073405&r2=1073406&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/launchers/full/src/main/bundles/list.xml
(original)
+++ incubator/stanbol/trunk/enhancer/launchers/full/src/main/bundles/list.xml
Tue Feb 22 16:58:15 2011
@@ -318,6 +318,11 @@
</bundle>
<bundle>
<groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.defaultdata</artifactId>
+ <version>0.0.1</version>
+ </bundle>
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.opennlp.ner</artifactId>
<version>0.9-SNAPSHOT</version>
</bundle>
Modified:
incubator/stanbol/trunk/enhancer/launchers/lite/src/main/bundles/list.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/launchers/lite/src/main/bundles/list.xml?rev=1073406&r1=1073405&r2=1073406&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/launchers/lite/src/main/bundles/list.xml
(original)
+++ incubator/stanbol/trunk/enhancer/launchers/lite/src/main/bundles/list.xml
Tue Feb 22 16:58:15 2011
@@ -297,6 +297,11 @@
</bundle>
<bundle>
<groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.defaultdata</artifactId>
+ <version>0.0.1</version>
+ </bundle>
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.opennlp.ner</artifactId>
<version>0.9-SNAPSHOT</version>
</bundle>
Modified: incubator/stanbol/trunk/enhancer/parent/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/parent/pom.xml?rev=1073406&r1=1073405&r2=1073406&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/parent/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/parent/pom.xml Tue Feb 22 16:58:15 2011
@@ -119,7 +119,7 @@
<instructions>
<Bundle-Category>Stanbol Enhancer</Bundle-Category>
<Bundle-DocURL>http://incubator.apache.org/stanbol</Bundle-DocURL>
- <Bundle-Vendor>Apache
Stanbol (incubation)</Bundle-Vendor>
+ <Bundle-Vendor>Apache
Stanbol (Incubating)</Bundle-Vendor>
<Bundle-SymbolicName>${project.artifactId}</Bundle-SymbolicName>
<_versionpolicy>$${version;===;${@}}</_versionpolicy>
</instructions>
@@ -232,7 +232,13 @@
<dependencyManagement>
<dependencies>
- <!-- FISE Deps -->
+ <!-- Stanbol Deps -->
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+
<artifactId>org.apache.stanbol.defaultdata</artifactId>
+ <version>0.0.1</version>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
@@ -633,9 +639,15 @@
<!-- OpenNLP -->
<dependency>
- <groupId>org.clojars.pjt</groupId>
+ <groupId>org.clojars.zaxtax</groupId>
<artifactId>opennlp-tools</artifactId>
- <version>1.4.3</version>
+ <version>1.5.0</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.clojars.zaxtax</groupId>
+ <artifactId>maxent</artifactId>
+ <version>3.0.0</version>
<scope>provided</scope>
</dependency>