Author: rwesten
Date: Fri Jan 6 13:05:09 2012
New Revision: 1228163
URL: http://svn.apache.org/viewvc?rev=1228163&view=rev
Log:
STANBOL-102: Implementation as described in the comment. Also adds the Language
Identification Engine (LangId) and the KeywordExtractionEngine to the stable
launcher.
A default configuration for the NER Engine was added to the full, stable,
full-war and kres launchers.
NOTE that the correct default initialisation of the NER Engine is implicitly
tested by the existing Integration-tests of the Stanbol Enhancer.
Added:
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
incubator/stanbol/trunk/launchers/full-war/pom.xml
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README
incubator/stanbol/trunk/launchers/full/pom.xml
incubator/stanbol/trunk/launchers/full/src/main/resources/README
incubator/stanbol/trunk/launchers/stable/pom.xml
incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml
incubator/stanbol/trunk/launchers/stable/src/main/resources/README
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
Fri Jan 6 13:05:09 2012
@@ -29,11 +29,13 @@ import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
@@ -44,6 +46,7 @@ import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.Span;
+import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Triple;
@@ -59,6 +62,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -69,12 +73,6 @@ public class NEREngineCore implements En
protected static final String TEXT_PLAIN_MIMETYPE = "text/plain";
private final Logger log = LoggerFactory.getLogger(getClass());
-// private final String bundleSymbolicName;
-// protected final SentenceModel sentenceModel;
-// protected final TokenNameFinderModel personNameModel;
-// protected final TokenNameFinderModel locationNameModel;
-// protected final TokenNameFinderModel organizationNameModel;
-// protected Map<String,Object[]> entityTypes = new
HashMap<String,Object[]>();
private static Map<String,UriRef> entityTypes = new
HashMap<String,UriRef>();
static {
entityTypes.put("person", OntologicalClasses.DBPEDIA_PERSON);
@@ -83,6 +81,10 @@ public class NEREngineCore implements En
}
private OpenNLP openNLP;
+
+ private final String defaultLang;
+
+ private final Set<String> processedLangs;
/** Comments about our models */
public static final Map<String, String> DATA_FILE_COMMENTS;
@@ -91,27 +93,39 @@ public class NEREngineCore implements En
DATA_FILE_COMMENTS.put("Default data files", "provided by the
org.apache.stanbol.defaultdata bundle");
}
- public NEREngineCore(OpenNLP openNLP) throws InvalidFormatException,
IOException{
+ public NEREngineCore(OpenNLP openNLP, String defaultLanguage, Set<String>
processedLanguages) throws InvalidFormatException, IOException{
this.openNLP = openNLP;
-// sentenceModel = openNLP.buildSentenceModel("en");
-// personNameModel = buildNameModel("person",
OntologicalClasses.DBPEDIA_PERSON);
-// locationNameModel = buildNameModel("location",
OntologicalClasses.DBPEDIA_PLACE);
-// organizationNameModel = buildNameModel("organization",
OntologicalClasses.DBPEDIA_ORGANISATION);
+ this.defaultLang = defaultLanguage;
+ this.processedLangs = Collections.unmodifiableSet(processedLanguages);
}
- NEREngineCore(DataFileProvider dfp) throws InvalidFormatException,
IOException {
- this(new OpenNLP(dfp));
+ NEREngineCore(DataFileProvider dfp,String defaultLanguage, Set<String>
processedLanguages) throws InvalidFormatException, IOException {
+ this(new OpenNLP(dfp),defaultLanguage,processedLanguages);
}
- protected TokenNameFinderModel buildNameModel(String name, UriRef typeUri)
throws IOException {
- //String modelRelativePath = String.format("en-ner-%s.bin", name);
- TokenNameFinderModel model = openNLP.getNameModel(name, "en");
- // register the name finder instances for matching owl class
-// entityTypes.put(name, new Object[] {typeUri, model});
- return model;
- }
+// protected TokenNameFinderModel buildNameModel(String name, UriRef
typeUri) throws IOException {
+// //String modelRelativePath = String.format("en-ner-%s.bin", name);
+// TokenNameFinderModel model = openNLP.getNameModel(name, "en");
+// // register the name finder instances for matching owl class
+//// entityTypes.put(name, new Object[] {typeUri, model});
+// return model;
+// }
public void computeEnhancements(ContentItem ci) throws EngineException {
+ //first check the langauge before processing the content (text)
+ String language = extractLanguage(ci);
+ if(language == null){
+ log.warn("Unable to extract Language for ContentItem {}: The text"
+
+ "of this ContentItem will not be processed by the NER
engine!",
+ ci.getUri());
+ return;
+ }
+ if(!isProcessedLangage(language)){
+ log.warn("The language {} of ContentItem {} is not configured to
be" +
+ "processed by this NER engine instance (processed {})!",
+ new Object[]{language,ci.getUri(),processedLangs});
+ return;
+ }
String mimeType = ci.getMimeType().split(";", 2)[0];
String text;
if (TEXT_PLAIN_MIMETYPE.equals(mimeType)) {
@@ -138,13 +152,16 @@ public class NEREngineCore implements En
return;
}
log.debug("computeEnhancements {} text={}",
ci.getUri().getUnicodeString(), StringUtils.abbreviate(text, 100));
-
try {
for (Map.Entry<String,UriRef> type : entityTypes.entrySet()) {
String typeLabel = type.getKey();
UriRef typeUri = type.getValue();
- TokenNameFinderModel nameFinderModel =
openNLP.getNameModel(typeLabel, "en");
- findNamedEntities(ci, text, typeUri, typeLabel,
nameFinderModel);
+ TokenNameFinderModel nameFinderModel =
openNLP.getNameModel(typeLabel, language);
+ if(nameFinderModel == null){
+ log.info("No NER Model for {} and language {}
available!",typeLabel,language);
+ } else {
+ findNamedEntities(ci, text, typeUri, typeLabel,
nameFinderModel);
+ }
}
} catch (Exception e) {
throw new EngineException(this, ci, e);
@@ -381,16 +398,19 @@ public class NEREngineCore implements En
public int canEnhance(ContentItem ci) {
// in case text/pain;charSet=UTF8 is parsed
String mimeType = ci.getMimeType().split(";", 2)[0];
- if (TEXT_PLAIN_MIMETYPE.equalsIgnoreCase(mimeType)) {
- return ENHANCE_SYNCHRONOUS;
- }
- // check for existence of textual content in metadata
- UriRef subj = ci.getUri();
- Iterator<Triple> it = ci.getMetadata().filter(subj,
NIE_PLAINTEXTCONTENT, null);
- if (it.hasNext()) {
- return ENHANCE_SYNCHRONOUS;
+ if(TEXT_PLAIN_MIMETYPE.equalsIgnoreCase(mimeType) || //plain test
+ //or extracted text
+ ci.getMetadata().filter(ci.getUri(), NIE_PLAINTEXTCONTENT,
null).hasNext()){
+ //TODO: check if the language metadata are already present when
+ //canEnhance is called. If not than return ENHANCE_SYNCHRONOUS
+ if(isProcessedLangage(extractLanguage(ci))){
+ return ENHANCE_SYNCHRONOUS;
+ } else {
+ return CANNOT_ENHANCE;
+ }
+ } else { //no textual content available
+ return CANNOT_ENHANCE;
}
- return CANNOT_ENHANCE;
}
/**
@@ -413,4 +433,65 @@ public class NEREngineCore implements En
}
return new String(bytes, UTF8);
}
+
+ /**
+ * The default language
+ * @return the defaultLang
+ */
+ public String getDefaultLanguage() {
+ return defaultLang;
+ }
+ /**
+ * Checks if the parsed language is enabled for processing.
+ * If <code>null</code> is parsed as language this returns
<code>false</code>
+ * even if processing of all languages is enabled. <p>
+ * NOTE: If this Method returns <code>true</code> this does
+ * not mean that text with this language can be actually processed because
this
+ * also requires that the NER model for this language are available via the
+ * parsed {@link OpenNLP} instance.
+ * @param lang the language
+ * @return the state
+ */
+ public boolean isProcessedLangage(String lang){
+ return lang != null && (processedLangs.isEmpty() ||
processedLangs.contains(lang));
+ }
+ /*
+ * The following Utility extracts the language from the metadata of the
+ * parsed Content Item.
+ * This Utility is actually a copy of the same form the
KeywordExtractionEngine.
+ * TODO: change this to a global Utility as soon as STANBOL Enhancement
+ * Structure is defined
+ */
+ /**
+ * The literal representing the LangIDEngine as creator.
+ */
+ public static final Literal LANG_ID_ENGINE_NAME =
LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine");
+ /**
+ * Extracts the language of the parsed ContentItem from the metadata
+ * @param ci the content item
+ * @return the language
+ */
+ private String extractLanguage(ContentItem ci) {
+ MGraph metadata = ci.getMetadata();
+ Iterator<Triple> langaugeEnhancementCreatorTriples =
+ metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
+ if(langaugeEnhancementCreatorTriples.hasNext()){
+ String lang = EnhancementEngineHelper.getString(metadata,
+ langaugeEnhancementCreatorTriples.next().getSubject(),
+ Properties.DC_LANGUAGE);
+ if(lang != null){
+ return lang;
+ } else {
+ log.info("Unable to extract language for ContentItem %s! The
Enhancement of the %s is missing the %s property",
+ new
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
+ log.info(" ... return '{}' as default",defaultLang);
+ return defaultLang;
+ }
+ } else {
+ log.warn("Unable to extract language for ContentItem %s! Is the %s
active?",
+
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+ log.warn(" ... return '{}' as default",defaultLang);
+ return defaultLang;
+ }
+ }
}
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
Fri Jan 6 13:05:09 2012
@@ -17,35 +17,66 @@
package org.apache.stanbol.enhancer.engines.opennlp.impl;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.opennlp.OpenNLP;
-import
org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.osgi.framework.ServiceRegistration;
+import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
/**
* Apache Stanbol Enhancer Named Entity Recognition enhancement engine based
on opennlp's Maximum Entropy
* models.
*/
-@Component(immediate = true, metatype = true,
+@Component(
+ metatype = true,
+ immediate = true,
+ configurationFactory = true,
+ policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
+ specVersion = "1.1",
label = "%stanbol.NamedEntityExtractionEnhancementEngine.name",
description =
"%stanbol.NamedEntityExtractionEnhancementEngine.description")
@Service
+@org.apache.felix.scr.annotations.Properties(value={
+
@Property(name=NamedEntityExtractionEnhancementEngine.PROCESSED_LANGUAGES,value=""),
+
@Property(name=NamedEntityExtractionEnhancementEngine.DEFAULT_LANGUAGE,value="")}
+)
public class NamedEntityExtractionEnhancementEngine implements
EnhancementEngine, ServiceProperties {
private EnhancementEngine engineCore;
public static final String DEFAULT_DATA_OPEN_NLP_MODEL_LOCATION =
"org/apache/stanbol/defaultdata/opennlp";
-
+
+ /**
+ * Allows to define the default language assumed for parsed Content if no
language
+ * detection is available. If <code>null</code> or empty this engine will
not
+ * process content with an unknown language
+ */
+ public static final String DEFAULT_LANGUAGE =
"stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage";
+ /**
+ * Allows to restrict the list of languages processed by this engine. if
+ * <code>null</code> or empty content of any language where a NER model is
+ * available via {@link OpenNLP} will be processed.<p>
+ * This property allows to configure multiple instances of this engine that
+ * do only process specific languages. The default is a single instance
that
+ * processes all languages.
+ */
+ public static final String PROCESSED_LANGUAGES =
"stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages";
+
/**
* The default value for the Execution of this Engine. Currently set to
* {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
@@ -57,9 +88,45 @@ public class NamedEntityExtractionEnhanc
@Reference
private OpenNLP openNLP;
- protected void activate(ComponentContext ctx) throws IOException {
+ protected void activate(ComponentContext ctx) throws IOException,
ConfigurationException {
// Need to register the default data before loading the models
- engineCore = new NEREngineCore(openNLP);
+ Object value = ctx.getProperties().get(DEFAULT_LANGUAGE);
+ final String defaultLanguage;
+ if(value != null && !value.toString().isEmpty()){
+ defaultLanguage = value.toString();
+ } else {
+ defaultLanguage = null;
+ }
+ value = ctx.getProperties().get(PROCESSED_LANGUAGES);
+ final Set<String> processedLanguages;
+ if(value instanceof String[]){
+ processedLanguages = new HashSet<String>(Arrays.asList((String[])
value));
+ processedLanguages.remove(null); //remove null
+ processedLanguages.remove(""); //remove empty
+ } else if (value instanceof Collection<?>){
+ processedLanguages = new HashSet<String>();
+ for(Object o : ((Collection<?>)value)){
+ if(o != null){
+ processedLanguages.add(o.toString());
+ }
+ }
+ processedLanguages.remove(""); //remove empty
+ } else if(value != null && !value.toString().isEmpty()){
+ //if a single String is parsed we support ',' as seperator
+ String[] languageArray = value.toString().split(",");
+ processedLanguages = new
HashSet<String>(Arrays.asList(languageArray));
+ processedLanguages.remove(null); //remove null
+ processedLanguages.remove(""); //remove empty
+ } else { //no configuration
+ processedLanguages = Collections.emptySet();
+ }
+ if(!processedLanguages.isEmpty() && defaultLanguage != null &&
+ !processedLanguages.contains(defaultLanguage)){
+ throw new ConfigurationException(PROCESSED_LANGUAGES, "The list
of" +
+ "processed Languages "+processedLanguages+" MUST
CONTAIN the" +
+ "configured default language '"+defaultLanguage+"'!");
+ }
+ engineCore = new NEREngineCore(openNLP, defaultLanguage,
processedLanguages);
}
protected void deactivate(ComponentContext ctx) {
@@ -67,6 +134,7 @@ public class NamedEntityExtractionEnhanc
dfpServiceRegistration.unregister();
dfpServiceRegistration = null;
}
+ engineCore = null;
}
@Override
@@ -92,4 +160,5 @@ public class NamedEntityExtractionEnhanc
throw new IllegalStateException("EngineCore not initialized");
}
}
+
}
\ No newline at end of file
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
Fri Jan 6 13:05:09 2012
@@ -19,9 +19,20 @@
# descriptions as used in the metatype.xml descriptor generated by the
# the maven SCR plugin
-stanbol.NamedEntityExtractionEnhancementEngine.name = Apache Stanbol Named
Entity Extraction engine
+stanbol.NamedEntityExtractionEnhancementEngine.name = Apache Stanbol
Enhancement Engine for Named Entity Extraction
stanbol.NamedEntityExtractionEnhancementEngine.description = Find names of
people, organization, \
places... using previously trained OpenNLP models.
stanbol.opennlp.models.path.name = Path to folder holding OpenNLP models
stanbol.opennlp.models.path.description = All OpenNLP files need to be in the
same folder using \
the same naming convention as the OpenNLP project in version 1.5 and
following.
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage.name = Default
Language
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage.description =
Allows to configure \
+a language that is used as default if the language of the parsed content is
not known. If \
+empty no default language is used.
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages.name =
Languages
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages.description
= Languages to process. \
+An empty text indicates that all languages are processed. Use ',' as separator
for languages \
+(e.g. 'en,de' to enhance only English and German texts). \
+NOTE: This porperty can be used to configure multiple instances of this engine
that \
+process only documents with specific languages. This might e.g. be useful to \
+enable/disable NER for specific languages.
\ No newline at end of file
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
Fri Jan 6 13:05:09 2012
@@ -25,6 +25,7 @@ import static org.apache.stanbol.enhance
import java.io.IOException;
import java.util.Collection;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -62,9 +63,11 @@ public class TestNamedEntityExtractionEn
public static final String FAKE_BUNDLE_SYMBOLIC_NAME =
"FAKE_BUNDLE_SYMBOLIC_NAME";
+ @SuppressWarnings("unchecked")
@BeforeClass
public static void setUpServices() throws IOException {
- nerEngine = new NEREngineCore(new
ClasspathDataFileProvider(FAKE_BUNDLE_SYMBOLIC_NAME));
+ nerEngine = new NEREngineCore(new
ClasspathDataFileProvider(FAKE_BUNDLE_SYMBOLIC_NAME),
+ "en",Collections.EMPTY_SET);
}
public static ContentItem wrapAsContentItem(final String id,
Modified:
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
---
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
(original)
+++
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
Fri Jan 6 13:05:09 2012
@@ -63,7 +63,8 @@ public class HttpQueryHeaderPostTest ext
builder.buildUrl("/engines",
"header_Accept",""))) //override the parse Accept Header
.withHeader("Accept","text/turtle") //set Accept to turtle
(overridden)
- .withContent("John Smith was born in London.")
+ .withContent("John Smith was born in London. But since ten years
he " +
+ "lives now in Paris.")
)
.assertStatus(200)
//check for JSON-LD (the default content type
Modified: incubator/stanbol/trunk/launchers/full-war/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full-war/pom.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full-war/pom.xml (original)
+++ incubator/stanbol/trunk/launchers/full-war/pom.xml Fri Jan 6 13:05:09 2012
@@ -112,6 +112,7 @@
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config</exclude>
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config</exclude>
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg</exclude>
+
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config</exclude>
</excludes>
</configuration>
</plugin>
Modified:
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README
(original)
+++ incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README
Fri Jan 6 13:05:09 2012
@@ -21,3 +21,5 @@ resources/config/org.apache.stanbol.enti
resources/config/org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine-local.config
resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config
resources/config/org.apache.stanbol.commons.solr.web.impl.SolrDispatchFilterComponent-default.config
+resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
+
Added:
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
---
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
(added)
+++
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Fri Jan 6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""
Modified: incubator/stanbol/trunk/launchers/full/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full/pom.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full/pom.xml (original)
+++ incubator/stanbol/trunk/launchers/full/pom.xml Fri Jan 6 13:05:09 2012
@@ -113,6 +113,7 @@
<exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config</exclude>
<exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config</exclude>
<exclude>src/main/resources/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg</exclude>
+
<exclude>src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config</exclude>
</excludes>
</configuration>
</plugin>
Modified: incubator/stanbol/trunk/launchers/full/src/main/resources/README
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full/src/main/resources/README?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full/src/main/resources/README (original)
+++ incubator/stanbol/trunk/launchers/full/src/main/resources/README Fri Jan 6
13:05:09 2012
@@ -21,5 +21,6 @@ resources/config/org.apache.stanbol.comm
resources/config/org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine-local.config
resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config
resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config
+resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg
Added:
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
---
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
(added)
+++
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Fri Jan 6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""
Added:
incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
---
incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
(added)
+++
incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Fri Jan 6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""
Modified: incubator/stanbol/trunk/launchers/stable/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/pom.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/pom.xml (original)
+++ incubator/stanbol/trunk/launchers/stable/pom.xml Fri Jan 6 13:05:09 2012
@@ -111,6 +111,7 @@
<exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config</exclude>
<exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config</exclude>
<exclude>src/main/resources/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg</exclude>
+
<exclude>src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config</exclude>
</excludes>
</configuration>
</plugin>
Modified: incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml
(original)
+++ incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml Fri Jan
6 13:05:09 2012
@@ -19,6 +19,14 @@
List of initial bundles for the Stanbol Sling-based standalone
launcher.
-->
<bundles>
+ <!-- General-purpose libraries -->
+ <startLevel level="10">
+ <bundle> <!-- used by langid -->
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>0.9</version>
+ </bundle>
+ </startLevel>
<!-- *********************************************************************
start level 20 TO 24 reserved for Stanbol Framework
@@ -72,12 +80,12 @@
<!-- Stanbol Enhancer plug-ins (the Enhancement Engines) -->
<startLevel level="25">
- <!-- unstable
<bundle>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.langid</artifactId>
<version>0.9.0-incubating-SNAPSHOT</version>
</bundle>
+ <!-- unstable
<bundle>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.metaxa</artifactId>
@@ -119,6 +127,12 @@
<artifactId>org.apache.stanbol.enhancer.engine.entitytagging</artifactId>
<version>0.9.0-incubating-SNAPSHOT</version>
</bundle>
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
+
<artifactId>org.apache.stanbol.enhancer.engine.keywordextraction</artifactId>
+ <version>0.9.0-incubating-SNAPSHOT</version>
+ </bundle>
+
</startLevel>
<!-- *********************************************************************
Modified: incubator/stanbol/trunk/launchers/stable/src/main/resources/README
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/src/main/resources/README?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/src/main/resources/README
(original)
+++ incubator/stanbol/trunk/launchers/stable/src/main/resources/README Fri Jan
6 13:05:09 2012
@@ -20,4 +20,5 @@ resources/config/org.apache.stanbol.exam
resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config
resources/config/org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine-local.config
resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config
-resources/config/org.apache.stanbol.commons.solr.web.impl.SolrDispatchFilterComponent-default.config
\ No newline at end of file
+resources/config/org.apache.stanbol.commons.solr.web.impl.SolrDispatchFilterComponent-default.config
+resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Added:
incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
---
incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
(added)
+++
incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Fri Jan 6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""