Author: rwesten
Date: Fri Jan  6 13:05:09 2012
New Revision: 1228163

URL: http://svn.apache.org/viewvc?rev=1228163&view=rev
Log:
STANBOL-102: Implementation as described in the comment. Also adds the Language 
Identification Engine (LangId) and the KeywordExtractionEngine to the stable 
launcher.

Configuration with the default configuration for the NER Engine was added to 
the full, stable, full-war and kres launcher.

NOTE that the correct default initialisation of the NER Engine is implicitly 
tested by the existing Integration-tests of the Stanbol Enhancer.

Added:
    
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
    
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
    
incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
    
incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
Modified:
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
    
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
    incubator/stanbol/trunk/launchers/full-war/pom.xml
    incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README
    incubator/stanbol/trunk/launchers/full/pom.xml
    incubator/stanbol/trunk/launchers/full/src/main/resources/README
    incubator/stanbol/trunk/launchers/stable/pom.xml
    incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml
    incubator/stanbol/trunk/launchers/stable/src/main/resources/README

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 Fri Jan  6 13:05:09 2012
@@ -29,11 +29,13 @@ import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.namefind.TokenNameFinderModel;
@@ -44,6 +46,7 @@ import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.Span;
 
+import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.Triple;
@@ -59,6 +62,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -69,12 +73,6 @@ public class NEREngineCore implements En
     protected static final String TEXT_PLAIN_MIMETYPE = "text/plain";
 
     private final Logger log = LoggerFactory.getLogger(getClass());
-//    private final String bundleSymbolicName;
-//    protected final SentenceModel sentenceModel;
-//    protected final TokenNameFinderModel personNameModel;
-//    protected final TokenNameFinderModel locationNameModel;
-//    protected final TokenNameFinderModel organizationNameModel;
-//    protected Map<String,Object[]> entityTypes = new 
HashMap<String,Object[]>();
     private static Map<String,UriRef> entityTypes = new 
HashMap<String,UriRef>();
     static {
         entityTypes.put("person", OntologicalClasses.DBPEDIA_PERSON);
@@ -83,6 +81,10 @@ public class NEREngineCore implements En
     }
     
     private OpenNLP openNLP;
+
+    private final String defaultLang;
+
+    private final Set<String> processedLangs;
     
     /** Comments about our models */
     public static final Map<String, String> DATA_FILE_COMMENTS;
@@ -91,27 +93,39 @@ public class NEREngineCore implements En
         DATA_FILE_COMMENTS.put("Default data files", "provided by the 
org.apache.stanbol.defaultdata bundle");
     }
 
-    public NEREngineCore(OpenNLP openNLP) throws InvalidFormatException, 
IOException{
+    public NEREngineCore(OpenNLP openNLP, String defaultLanguage, Set<String> 
processedLanguages) throws InvalidFormatException, IOException{
         this.openNLP = openNLP;
-//        sentenceModel = openNLP.buildSentenceModel("en");
-//        personNameModel = buildNameModel("person", 
OntologicalClasses.DBPEDIA_PERSON);
-//        locationNameModel = buildNameModel("location", 
OntologicalClasses.DBPEDIA_PLACE);
-//        organizationNameModel = buildNameModel("organization", 
OntologicalClasses.DBPEDIA_ORGANISATION);
+        this.defaultLang = defaultLanguage;
+        this.processedLangs = Collections.unmodifiableSet(processedLanguages);
     }
     
-    NEREngineCore(DataFileProvider dfp) throws InvalidFormatException, 
IOException {
-        this(new OpenNLP(dfp));
+    NEREngineCore(DataFileProvider dfp,String defaultLanguage, Set<String> 
processedLanguages) throws InvalidFormatException, IOException {
+        this(new OpenNLP(dfp),defaultLanguage,processedLanguages);
     }
 
-    protected TokenNameFinderModel buildNameModel(String name, UriRef typeUri) 
throws IOException {
-        //String modelRelativePath = String.format("en-ner-%s.bin", name);
-        TokenNameFinderModel model = openNLP.getNameModel(name, "en");
-        // register the name finder instances for matching owl class
-//        entityTypes.put(name, new Object[] {typeUri, model});
-        return model;
-    }
+//    protected TokenNameFinderModel buildNameModel(String name, UriRef 
typeUri) throws IOException {
+//        //String modelRelativePath = String.format("en-ner-%s.bin", name);
+//        TokenNameFinderModel model = openNLP.getNameModel(name, "en");
+//        // register the name finder instances for matching owl class
+////        entityTypes.put(name, new Object[] {typeUri, model});
+//        return model;
+//    }
 
     public void computeEnhancements(ContentItem ci) throws EngineException {
+        //first check the langauge before processing the content (text)
+        String language = extractLanguage(ci);
+        if(language == null){
+            log.warn("Unable to extract Language for ContentItem {}: The text" 
+
+                    "of this ContentItem will not be processed by the NER 
engine!",
+                    ci.getUri());
+            return;
+        }
+        if(!isProcessedLangage(language)){
+            log.warn("The language {} of ContentItem {} is not configured to 
be" +
+                       "processed by this NER engine instance (processed {})!",
+                       new Object[]{language,ci.getUri(),processedLangs});
+            return;
+        }
         String mimeType = ci.getMimeType().split(";", 2)[0];
         String text;
         if (TEXT_PLAIN_MIMETYPE.equals(mimeType)) {
@@ -138,13 +152,16 @@ public class NEREngineCore implements En
             return;
         }
         log.debug("computeEnhancements {} text={}", 
ci.getUri().getUnicodeString(), StringUtils.abbreviate(text, 100));
-
         try {
             for (Map.Entry<String,UriRef> type : entityTypes.entrySet()) {
                 String typeLabel = type.getKey();
                 UriRef typeUri = type.getValue();
-                TokenNameFinderModel nameFinderModel = 
openNLP.getNameModel(typeLabel, "en");
-                findNamedEntities(ci, text, typeUri, typeLabel, 
nameFinderModel);
+                TokenNameFinderModel nameFinderModel = 
openNLP.getNameModel(typeLabel, language);
+                if(nameFinderModel == null){
+                    log.info("No NER Model for {} and language {} 
available!",typeLabel,language);
+                } else {
+                    findNamedEntities(ci, text, typeUri, typeLabel, 
nameFinderModel);
+                }
             }
         } catch (Exception e) {
             throw new EngineException(this, ci, e);
@@ -381,16 +398,19 @@ public class NEREngineCore implements En
     public int canEnhance(ContentItem ci) {
         // in case text/pain;charSet=UTF8 is parsed
         String mimeType = ci.getMimeType().split(";", 2)[0];
-        if (TEXT_PLAIN_MIMETYPE.equalsIgnoreCase(mimeType)) {
-            return ENHANCE_SYNCHRONOUS;
-        }
-        // check for existence of textual content in metadata
-        UriRef subj = ci.getUri();
-        Iterator<Triple> it = ci.getMetadata().filter(subj, 
NIE_PLAINTEXTCONTENT, null);
-        if (it.hasNext()) {
-            return ENHANCE_SYNCHRONOUS;
+        if(TEXT_PLAIN_MIMETYPE.equalsIgnoreCase(mimeType) || //plain test
+                //or extracted text
+                ci.getMetadata().filter(ci.getUri(), NIE_PLAINTEXTCONTENT, 
null).hasNext()){
+            //TODO: check if the language metadata are already present when
+            //canEnhance is called. If not than return ENHANCE_SYNCHRONOUS
+            if(isProcessedLangage(extractLanguage(ci))){
+                return ENHANCE_SYNCHRONOUS;
+            } else {
+                return CANNOT_ENHANCE;
+            }
+        } else { //no textual content available
+            return CANNOT_ENHANCE;
         }
-        return CANNOT_ENHANCE;
     }
 
     /**
@@ -413,4 +433,65 @@ public class NEREngineCore implements En
         }
         return new String(bytes, UTF8);
     }
+
+    /**
+     * The default language
+     * @return the defaultLang
+     */
+    public String getDefaultLanguage() {
+        return defaultLang;
+    }
+    /**
+     * Checks if the parsed language is enabled for processing.
+     * If <code>null</code> is parsed as language this returns 
<code>false</code>
+     * even if processing of all languages is enabled. <p>
+     * NOTE: If this Method returns <code>true</code> this does
+     * not mean that text with this language can be actually processed because 
this
+     * also requires that the NER model for this language are available via the
+     * parsed {@link OpenNLP} instance.
+     * @param lang the language
+     * @return the state
+     */
+    public boolean isProcessedLangage(String lang){
+        return lang != null && (processedLangs.isEmpty() || 
processedLangs.contains(lang));
+    }
+    /*
+     * The following Utility extracts the language from the metadata of the
+     * parsed Content Item.
+     * This Utility is actually a copy of the same form the 
KeywordExtractionEngine.
+     * TODO: change this to a global Utility as soon as STANBOL Enhancement
+     * Structure is defined
+     */
+    /**
+     * The literal representing the LangIDEngine as creator.
+     */
+    public static final Literal LANG_ID_ENGINE_NAME = 
LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine");
+    /**
+     * Extracts the language of the parsed ContentItem from the metadata
+     * @param ci the content item
+     * @return the language
+     */
+    private String extractLanguage(ContentItem ci) {
+        MGraph metadata = ci.getMetadata();
+        Iterator<Triple> langaugeEnhancementCreatorTriples = 
+            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
+        if(langaugeEnhancementCreatorTriples.hasNext()){
+            String lang = EnhancementEngineHelper.getString(metadata, 
+                langaugeEnhancementCreatorTriples.next().getSubject(), 
+                Properties.DC_LANGUAGE);
+            if(lang != null){
+                return lang;
+            } else {
+                log.info("Unable to extract language for ContentItem %s! The 
Enhancement of the %s is missing the %s property",
+                    new 
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
+                log.info(" ... return '{}' as default",defaultLang);
+                return defaultLang;
+            }
+        } else {
+            log.warn("Unable to extract language for ContentItem %s! Is the %s 
active?",
+                
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+            log.warn(" ... return '{}' as default",defaultLang);
+            return defaultLang;
+        }
+    }
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
 Fri Jan  6 13:05:09 2012
@@ -17,35 +17,66 @@
 package org.apache.stanbol.enhancer.engines.opennlp.impl;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Property;
 import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.commons.opennlp.OpenNLP;
-import 
org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
 import org.osgi.framework.ServiceRegistration;
+import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 
 /**
  * Apache Stanbol Enhancer Named Entity Recognition enhancement engine based 
on opennlp's Maximum Entropy
  * models.
  */
-@Component(immediate = true, metatype = true, 
+@Component(
+    metatype = true, 
+    immediate = true,
+    configurationFactory = true, 
+    policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
+    specVersion = "1.1", 
     label = "%stanbol.NamedEntityExtractionEnhancementEngine.name", 
     description = 
"%stanbol.NamedEntityExtractionEnhancementEngine.description")
 @Service
+@org.apache.felix.scr.annotations.Properties(value={
+    
@Property(name=NamedEntityExtractionEnhancementEngine.PROCESSED_LANGUAGES,value=""),
+    
@Property(name=NamedEntityExtractionEnhancementEngine.DEFAULT_LANGUAGE,value="")}
+)
 public class NamedEntityExtractionEnhancementEngine implements 
EnhancementEngine, ServiceProperties {
 
     private EnhancementEngine engineCore;
     
     public static final String DEFAULT_DATA_OPEN_NLP_MODEL_LOCATION = 
"org/apache/stanbol/defaultdata/opennlp";
-    
+
+    /**
+     * Allows to define the default language assumed for parsed Content if no 
language
+     * detection is available. If <code>null</code> or empty this engine will 
not
+     * process content with an unknown language
+     */
+    public static final String DEFAULT_LANGUAGE = 
"stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage";
+    /**
+     * Allows to restrict the list of languages processed by this engine. if
+     * <code>null</code> or empty content of any language where a NER model is
+     * available via {@link OpenNLP} will be processed.<p>
+     * This property allows to configure multiple instances of this engine that
+     * do only process specific languages. The default is a single instance 
that
+     * processes all languages.
+     */
+    public static final String PROCESSED_LANGUAGES = 
"stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages";
+
     /**
      * The default value for the Execution of this Engine. Currently set to
      * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
@@ -57,9 +88,45 @@ public class NamedEntityExtractionEnhanc
     @Reference
     private OpenNLP openNLP;
     
-    protected void activate(ComponentContext ctx) throws IOException {
+    protected void activate(ComponentContext ctx) throws IOException, 
ConfigurationException {
         // Need to register the default data before loading the models
-        engineCore = new NEREngineCore(openNLP);
+        Object value = ctx.getProperties().get(DEFAULT_LANGUAGE);
+        final String defaultLanguage;
+        if(value != null && !value.toString().isEmpty()){
+            defaultLanguage = value.toString();
+        } else {
+            defaultLanguage = null;
+        }
+        value = ctx.getProperties().get(PROCESSED_LANGUAGES);
+        final Set<String> processedLanguages;
+        if(value instanceof String[]){
+            processedLanguages = new HashSet<String>(Arrays.asList((String[]) 
value));
+            processedLanguages.remove(null); //remove null
+            processedLanguages.remove(""); //remove empty
+        } else if (value instanceof Collection<?>){
+            processedLanguages = new HashSet<String>();
+            for(Object o : ((Collection<?>)value)){
+                if(o != null){
+                    processedLanguages.add(o.toString());
+                }
+            }
+            processedLanguages.remove(""); //remove empty
+        } else if(value != null && !value.toString().isEmpty()){
+            //if a single String is parsed we support ',' as seperator
+            String[] languageArray = value.toString().split(",");
+            processedLanguages = new 
HashSet<String>(Arrays.asList(languageArray));
+            processedLanguages.remove(null); //remove null
+            processedLanguages.remove(""); //remove empty
+        } else { //no configuration
+            processedLanguages = Collections.emptySet();
+        }
+        if(!processedLanguages.isEmpty() && defaultLanguage != null &&
+                !processedLanguages.contains(defaultLanguage)){
+            throw new ConfigurationException(PROCESSED_LANGUAGES, "The list 
of" +
+                       "processed Languages "+processedLanguages+" MUST 
CONTAIN the" +
+                       "configured default language '"+defaultLanguage+"'!");
+        }
+        engineCore = new NEREngineCore(openNLP, defaultLanguage, 
processedLanguages);
     }
 
     protected void deactivate(ComponentContext ctx) {
@@ -67,6 +134,7 @@ public class NamedEntityExtractionEnhanc
             dfpServiceRegistration.unregister();
             dfpServiceRegistration = null;
         }
+        engineCore = null;
     }
     
     @Override
@@ -92,4 +160,5 @@ public class NamedEntityExtractionEnhanc
             throw new IllegalStateException("EngineCore not initialized");
         }
     }
+
 }
\ No newline at end of file

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/resources/OSGI-INF/metatype/metatype.properties
 Fri Jan  6 13:05:09 2012
@@ -19,9 +19,20 @@
 # descriptions as used in the metatype.xml descriptor generated by the
 # the maven SCR plugin
 
-stanbol.NamedEntityExtractionEnhancementEngine.name = Apache Stanbol Named 
Entity Extraction engine
+stanbol.NamedEntityExtractionEnhancementEngine.name = Apache Stanbol 
Enhancement Engine for Named Entity Extraction
 stanbol.NamedEntityExtractionEnhancementEngine.description = Find names of 
people, organization, \
  places... using previously trained OpenNLP models.
 stanbol.opennlp.models.path.name = Path to folder holding OpenNLP models
 stanbol.opennlp.models.path.description = All OpenNLP files need to be in the 
same folder using \
  the same naming convention as the OpenNLP project in version 1.5 and 
following.
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage.name = Default 
Language
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage.description = 
Allows to configure \
+a language that is used as default if the language of the parsed content is 
not known. If \
+empty no default language is used.
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages.name = 
Languages
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages.description 
= Languages to process. \
+An empty text indicates that all languages are processed. Use ',' as separator 
for languages \
+(e.g. 'en,de' to enhance only English and German texts). \
+NOTE: This porperty can be used to configure multiple instances of this engine 
that \
+process only documents with specific languages. This might e.g. be useful to \
+enable/disable NER for specific languages.
\ No newline at end of file

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
 Fri Jan  6 13:05:09 2012
@@ -25,6 +25,7 @@ import static org.apache.stanbol.enhance
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -62,9 +63,11 @@ public class TestNamedEntityExtractionEn
     
     public static final String FAKE_BUNDLE_SYMBOLIC_NAME = 
"FAKE_BUNDLE_SYMBOLIC_NAME";
 
+    @SuppressWarnings("unchecked")
     @BeforeClass
     public static void setUpServices() throws IOException {
-        nerEngine = new NEREngineCore(new 
ClasspathDataFileProvider(FAKE_BUNDLE_SYMBOLIC_NAME));
+        nerEngine = new NEREngineCore(new 
ClasspathDataFileProvider(FAKE_BUNDLE_SYMBOLIC_NAME),
+            "en",Collections.EMPTY_SET);
     }
 
     public static ContentItem wrapAsContentItem(final String id,

Modified: 
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
 (original)
+++ 
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java
 Fri Jan  6 13:05:09 2012
@@ -63,7 +63,8 @@ public class HttpQueryHeaderPostTest ext
                 builder.buildUrl("/engines", 
                 "header_Accept",""))) //override the parse Accept Header
             .withHeader("Accept","text/turtle") //set Accept to turtle 
(overridden) 
-            .withContent("John Smith was born in London.")
+            .withContent("John Smith was born in London. But since ten years 
he " +
+                       "lives now in Paris.")
         )
         .assertStatus(200)
         //check for JSON-LD (the default content type

Modified: incubator/stanbol/trunk/launchers/full-war/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full-war/pom.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full-war/pom.xml (original)
+++ incubator/stanbol/trunk/launchers/full-war/pom.xml Fri Jan  6 13:05:09 2012
@@ -112,6 +112,7 @@
             
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config</exclude>
             
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config</exclude>
             
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg</exclude>
+            
<exclude>src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config</exclude>
           </excludes>
         </configuration>
       </plugin>

Modified: 
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README 
(original)
+++ incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/README 
Fri Jan  6 13:05:09 2012
@@ -21,3 +21,5 @@ resources/config/org.apache.stanbol.enti
 
resources/config/org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine-local.config
 
resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config
 
resources/config/org.apache.stanbol.commons.solr.web.impl.SolrDispatchFilterComponent-default.config
+resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
+

Added: 
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
 (added)
+++ 
incubator/stanbol/trunk/launchers/full-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
 Fri Jan  6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""

Modified: incubator/stanbol/trunk/launchers/full/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full/pom.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full/pom.xml (original)
+++ incubator/stanbol/trunk/launchers/full/pom.xml Fri Jan  6 13:05:09 2012
@@ -113,6 +113,7 @@
             
<exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config</exclude>
             
<exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config</exclude>
             
<exclude>src/main/resources/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg</exclude>
+            
<exclude>src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config</exclude>
           </excludes>
         </configuration>
       </plugin>

Modified: incubator/stanbol/trunk/launchers/full/src/main/resources/README
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full/src/main/resources/README?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/full/src/main/resources/README (original)
+++ incubator/stanbol/trunk/launchers/full/src/main/resources/README Fri Jan  6 
13:05:09 2012
@@ -21,5 +21,6 @@ resources/config/org.apache.stanbol.comm
 
resources/config/org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine-local.config
 
resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config
 
resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config
+resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
 resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg
 

Added: 
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
 (added)
+++ 
incubator/stanbol/trunk/launchers/full/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
 Fri Jan  6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""

Added: 
incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
--- incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config (added)
+++ incubator/stanbol/trunk/launchers/kres/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config Fri Jan  6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""

Modified: incubator/stanbol/trunk/launchers/stable/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/pom.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/pom.xml (original)
+++ incubator/stanbol/trunk/launchers/stable/pom.xml Fri Jan  6 13:05:09 2012
@@ -111,6 +111,7 @@
             <exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config</exclude>
             <exclude>src/main/resources/resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config</exclude>
             <exclude>src/main/resources/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg</exclude>
+            <exclude>src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config</exclude>
           </excludes>
         </configuration>
       </plugin>

Modified: incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml (original)
+++ incubator/stanbol/trunk/launchers/stable/src/main/bundles/list.xml Fri Jan  6 13:05:09 2012
@@ -19,6 +19,14 @@
                List of initial bundles for the Stanbol Sling-based standalone launcher.
        -->
 <bundles>
+  <!-- General-purpose libraries -->
+  <startLevel level="10">
+        <bundle> <!-- used by langid -->
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>0.9</version>
+        </bundle>
+  </startLevel>
 
   <!-- *********************************************************************
           start level 20 TO 24 reserved for Stanbol Framework
@@ -72,12 +80,12 @@
 
   <!-- Stanbol Enhancer plug-ins (the Enhancement Engines) -->
   <startLevel level="25">
-    <!-- unstable
     <bundle>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.enhancer.engines.langid</artifactId>
       <version>0.9.0-incubating-SNAPSHOT</version>
     </bundle>
+    <!-- unstable
     <bundle>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.enhancer.engines.metaxa</artifactId>
@@ -119,6 +127,12 @@
       <artifactId>org.apache.stanbol.enhancer.engine.entitytagging</artifactId>
       <version>0.9.0-incubating-SNAPSHOT</version>
     </bundle>
+    <bundle>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.engine.keywordextraction</artifactId>
+      <version>0.9.0-incubating-SNAPSHOT</version>
+    </bundle>
+    
   </startLevel>
 
   <!-- *********************************************************************

Modified: incubator/stanbol/trunk/launchers/stable/src/main/resources/README
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/src/main/resources/README?rev=1228163&r1=1228162&r2=1228163&view=diff
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/src/main/resources/README (original)
+++ incubator/stanbol/trunk/launchers/stable/src/main/resources/README Fri Jan  6 13:05:09 2012
@@ -20,4 +20,5 @@ resources/config/org.apache.stanbol.exam
 resources/config/org.apache.stanbol.entityhub.core.impl.EntityhubConfigurationImpl.config
 resources/config/org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine-local.config
 resources/config/org.apache.stanbol.entityhub.yard.solr.impl.SolrYard-entityhub.config
-resources/config/org.apache.stanbol.commons.solr.web.impl.SolrDispatchFilterComponent-default.config
\ No newline at end of file
+resources/config/org.apache.stanbol.commons.solr.web.impl.SolrDispatchFilterComponent-default.config
+resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config

Added: 
incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config?rev=1228163&view=auto
==============================================================================
--- incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config (added)
+++ incubator/stanbol/trunk/launchers/stable/src/main/resources/resources/config/org.apache.stanbol.enhancer.engines.opennlp.impl.NamedEntityExtractionEnhancementEngine-default.config Fri Jan  6 13:05:09 2012
@@ -0,0 +1,2 @@
+stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages=""
+stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage=""


Reply via email to