Author: rwesten
Date: Wed Sep 19 13:43:19 2012
New Revision: 1387596
URL: http://svn.apache.org/viewvc?rev=1387596&view=rev
Log:
STANBOL-733: all Engines now support a default instance; added an nlp-chain
configuration to the launcher; the LanguageConfiguration utility now supports
parameters (currently used to optionally parse the name of the OpenNLP model)
Added:
incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/data/sentiment/sentiwordnet/src/main/resources/org/apache/stanbol/data/sentiment/sentiwordnet/
(props changed)
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java
Propchange:
incubator/stanbol/branches/stanbol-nlp-processing/data/sentiment/sentiwordnet/src/main/resources/org/apache/stanbol/data/sentiment/sentiwordnet/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Sep 19 13:43:19 2012
@@ -0,0 +1,3 @@
+LICENSE.SentiWordNet
+
+SentiWordNet_3.0.0_20120206.txt
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
(original)
+++
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
Wed Sep 19 13:43:19 2012
@@ -43,6 +43,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -58,7 +59,8 @@ import org.slf4j.LoggerFactory;
public class Nlp2RdfMetadataEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
private final Logger log =
LoggerFactory.getLogger(Nlp2RdfMetadataEngine.class);
-
+ //TODO: replace this with a reald ontology
+ private final static UriRef SENTIMENT_PROPERTY = new
UriRef(NamespaceEnum.fise+"sentiment-value");
private final LiteralFactory lf = LiteralFactory.getInstance();
/**
@@ -159,10 +161,16 @@ public class Nlp2RdfMetadataEngine exten
writePos(metadata, span, current);
writePhrase(metadata, span, current);
//OlIA does not include Sentiments
-// Value<SentimentTag> sentiment =
span.getAnnotation(NlpAnnotations.sentimentAnnotation);
-// if(sentiment != null){
-//
-// }
+
+ Value<SentimentTag> sentiment =
span.getAnnotation(NlpAnnotations.sentimentAnnotation);
+ if(sentiment != null){
+ double sentimentVal = sentiment.probability();
+ if(sentiment.value().isNegative()) {
+ sentimentVal = sentimentVal * -1;
+ }
+ metadata.add(new TripleImpl(current, SENTIMENT_PROPERTY,
+ lf.createTypedLiteral(sentimentVal)));
+ }
}
} finally {
ci.getLock().writeLock().unlock();
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
(original)
+++
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
Wed Sep 19 13:43:19 2012
@@ -60,12 +60,14 @@ import org.apache.stanbol.enhancer.nlp.m
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -81,13 +83,19 @@ import org.slf4j.LoggerFactory;
* The noun phrase detector requires a {@link
org.apache.stanbol.enhancer.engines.opennlp.pos.model.POSContentPart} to
* be present in the content item and will extend each {@link
org.apache.stanbol.enhancer.engines.opennlp.pos.model.POSSentence}
* with an array of chunks.
- * <p/>
- * Author: Sebastian Schaffert
+ *
+ * @author Sebastian Schaffert
*/
-@Component(immediate = true, metatype = true, configurationFactory = true,
policy = ConfigurationPolicy.REQUIRE)
+@Component(immediate = true, metatype = true,
+ configurationFactory = true, //allow multiple instances to be configured
+ policy = ConfigurationPolicy.OPTIONAL) //create the default instance with
the default config
@Service
@Properties(value={
- @Property(name= EnhancementEngine.PROPERTY_NAME,value="chunker")
+
@Property(name=EnhancementEngine.PROPERTY_NAME,value="opennlp-chunker"),
+ @Property(name=ChunkingEngine.CONFIG_LANGUAGES,
+ value =
{"de;model=OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"}),
+ @Property(name=ChunkingEngine.MIN_CHUNK_SCORE),
+ @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the
default instance a ranking < 0
})
public class ChunkingEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
@@ -95,19 +103,18 @@ public class ChunkingEngine extends Abst
* Language configuration. Takes a list of ISO language codes of supported
languages. Currently supported
* are the languages given as default value.
*/
- @Property(value = {"de;OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"})
public static final String CONFIG_LANGUAGES =
"org.apache.stanbol.enhancer.chunker.languages";
- @Property
public static final String MIN_CHUNK_SCORE =
"org.apache.stanbol.enhancer.chunker.minScore";
public static final String[] AVAILABLE_LANGUAGES = new String[]
{"en","de"};
+ private static final String MODEL_PARAM_NAME = "model";
private static Logger log = LoggerFactory.getLogger(ChunkingEngine.class);
-
- private Double minChunkScore;
+ private LanguageConfiguration languageConfiguration = new
LanguageConfiguration(CONFIG_LANGUAGES,
+ new String
[]{"de;"+MODEL_PARAM_NAME+"=OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"});
@Reference
private OpenNLP openNLP;
@@ -117,23 +124,7 @@ public class ChunkingEngine extends Abst
*/
private PhraseTagSetRegistry tagSetRegistry =
PhraseTagSetRegistry.getInstance();
- /**
- * Holds as key explicitly enabled languages and as value the name of the
- * OpenNLP model used for Chunking. If the value is <code>null</code> this
- * indicates that the default model (
- * provided by {@link OpenNLP#getChunkerModel(String)}) will be used.<p>
- * NOTE: a configured language does not automatically mean that also the
- * requested model is available.
- */
- private Map<String,String> configuredLanguages;
- /**
- * Languages that are explicitly excluded
- */
- private Set<String> excludedLanguages;
- /**
- * if '*' is used as language configuration
- */
- private boolean allowAll;
+ private Double minChunkScore;
/**
@@ -378,62 +369,12 @@ public class ChunkingEngine extends Abst
}
//read the language configuration
- configuredLanguages = new HashMap<String,String>();
- excludedLanguages = new HashSet<String>();
- allowAll = false;
- if(properties.get(CONFIG_LANGUAGES) != null) {
- String[] languages = (String[])properties.get(CONFIG_LANGUAGES);
-
- for(String lang : languages) {
- String modelName;
- int seperatorIndex = lang.indexOf(';');
- if(seperatorIndex >= 0){
- if(seperatorIndex <lang.length()-2){
- modelName = lang.substring(seperatorIndex+1).trim();
- } else {
- modelName = null;
- }
- lang = lang.substring(0, seperatorIndex).trim();
- } else {
- modelName = null;
- }
- if(lang.charAt(0) == '!'){ //exclude
- lang = lang.substring(1);
- if(configuredLanguages.containsKey(lang)){
- throw new ConfigurationException(CONFIG_LANGUAGES,
- "Langauge '"+lang+"' is both included and excluded
(config: "
- + Arrays.toString(languages)+"");
- }
- excludedLanguages.add(lang);
- if(modelName != null){
- log.warn("Parsed model names are ignored for excluded
languages "
- + "(langauge: {}, modelName: {})!",
lang,modelName);
- }
- } else if("*".equals(lang)){
- allowAll = true;
- if(modelName != null){
- log.warn("A parsed model name is ignored for the
wildcard "
- + "langauge (modelName: {})!", lang,modelName);
- }
- } else if(!lang.isEmpty()){
- if(excludedLanguages.contains(lang)){
- throw new ConfigurationException(CONFIG_LANGUAGES,
- "Langauge '"+lang+"' is both included and excluded
(config: "
- + Arrays.toString(languages)+"");
- }
- configuredLanguages.put(lang,modelName);
- }
- }
- } else {
- allowAll = true;
- }
+ languageConfiguration.setConfiguration(properties);
}
@Deactivate
protected void deactivate(ComponentContext context){
- this.allowAll = false;
- this.configuredLanguages = null;
- this.excludedLanguages = null;
+ this.languageConfiguration.setDefault();
this.minChunkScore = null;
super.deactivate(context);
}
@@ -450,9 +391,7 @@ public class ChunkingEngine extends Abst
* language is not configured as beeing processed.
*/
boolean isLangaugeConfigured(String language, boolean exception){
- boolean state = allowAll ?
- (!excludedLanguages.contains(language)) :
- configuredLanguages.containsKey(language);
+ boolean state = languageConfiguration.isLanguage(language);
if(!state && exception){
throw new IllegalStateException("Language "+language+" is not
included "
+ "by the LanguageConfiguration of this engine (name "+
getName()
@@ -465,7 +404,7 @@ public class ChunkingEngine extends Abst
private ChunkerME initChunker(String language) {
isLangaugeConfigured(language, true); //check if the parsed language
is ok
- String modelName = configuredLanguages.get(language);
+ String modelName = languageConfiguration.getParameter(language,
MODEL_PARAM_NAME);
ChunkerModel model;
try {
if(modelName == null){ // the default model
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
(original)
+++
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
Wed Sep 19 13:43:19 2012
@@ -76,6 +76,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -85,14 +86,18 @@ import org.slf4j.LoggerFactory;
* A german language POS tagger. Requires that the content item has a
text/plain part and a
* language id of "de". Adds a POSContentPart to the content item that can be
used for further
* processing by other modules.
- * <p/>
- * Author: Sebastian Schaffert
+ *
+ * @author Sebastian Schaffert
*/
-@Component(immediate = true, metatype = true, configurationFactory = true,
policy = ConfigurationPolicy.REQUIRE)
+@Component(immediate = true, metatype = true,
+ configurationFactory = true, //allow multiple instances
+ policy = ConfigurationPolicy.OPTIONAL) //create a default instance with
the default configuration
@Service
@Properties(value={
- @Property(name= EnhancementEngine.PROPERTY_NAME,value="pos-tagger")
+ @Property(name= EnhancementEngine.PROPERTY_NAME,value="opennlp-pos"),
+ @Property(name=POSTaggingEngine.CONFIG_LANGUAGES, value =
{"*"},cardinality=Integer.MAX_VALUE),
+ @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the
default instance a ranking < 0
})
public class POSTaggingEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
@@ -101,17 +106,18 @@ public class POSTaggingEngine extends Ab
* Language configuration. Takes a list of ISO language codes of supported
languages. Currently supported
* are the languages given as default value.
*/
- @Property(value = {"en","de","da","es","sv","pt","nl"})
public static final String CONFIG_LANGUAGES =
"org.apache.stanbol.enhancer.pos.languages";
-
- public static final String[] AVAILABLE_LANGUAGES = new String[]
{"en","de","da","es","sv","pt","nl"};
+ /**
+ * The parameter name used to configure the name of the OpenNLP model used
for pos tagging
+ */
+ private static final String MODEL_NAME_PARAM = "model";
private static Logger log =
LoggerFactory.getLogger(POSTaggingEngine.class);
//Langauge configuration
- private LanguageConfiguration languageConfig = new
LanguageConfiguration(CONFIG_LANGUAGES, AVAILABLE_LANGUAGES);
+ private LanguageConfiguration languageConfig = new
LanguageConfiguration(CONFIG_LANGUAGES,new String[]{"*"});
// private Set<String> configuredLanguages;
// private Set<String> excludedLanguages;
// private boolean allowAll;
@@ -391,8 +397,14 @@ public class POSTaggingEngine extends Ab
return null;
}
private POSTagger getPOSTagger(String language) {
+ String modelName =
languageConfig.getParameter(language,MODEL_NAME_PARAM);
try {
- POSModel model = openNLP.getPartOfSpeachModel(language);
+ POSModel model;
+ if(modelName == null){ //use the default
+ model = openNLP.getPartOfSpeachModel(language);
+ } else {
+ model = openNLP.getModel(POSModel.class, modelName, null);
+ }
if(model != null) {
log.debug("POS Tagger Model {} for lanugage '{}' version: {}",
new Object[]{model.getClass().getSimpleName(),
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java?rev=1387596&r1=1387595&r2=1387596&view=diff
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
(original)
+++
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
Wed Sep 19 13:43:19 2012
@@ -110,7 +110,10 @@ public class SentiWSComponent {
}
//all resources available ... start the service
if(loadedSentiWsFiles.equals(sentiWsFileNames)){
+ log.info("register Sentiment Classifier for SentiWs (german)");
registerService();
+ } else {
+ log.info("loaded {} (required:
{})",loadedSentiWsFiles,sentiWsFileNames);
}
//remove registration
return true;
@@ -142,7 +145,7 @@ public class SentiWSComponent {
Dictionary<String,Object> serviceProperties = new
Hashtable<String,Object>();
serviceProperties.put("language", "de"); //set the language
BundleContext bc = bundleContext;
- if(bc != null){
+ if(bc != null && sentiWsClassifierService == null){
sentiWsClassifierService = bc.registerService(
SentimentClassifier.class.getName(), sentiWsClassifier,
serviceProperties);
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
(original)
+++
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
Wed Sep 19 13:43:19 2012
@@ -54,6 +54,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -87,25 +88,30 @@ import java.util.Set;
* <p/>
* Author: Sebastian Schaffert
*/
-@Component(immediate = true, metatype = true, configurationFactory = true,
policy = ConfigurationPolicy.REQUIRE)
+@Component(immediate = true, metatype = true,
+ configurationFactory = true, //allow multiple instances
+ policy = ConfigurationPolicy.OPTIONAL) //create a default instance with
the default configuration
@Service
@Properties(value={
- @Property(name= EnhancementEngine.PROPERTY_NAME,value="sentiment")
+ @Property(name=
EnhancementEngine.PROPERTY_NAME,value="sentiment-wordclassifier"),
+
@Property(name=SentimentEngine.CONFIG_LANGUAGES,value={SentimentEngine.DEFAULT_LANGUAGE_CONFIG}),
+ @Property(name=SentimentEngine.CONFIG_ADJECTIVES,
+ boolValue=SentimentEngine.DEFAULT_PROCESS_ADJECTIVES_ONLY),
+ @Property(name=SentimentEngine.CONFIG_MIN_POS_CONFIDENCE,
+ doubleValue = SentimentEngine.DEFAULT_MIN_POS_CONFIDNECE),
+ @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the
default instance a ranking < 0
})
-
public class SentimentEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
/**
* Language configuration. Takes a list of ISO language codes of supported
languages. Currently supported
* are the languages given as default value.
*/
- @Property(value={SentimentEngine.DEFAULT_LANGUAGE_CONFIG})
public static final String CONFIG_LANGUAGES =
"org.apache.stanbol.enhancer.sentiment.languages";
/**
* When set to true, only adjectives and nouns will be considered in
sentiment analysis.
*/
- @Property(boolValue = SentimentEngine.DEFAULT_PROCESS_ADJECTIVES_ONLY )
public static final String CONFIG_ADJECTIVES =
"org.apache.stanbol.enhancer.sentiment.adjectives";
/**
* POS tags that are not selected by {@link
SentimentClassifier#isAdjective(PosTag)}
@@ -114,11 +120,8 @@ public class SentimentEngine extends Ab
* that Words that do have a suitable TAG are still considered if the
* confidence of the fitting tag is >= {min-pos-confidence}/2
*/
- @Property(doubleValue = SentimentEngine.DEFAULT_MIN_POS_CONFIDNECE)
public static final String CONFIG_MIN_POS_CONFIDENCE =
"org.apache.stanbol.enhancer.sentiment.min-pos-confidence";
- @Property(boolValue=true)
- public static final String DEBUG_SENTIMENTS = "debug";
boolean debugSentiments;
public static final String DEFAULT_LANGUAGE_CONFIG = "*";
@@ -131,9 +134,9 @@ public class SentimentEngine extends Ab
* {@link LexicalCategory#Noun Noun} if {@link #CONFIG_ADJECTIVES} is
* deactivated) - default: 0.8<p>
*/
- private static final double DEFAULT_MIN_POS_CONFIDNECE = 0.8;
+ public static final double DEFAULT_MIN_POS_CONFIDNECE = 0.8;
- private static final boolean DEFAULT_PROCESS_ADJECTIVES_ONLY = false;
+ public static final boolean DEFAULT_PROCESS_ADJECTIVES_ONLY = false;
private static Logger log = LoggerFactory.getLogger(SentimentEngine.class);
@@ -156,10 +159,12 @@ public class SentimentEngine extends Ab
protected void bindClassifier(SentimentClassifier classifier){
log.info(" ... bind Sentiment Classifier {} for language {}",
classifier.getClass().getSimpleName(),classifier.getLanguage());
- SentimentClassifier old = classifiers.put(classifier.getLanguage(),
classifier);
- if(old != null){
- log.warn("Replaced Sentiment Classifier for language {} (old: {},
new: {}",
- new Object[]{old.getLanguage(),old,classifier});
+ synchronized (classifiers) {
+ SentimentClassifier old =
classifiers.put(classifier.getLanguage(), classifier);
+ if(old != null){
+ log.warn("Replaced Sentiment Classifier for language {} (old:
{}, new: {}",
+ new Object[]{old.getLanguage(),old,classifier});
+ }
}
}
/** unbind method for {@link #classifiers} */
@@ -285,41 +290,6 @@ public class SentimentEngine extends Ab
// } finally {
// ci.getLock().writeLock().unlock();
// }
-// if(debugSentiments){
-// Iterator<Sentence> sentences = analysedText.getSentences();
-// if(sentences.hasNext()){
-// while(sentences.hasNext()){
-// Sentence sent = sentences.next();
-// log.info("Sentence: {}", sent.getSpan());
-// tokens = sent.getTokens();
-// double positive = 0.0;
-// double negaitve = 0.0;
-// while (tokens.hasNext()){
-// Token token = tokens.next();
-// Value<SentimentTag> sentiment =
token.getAnnotation(NlpAnnotations.sentimentAnnotation);
-// if(sentiment != null){
-// if(sentiment.value().isPositive()){
-// positive = positive+sentiment.probability();
-// } else {
-// negaitve = negaitve+sentiment.probability();
-// }
-// Value<PosTag> posTag =
token.getAnnotation(NlpAnnotations.POSAnnotation);
-// log.info(" - {} '{}'[{}] - value: {}",
-// new Object []{
-//
sentiment.value().isPositive()?"positive":"negative",
-// token.getSpan(),
-// posTag != null ? posTag.value(): "POS
unknown",
-// sentiment.probability()
-// });
-// }
-// }
-// log.info(" > positive: {} | negative: {} | sum: {}",
-// new Object []{positive, negaitve, (positive -
negaitve)});
-// }
-// } else {
-//
-// }
-// }
}
@@ -365,12 +335,6 @@ public class SentimentEngine extends Ab
"The configured minimum POS confidence value '"
+minPOSConfidence+"' MUST BE > 0 and < 1!");
}
-
- //TODO: just for testing
- value = properties.get(DEBUG_SENTIMENTS);
- debugSentiments = value instanceof Boolean ? (Boolean)value :
- value != null ? Boolean.parseBoolean(value.toString()) :
- false;
}
@Deactivate
Modified:
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java?rev=1387596&r1=1387595&r2=1387596&view=diff
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java
(original)
+++
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java
Wed Sep 19 13:43:19 2012
@@ -4,7 +4,9 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
import org.osgi.service.cm.ConfigurationException;
@@ -27,10 +29,12 @@ import org.osgi.service.cm.Configuration
*/
public class LanguageConfiguration {
+ private static final Map<String,String> EMPTY_PARAMS =
Collections.emptyMap();
+
private final String property;
private final Collection<String> defaultConfig;
//Langauge configuration
- private Set<String> configuredLanguages = new HashSet<String>();
+ private Map<String,Map<String,String>> configuredLanguages = new
HashMap<String,Map<String,String>>();
private Set<String> excludedLanguages = new HashSet<String>();
private boolean allowAll;
@@ -88,10 +92,13 @@ public class LanguageConfiguration {
if(value == null){
continue; //ignore null values
}
- String lang = value.toString().trim().toLowerCase();
+ String line = value.toString().trim();
+ int sepIndex = line.indexOf(';');
+ String lang = sepIndex < 0 ? line : line.substring(0,
sepIndex).trim();
+ lang = lang.toLowerCase();
if(lang.charAt(0) == '!'){ //exclude
lang = lang.substring(1);
- if(configuredLanguages.contains(lang)){
+ if(configuredLanguages.containsKey(lang)){
throw new ConfigurationException(property,
"Langauge '"+lang+"' is both included and excluded
(config: "
+ config+")");
@@ -105,10 +112,47 @@ public class LanguageConfiguration {
"Langauge '"+lang+"' is both included and excluded
(config: "
+ config+")");
}
- configuredLanguages.add(lang);
+ if(sepIndex >= 0){
+
+ }
+ configuredLanguages.put(lang,sepIndex >= 0 && sepIndex <
line.length()-2 ?
+ parseParameters(line.substring(sepIndex,
line.length()).trim()) :
+ EMPTY_PARAMS);
+ }
+ }
+ }
+ /**
+ * Parses optional parameters
<code>{key}[={value}];{key2}[={value2}]</code>. Using
+ * the same key multiple times will override the previouse value
+ * @param paramString
+ * @return
+ * @throws ConfigurationException
+ */
+ private Map<String,String> parseParameters(String paramString) throws
ConfigurationException {
+ Map<String,String> params = new HashMap<String,String>();
+ for(String param : paramString.split(";")){
+ param = param.trim();
+ int equalsPos = param.indexOf('=');
+ if(equalsPos == 0){
+ throw new ConfigurationException(property,
+ "Parameter '"+param+"' has empty key!");
+ }
+ String key = equalsPos > 0 ? param.substring(0, equalsPos).trim()
: param;
+ String value;
+ if(equalsPos > 0){
+ if(equalsPos < param.length()-2) {
+ value = param.substring(equalsPos+1).trim();
+ } else {
+ value = "";
+ }
+ } else {
+ value = null;
}
+ params.put(key, value);
}
+ return params.isEmpty() ? EMPTY_PARAMS :
Collections.unmodifiableMap(params);
}
+
/**
* Checks if the parsed language is included in the configuration
* @param language the language
@@ -117,9 +161,19 @@ public class LanguageConfiguration {
public boolean isLanguage(String language){
return allowAll ?
(!excludedLanguages.contains(language)) :
- configuredLanguages.contains(language);
+ configuredLanguages.containsKey(language);
}
/**
+ * Returns parsed parameters if <code>{@link #isLanguage(String)} ==
true</code>
+ * @param language the language
+ * @return the parameters or <code>null</code> if none or the parsed
language
+ * is not active.
+ */
+ public Map<String,String> getParameters(String language){
+ return isLanguage(language) ? configuredLanguages.get(language) : null;
+ }
+
+ /**
* Resets the configuration to the default (as parsed in the constructor)
*/
public void setDefault() {
@@ -130,6 +184,18 @@ public class LanguageConfiguration {
// within the constructor
}
}
+ /**
+ * Returns the value of the parameter for the language (if present and the
+ * langage is active)
+ * @param language the language
+ * @param paramName the name of the param
+ * @return the param or <code>null</code> if not present OR the language
+ * is not active.
+ */
+ public String getParameter(String language, String paramName) {
+ Map<String,String> params = getParameters(language);
+ return params == null ? null : params.get(paramName);
+ }
}
Added:
incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config?rev=1387596&view=auto
==============================================================================
---
incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config
(added)
+++
incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config
Wed Sep 19 13:43:19 2012
@@ -0,0 +1,2 @@
+stanbol.enhancer.chain.name="nlp-processing"
+stanbol.enhancer.chain.list.enginelist=["langdetect","opennlp-pos","opennlp-chunker","sentiment-wordclassifier","nlp2rdf"]