Author: rwesten
Date: Thu Oct 17 10:48:59 2013
New Revision: 1533038
URL: http://svn.apache.org/r1533038
Log:
STANBOL-1070: typeMapping configuration is now ignored. The engine uses
dc:types of the initial mention; also removed the unused default language
configuration from the @Property annotations
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1533038&r1=1533037&r2=1533038&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
Thu Oct 17 10:48:59 2013
@@ -39,6 +39,7 @@ import static org.apache.stanbol.enhance
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText;
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
import static
org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
+import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CONTRIBUTOR;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
@@ -54,6 +55,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
@@ -131,7 +133,6 @@ import org.slf4j.LoggerFactory;
@org.apache.felix.scr.annotations.Properties(value={
@Property(name=PROPERTY_NAME),
@Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
- @Property(name=MIN_SEARCH_TOKEN_LENGTH,
intValue=DEFAULT_MIN_SEARCH_TOKEN_LENGTH),
@Property(name=PROCESS_ONLY_PROPER_NOUNS_STATE,
boolValue=DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE),
@Property(name=PROCESSED_LANGUAGES,
cardinality=Integer.MAX_VALUE,
@@ -139,15 +140,7 @@ import org.slf4j.LoggerFactory;
"de;uc=MATCH", //in German all Nouns are upper case
"es;lc=Noun", //the OpenNLP POS tagger for Spanish does not
support ProperNouns
"nl;lc=Noun"}), //same for Dutch
- @Property(name=DEFAULT_MATCHING_LANGUAGE,value=""),
- @Property(name=TYPE_MAPPINGS,cardinality=Integer.MAX_VALUE, value={
- "dbp-ont:Organisation; dbp-ont:Newspaper; schema:Organization >
dbp-ont:Organisation",
- "dbp-ont:Person; foaf:Person; schema:Person > dbp-ont:Person",
- "dbp-ont:Place; schema:Place > dbp-ont:Place",
- "dbp-ont:Work; schema:CreativeWork > dbp-ont:Work",
- "dbp-ont:Event; schema:Event > dbp-ont:Event",
- "schema:Product > schema:Product",
- "skos:Concept > skos:Concept"}),
+ //@Property(name=DEFAULT_MATCHING_LANGUAGE,value=""), //will only be used
when adding alt label support
@Property(name=SERVICE_RANKING,intValue=0)
})
@Service(value=EnhancementEngine.class)
@@ -206,6 +199,12 @@ public class EntityCoMentionEngine exten
linkerConfig.setMinMatchScore( //labelScore * token match factor
linkerConfig.getMinLabelScore()*linkerConfig.getMinTokenMatchFactor());
linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.IGNORE);
+ //remove all type mappings
+ linkerConfig.setDefaultDcType(null);
+ Set<UriRef> mappedUris = new
HashSet<UriRef>(linkerConfig.getTypeMappings().keySet());
+ for(UriRef mappedUri : mappedUris){
+ linkerConfig.setTypeMapping(mappedUri.getUnicodeString(), null);
+ }
//get the metadata later set to the enhancement engine
}
/**
@@ -267,10 +266,6 @@ public class EntityCoMentionEngine exten
} finally {
ci.getLock().writeLock().unlock();
}
- log.info("Found co-mentions:");
- for(LinkedEntity linkedEntity :
entityLinker.getLinkedEntities().values()){
- log.info(" > {}",linkedEntity);
- }
}
private void writeComentions(ContentItem ci,Collection<LinkedEntity>
comentions, String language) {
@@ -280,8 +275,10 @@ public class EntityCoMentionEngine exten
}
MGraph metadata = ci.getMetadata();
-
+
+ log.debug("Write Co-Mentions:");
for(LinkedEntity comention : comentions){
+ log.debug(" > {}",comention);
//URIs of TextAnnotations for the initial mention of this
co-mention
Collection<UriRef> initialMentions = new
ArrayList<UriRef>(comention.getOccurrences().size());
for(Suggestion suggestion : comention.getSuggestions()){
@@ -352,7 +349,13 @@ public class EntityCoMentionEngine exten
ENHANCER_CONFIDENCE, Double.class, literalFactory);
}
//now process initial mention(s) for the co-mention
+ Set<UriRef> dcTypes = new HashSet<UriRef>();
for(UriRef initialMention : initialMentions){
+ //get the dc:type(s) of the initial mentions
+ Iterator<UriRef> dcTypesIt = getReferences(metadata,
initialMention, DC_TYPE);
+ while(dcTypesIt.hasNext()){
+ dcTypes.add(dcTypesIt.next());
+ }
//check confidence of the initial one
Double confidnece =
EnhancementEngineHelper.get(metadata, initialMention,
ENHANCER_CONFIDENCE, Double.class, literalFactory);
@@ -363,16 +366,8 @@ public class EntityCoMentionEngine exten
maxConfidence = confidnece;
}
}
- //add suggestions of the initial mention
- Set<Resource> values = new HashSet<Resource>();
- for(Iterator<Triple> suggestions =
metadata.filter(initialMention, DC_TYPE, null); suggestions.hasNext();){
- values.add(suggestions.next().getObject());
- }
- for(Resource dcType : values){
- metadata.add(new TripleImpl(textAnnotation,
DC_TYPE, dcType));
- }
- values.clear();
//add the suggestions of the initial mention to this
one
+ Set<Resource> values = new HashSet<Resource>();
for(Iterator<Triple> suggestions =
metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext();){
values.add(suggestions.next().getSubject());
}
@@ -384,6 +379,14 @@ public class EntityCoMentionEngine exten
metadata.add(new TripleImpl(textAnnotation,
DC_RELATION, initialMention));
//metadata.add(new TripleImpl(initialMention,
DC_RELATION, textAnnotation));
}
+ //finally add the collected dc:types of initial mentions
to the textAnnotation
+ Iterator<UriRef> existingDcTypesIt =
getReferences(metadata, textAnnotation, DC_TYPE);
+ while(existingDcTypesIt.hasNext()){ //do not add existing
+ dcTypes.remove(existingDcTypesIt.next());
+ }
+ for(UriRef dcType : dcTypes){ //add missing
+ metadata.add(new TripleImpl(textAnnotation, DC_TYPE,
dcType));
+ }
//TODO: support also Entities
if(maxConfidence != null){ //set the confidence value (if
known)
EnhancementEngineHelper.set(metadata, textAnnotation,
ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1533038&r1=1533037&r2=1533038&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
Thu Oct 17 10:48:59 2013
@@ -58,11 +58,3 @@ used in addition to the language detecte
configuration is an empty string to search for labels without any language
defined, but for some data \
sets (such as DBpedia.org) that add languages to any labels it might improve
results to change this \
configuration (e.g. to 'en' in the case of DBpedia.org).
-
-enhancer.engines.linking.typeMappings.name=Type Mappings
-enhancer.engines.linking.typeMappings.description=This allows to add \
-additional entity-type > text-annotation-type mappings. Such mappings are used
to determine the \
-'dc:type' value of the 'fise:TextAnnotation' created for extracted entities.
Usage: \
-variant (a) '{uri}' short for {uri} > {uri} or (b)
'{source1};{source2};..;{sourceN} > {target}'. \
-Note that a {source} may be only mapped to a single {target}. Multiple
{source} types \
-can be mapped to the same {target}.