Author: rwesten
Date: Sun Nov 24 11:42:04 2013
New Revision: 1544960

URL: http://svn.apache.org/r1544960
Log:
STANBOL-1219: merged implementation to the trunk

Modified:
    
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
    
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties

Modified: 
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1544960&r1=1544959&r2=1544960&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
 (original)
+++ 
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
 Sun Nov 24 11:42:04 2013
@@ -140,13 +140,26 @@ import org.slf4j.LoggerFactory;
                "de;uc=MATCH", //in German all Nouns are upper case
                "es;lc=Noun", //the OpenNLP POS tagger for Spanish does not 
support ProperNouns
                "nl;lc=Noun"}), //same for Dutch 
-    //@Property(name=DEFAULT_MATCHING_LANGUAGE,value=""), //will only be used 
when adding alt label support
+    @Property(name=EntityCoMentionEngine.ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+       doubleValue=EntityCoMentionEngine.DEFAULT_CONFIDENCE_ADJUSTEMENT), 
     @Property(name=SERVICE_RANKING,intValue=0)
 })
 @Service(value=EnhancementEngine.class)
 public class EntityCoMentionEngine extends 
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements 
ServiceProperties {
 
+       /**
+        * Property used to configure if/how confidence values of existing 
suggestions
+        * are modified if a co-mention is detected for a 
fise:TextAnnotation.<p>
+        * Values MUST be in the range [0..1); the 
+        * {@link #DEFAULT_CONFIDENCE_ADJUSTEMENT default} is <code>0.33</code>. 
<p>
+        * Added with <a 
href="https://issues.apache.org/jira/browse/STANBOL-1219">STANBOL-1219</a>
+        */
+       public static final String ADJUST_EXISTING_SUGGESTION_CONFIDENCE = 
"enhancer.engines.comention.adjustExistingConfidence";
     /**
+     * Default value for {@link #ADJUST_EXISTING_SUGGESTION_CONFIDENCE}
+     */
+       public static final double DEFAULT_CONFIDENCE_ADJUSTEMENT = 0.33;
+       /**
      * first of the post processing engines (note STANBOL-1218)
      */
     private static final Integer ENGINE_ORDERING = 
ServiceProperties.ORDERING_POST_PROCESSING + 80;
@@ -166,6 +179,8 @@ public class EntityCoMentionEngine exten
     @Reference 
     protected LabelTokenizer labelTokenizer; 
 
+    private double confidenceAdjustmentFactor;
+    
 //    private BundleContext bundleContext;
     /**
      * EntityLinking configuration used for Co-Mention extractions
@@ -208,6 +223,29 @@ public class EntityCoMentionEngine exten
         for(UriRef mappedUri : mappedUris){
             linkerConfig.setTypeMapping(mappedUri.getUnicodeString(), null);
         }
+        //parse confidence adjustment value (STANBOL-1219)
+        Object value = properties.get(ADJUST_EXISTING_SUGGESTION_CONFIDENCE);
+        final double confidenceAdjustment;
+        if(value == null){
+               confidenceAdjustment = DEFAULT_CONFIDENCE_ADJUSTEMENT;
+        } else if(value instanceof Number){
+               confidenceAdjustment = ((Number)value).doubleValue();
+        } else {
+               try {
+                       confidenceAdjustment = 
Double.parseDouble(value.toString());
+               } catch (NumberFormatException e){
+                       throw new 
ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE, 
+                                       "The confidence adjustement value for 
existing suggestions "
+                                       + "MUST BE a double value in the range 
[0..1)", e);
+               }
+        }
+        if(confidenceAdjustment < 0 || confidenceAdjustment >= 1){
+               throw new 
ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE, 
+                               "The confidence adjustement value for existing 
suggestions "
+                               + "MUST BE a double value in the range [0..1) 
(parsed: "
+                               + confidenceAdjustment +")!");
+        }
+        confidenceAdjustmentFactor = 1 - confidenceAdjustment;
         //get the metadata later set to the enhancement engine
     }
     /**
@@ -330,6 +368,7 @@ public class EntityCoMentionEngine exten
                 if(!ignore){
                     //collect confidence values of co-mentions
                     Double maxConfidence = null;
+                    Double maxExistingConfidence = null;
                     if(textAnnotation == null){ //not found ... create a new 
TextAnnotation for the co-mention
                         textAnnotation = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
                         metadata.add(new TripleImpl(textAnnotation, 
@@ -369,6 +408,26 @@ public class EntityCoMentionEngine exten
                                 maxConfidence = confidnece;
                             }
                         }
+                        Map<NonLiteral, Double> existingSuggestions = new 
HashMap<NonLiteral,Double>();
+                       if(maxConfidence != null && confidenceAdjustmentFactor 
< 1){
+                               //adapt confidence of existing annotations
+                               for(Iterator<Triple> esIt = 
metadata.filter(null, DC_RELATION, textAnnotation);esIt.hasNext();){
+                                       NonLiteral existingSuggestion = 
esIt.next().getSubject();
+                                       
existingSuggestions.put(existingSuggestion,
+                                                       
EnhancementEngineHelper.get(metadata, existingSuggestion, 
+                                                                       
ENHANCER_CONFIDENCE, Double.class, literalFactory));
+                               }
+                               for(Entry<NonLiteral,Double> entry : 
existingSuggestions.entrySet()){
+                                       if(entry.getValue() != null){
+                                               double adjustedConfidence = 
entry.getValue() * confidenceAdjustmentFactor;
+                                               if(maxExistingConfidence == 
null || adjustedConfidence > maxExistingConfidence){
+                                                       maxExistingConfidence = 
adjustedConfidence;
+                                               }
+                                               
EnhancementEngineHelper.set(metadata, entry.getKey(), 
+                                                               
ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
+                                       }
+                               }
+                       }
                         //add the suggestions of the initial mention to this 
one
                         Set<Resource> values = new HashSet<Resource>();
                         for(Iterator<Triple> suggestions = 
metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext();){
@@ -382,13 +441,29 @@ public class EntityCoMentionEngine exten
                         metadata.add(new TripleImpl(textAnnotation, 
DC_RELATION, initialMention));
                         //metadata.add(new TripleImpl(initialMention, 
DC_RELATION, textAnnotation));
                     }
-                    //finally add the collected dc:types of initial mentions 
to the textAnnotation
+                    // Adapt the dc:type values of the fise:TextAnnotation
+                    // - if Suggestions added by this engine do have the max 
confidence
+                    //   use the dc:type values of the initial mention
+                    // - if the original suggestions do have a higher 
confidence keep the
+                    //   existing
+                    // - in case both do have the same confidence we add all 
dc:types
+                    boolean removeExistingDcTypes = maxConfidence != null && 
(maxExistingConfidence == null || 
+                               maxConfidence.compareTo(maxExistingConfidence) 
>= 0);
+                    boolean addCoMentionDcTypes = maxExistingConfidence == 
null ||
+                               (maxConfidence != null && 
maxConfidence.compareTo(maxExistingConfidence) >= 1);
                     Iterator<UriRef> existingDcTypesIt = 
getReferences(metadata, textAnnotation, DC_TYPE);
                     while(existingDcTypesIt.hasNext()){ //do not add existing
-                        dcTypes.remove(existingDcTypesIt.next());
+                       //remove dc:type triples if they are not re-added later 
and
+                       //removeExistingDcTypes == true
+                        if((!dcTypes.remove(existingDcTypesIt.next()) || 
!addCoMentionDcTypes )
+                               && removeExistingDcTypes){
+                               existingDcTypesIt.remove(); //remove the dcType
+                        }
                     }
-                    for(UriRef dcType : dcTypes){ //add missing
-                        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, 
dcType));
+                    if(addCoMentionDcTypes){
+                           for(UriRef dcType : dcTypes){ //add missing
+                               metadata.add(new TripleImpl(textAnnotation, 
DC_TYPE, dcType));
+                           }
                     }
                     //TODO: support also Entities
                     if(maxConfidence != null){ //set the confidence value (if 
known)

Modified: 
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1544960&r1=1544959&r2=1544960&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
 Sun Nov 24 11:42:04 2013
@@ -58,3 +58,10 @@ used in addition to the language detecte
 configuration is an empty string to search for labels without any language 
defined, but for some data \
 sets (such as DBpedia.org) that add languages to any labels it might improve 
resuls to change this \
 configuration (e.g. to 'en' in the case of DBpedia.org).
+
+enhancer.engines.comention.adjustExistingConfidence.name=Confidence Adjustment
+enhancer.engines.comention.adjustExistingConfidence.description=Used to adjust 
the \
+confidence of existing suggestions for fise:TextAnnotation where a Co-Mention 
is \
+detected by this Engine. Values MUST BE in the range [0..1) (default: 0.33). 
Setting \
+this to 0.0 will deactivate this feature. The {adjusted-confidence} := 
{confidence} * \
+(1 - {value}) .. where {value} is the value configured for this property.


Reply via email to