Author: rwesten
Date: Tue Oct  4 15:44:17 2011
New Revision: 1178840

URL: http://svn.apache.org/viewvc?rev=1178840&view=rev
Log:
Added support for dereferencing Entities to the KeywordLinkingEngine as 
described by STANBOL-333 for the EntityLinkingEngine.
This is a temporary workaround until STANBOL-336.

Note that the ContentItemResource now also can use foaf:depiction as icons

Modified:
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
    
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1178840&r1=1178839&r2=1178840&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 Tue Oct  4 15:44:17 2011
@@ -1,6 +1,7 @@
 package org.apache.stanbol.enhancer.engines.keywordextraction.engine;
 
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.NIE_PLAINTEXTCONTENT;
+import static 
org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum.getFullName;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -55,7 +56,9 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
 import org.apache.stanbol.entityhub.servicesapi.Entityhub;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
 import org.apache.stanbol.entityhub.servicesapi.model.Reference;
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSite;
@@ -90,7 +93,9 @@ import org.slf4j.LoggerFactory;
     @Property(name=KeywordLinkingEngine.MAX_SUGGESTIONS,
         intValue=EntityLinkerConfig.DEFAULT_SUGGESTIONS),
     @Property(name=KeywordLinkingEngine.PROCESSED_LANGUAGES,value=""),
-    @Property(name=KeywordLinkingEngine.DEFAULT_MATCHING_LANGUAGE,value="")
+    @Property(name=KeywordLinkingEngine.DEFAULT_MATCHING_LANGUAGE,value=""),
+    @Property(name=KeywordLinkingEngine.DEREFERENCE_ENTITIES,
+        boolValue=KeywordLinkingEngine.DEFAULT_DEREFERENCE_ENTITIES_STATE)
 })
 public class KeywordLinkingEngine implements EnhancementEngine, 
ServiceProperties{
 
@@ -123,6 +128,28 @@ public class KeywordLinkingEngine implem
 //  public static final String SIMPLE_TOKENIZER = 
"org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer";
 //  public static final String ENABLE_CHUNKER = 
"org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker";
     /**
+     * Adds the dereference feature (STANBOL-333) also to this engine.
+     * This will be replaced by STANBOL-336. 
+     */
+    public static final String DEREFERENCE_ENTITIES = 
"org.apache.stanbol.enhancer.engines.keywordextraction.dereference";
+    /**
+     * The default state to dereference entities set to <code>true</code>.
+     */
+    public static final boolean DEFAULT_DEREFERENCE_ENTITIES_STATE = true;
+    /**
+     * Additional fields added for dereferenced entities
+     */
+    private static final Collection<String> DEREFERENCE_FIELDS = Arrays.asList(
+        getFullName("rdfs:comment"),
+        getFullName("geo:lat"),
+        getFullName("geo:long"),
+        getFullName("foaf:depiction"),
+        getFullName("dbp-ont:thumbnail"));
+    /**
+     * The dereferenceEntitiesState as set in {@link 
#activateEntityDereference(Dictionary)}
+     */
+    private boolean dereferenceEntitiesState;
+    /**
      * Default set of languages. This is an empty set indicating that texts in 
any
      * language are processed. 
      */
@@ -363,6 +390,13 @@ public class KeywordLinkingEngine implem
                     metadata.add(new TripleImpl(entityAnnotation, 
                         Properties.DC_RELATION, textAnnotation));
                 }
+                //in case dereferencing of Entities is enabled we need also to
+                //add the RDF data for entities
+                if(dereferenceEntitiesState){
+                    metadata.addAll(
+                        RdfValueFactory.getInstance().toRdfRepresentation(
+                            suggestion.getRepresentation()).getRdfGraph());
+                }
             }
         }
     }
@@ -447,6 +481,8 @@ public class KeywordLinkingEngine implem
      * <li> {@link #activateEntitySearcher(ComponentContext, Dictionary)}
      * <li> {@link #initEntityLinkerConfig(Dictionary, EntityLinkerConfig)} and
      * <li> {@link #activateTextAnalyzer(Dictionary)}
+     * <li> {@link #dereferenceEntitiesState} (needs to be called after 
+     * {@link #initEntityLinkerConfig(Dictionary, EntityLinkerConfig)})
      * </ul>
      * if applicable.
      * @param context the Component context
@@ -460,6 +496,26 @@ public class KeywordLinkingEngine implem
         activateTextAnalyzer(properties);
         activateEntitySearcher(context, properties);
         activateEntityLinkerConfig(properties);
+        activateEntityDereference(properties);
+    }
+
+    /**
+     * Inits the {@link #dereferenceEntitiesState} based on the
+     * {@link #DEREFERENCE_ENTITIES} configuration.
+     * @param properties the configuration
+     */
+    protected final void activateEntityDereference(Dictionary<String,Object> 
properties) {
+        Object value = properties.get(DEREFERENCE_ENTITIES);
+        if(value instanceof Boolean){
+            dereferenceEntitiesState = ((Boolean)value).booleanValue();
+        } else if(value != null && !value.toString().isEmpty()){
+            dereferenceEntitiesState = Boolean.parseBoolean(value.toString());
+        } else {
+            dereferenceEntitiesState = DEFAULT_DEREFERENCE_ENTITIES_STATE;
+        }
+        if(dereferenceEntitiesState){
+            config.getSelectedFields().addAll(DEREFERENCE_FIELDS);
+        }
     }
 
     /**
@@ -688,6 +744,14 @@ public class KeywordLinkingEngine implem
         deactivateEntitySearcher();
         deactivateTextAnalyzer();
         deactivateEntityLinkerConfig();
+        deactivateEntityDereference();
+    }
+    /**
+     * Resets the {@link #dereferenceEntitiesState} to 
+     * {@link #DEFAULT_DEREFERENCE_ENTITIES_STATE}
+     */
+    protected final void deactivateEntityDereference() {
+        dereferenceEntitiesState = DEFAULT_DEREFERENCE_ENTITIES_STATE;
     }
 
     /**

Modified: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1178840&r1=1178839&r2=1178840&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
 Tue Oct  4 15:44:17 2011
@@ -57,3 +57,6 @@ org.apache.stanbol.enhancer.engines.keyw
 
 
org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.name=Default
 Matching Language
 
org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.description=The
 language used in addition to the language detected for the analysed text to 
search for Entities. Typically this configuration is an empty string to search 
for labels without any language defined, but for some data sets (such as 
DBpedia.org) that add languages to any labels it might improve resuls to change 
this configuration (e.g. to 'en' in the case of DBpedia.org).
+
+org.apache.stanbol.enhancer.engines.keywordextraction.dereference.name=Dereference
 Entities
+org.apache.stanbol.enhancer.engines.keywordextraction.dereference.description=If
 enabled additional data for suggested Entities are included.

Modified: 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1178840&r1=1178839&r2=1178840&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
 Tue Oct  4 15:44:17 2011
@@ -89,6 +89,7 @@ public class ContentItemResource extends
 
     // TODO make this configurable trough a property
     public static final UriRef THUMBNAIL = new 
UriRef("http://dbpedia.org/ontology/thumbnail");
+    public static final UriRef DEPICTION = new 
UriRef("http://xmlns.com/foaf/0.1/depiction");
 
     public final Map<UriRef,String> defaultThumbnails = new 
HashMap<UriRef,String>();
 
@@ -430,9 +431,17 @@ public class ContentItemResource extends
         }
 
         public String getThumbnailSrc() {
-            Iterator<Triple> abstracts = entityProperties.filter(uri, 
THUMBNAIL, null);
-            while (abstracts.hasNext()) {
-                Resource object = abstracts.next().getObject();
+            Iterator<Triple> thumbnails = entityProperties.filter(uri, 
THUMBNAIL, null);
+            while (thumbnails.hasNext()) {
+                Resource object = thumbnails.next().getObject();
+                if (object instanceof UriRef) {
+                    return ((UriRef) object).getUnicodeString();
+                }
+            }
+            //if no dbpedia ontology thumbnail was found. try the same with 
foaf:depiction
+            thumbnails = entityProperties.filter(uri, DEPICTION, null);
+            while (thumbnails.hasNext()) {
+                Resource object = thumbnails.next().getObject();
                 if (object instanceof UriRef) {
                     return ((UriRef) object).getUnicodeString();
                 }


Reply via email to