Author: rwesten
Date: Tue Oct 4 15:44:17 2011
New Revision: 1178840
URL: http://svn.apache.org/viewvc?rev=1178840&view=rev
Log:
Added support for dereferencing Entities to the KeywordLinkingEngine as
described by STANBOL-333 for the EntityLinkingEngine.
This is a temporary workaround until it is replaced by STANBOL-336.
Note that the ContentItemResource can now also use foaf:depiction values as icons.
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1178840&r1=1178839&r2=1178840&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
Tue Oct 4 15:44:17 2011
@@ -1,6 +1,7 @@
package org.apache.stanbol.enhancer.engines.keywordextraction.engine;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.NIE_PLAINTEXTCONTENT;
+import static
org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum.getFullName;
import java.io.IOException;
import java.util.ArrayList;
@@ -55,7 +56,9 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.Entityhub;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.site.ReferencedSite;
@@ -90,7 +93,9 @@ import org.slf4j.LoggerFactory;
@Property(name=KeywordLinkingEngine.MAX_SUGGESTIONS,
intValue=EntityLinkerConfig.DEFAULT_SUGGESTIONS),
@Property(name=KeywordLinkingEngine.PROCESSED_LANGUAGES,value=""),
- @Property(name=KeywordLinkingEngine.DEFAULT_MATCHING_LANGUAGE,value="")
+ @Property(name=KeywordLinkingEngine.DEFAULT_MATCHING_LANGUAGE,value=""),
+ @Property(name=KeywordLinkingEngine.DEREFERENCE_ENTITIES,
+ boolValue=KeywordLinkingEngine.DEFAULT_DEREFERENCE_ENTITIES_STATE)
})
public class KeywordLinkingEngine implements EnhancementEngine,
ServiceProperties{
@@ -123,6 +128,28 @@ public class KeywordLinkingEngine implem
// public static final String SIMPLE_TOKENIZER =
"org.apache.stanbol.enhancer.engines.keywordextraction.simpleTokenizer";
// public static final String ENABLE_CHUNKER =
"org.apache.stanbol.enhancer.engines.keywordextraction.enableChunker";
/**
+ * Adds the dereference feature (STANBOL-333) also to this engine.
+ * This will be replaced by STANBOL-336.
+ */
+ public static final String DEREFERENCE_ENTITIES =
"org.apache.stanbol.enhancer.engines.keywordextraction.dereference";
+ /**
+ * The default state for dereferencing entities (<code>true</code>).
+ */
+ public static final boolean DEFAULT_DEREFERENCE_ENTITIES_STATE = true;
+ /**
+ * Additional fields added for dereferenced entities
+ */
+ private static final Collection<String> DEREFERENCE_FIELDS = Arrays.asList(
+ getFullName("rdfs:comment"),
+ getFullName("geo:lat"),
+ getFullName("geo:long"),
+ getFullName("foaf:depiction"),
+ getFullName("dbp-ont:thumbnail"));
+ /**
+ * The dereferenceEntitiesState as set in {@link
#activateEntityDereference(Dictionary)}
+ */
+ private boolean dereferenceEntitiesState;
+ /**
* Default set of languages. This is an empty set indicating that texts in
any
* language are processed.
*/
@@ -363,6 +390,13 @@ public class KeywordLinkingEngine implem
metadata.add(new TripleImpl(entityAnnotation,
Properties.DC_RELATION, textAnnotation));
}
+ //in case dereferencing of Entities is enabled we need also to
+ //add the RDF data for entities
+ if(dereferenceEntitiesState){
+ metadata.addAll(
+ RdfValueFactory.getInstance().toRdfRepresentation(
+ suggestion.getRepresentation()).getRdfGraph());
+ }
}
}
}
@@ -447,6 +481,8 @@ public class KeywordLinkingEngine implem
* <li> {@link #activateEntitySearcher(ComponentContext, Dictionary)}
* <li> {@link #initEntityLinkerConfig(Dictionary, EntityLinkerConfig)} and
* <li> {@link #activateTextAnalyzer(Dictionary)}
+ * <li> {@link #activateEntityDereference(Dictionary)} (needs to be called after
+ * {@link #initEntityLinkerConfig(Dictionary, EntityLinkerConfig)})
* </ul>
* if applicable.
* @param context the Component context
@@ -460,6 +496,26 @@ public class KeywordLinkingEngine implem
activateTextAnalyzer(properties);
activateEntitySearcher(context, properties);
activateEntityLinkerConfig(properties);
+ activateEntityDereference(properties);
+ }
+
+ /**
+ * Inits the {@link #dereferenceEntitiesState} based on the
+ * {@link #DEREFERENCE_ENTITIES} configuration.
+ * @param properties the configuration
+ */
+ protected final void activateEntityDereference(Dictionary<String,Object>
properties) {
+ Object value = properties.get(DEREFERENCE_ENTITIES);
+ if(value instanceof Boolean){
+ dereferenceEntitiesState = ((Boolean)value).booleanValue();
+ } else if(value != null && !value.toString().isEmpty()){
+ dereferenceEntitiesState = Boolean.parseBoolean(value.toString());
+ } else {
+ dereferenceEntitiesState = DEFAULT_DEREFERENCE_ENTITIES_STATE;
+ }
+ if(dereferenceEntitiesState){
+ config.getSelectedFields().addAll(DEREFERENCE_FIELDS);
+ }
}
/**
@@ -688,6 +744,14 @@ public class KeywordLinkingEngine implem
deactivateEntitySearcher();
deactivateTextAnalyzer();
deactivateEntityLinkerConfig();
+ deactivateEntityDereference();
+ }
+ /**
+ * Resets the {@link #dereferenceEntitiesState} to
+ * {@link #DEFAULT_DEREFERENCE_ENTITIES_STATE}
+ */
+ protected final void deactivateEntityDereference() {
+ dereferenceEntitiesState = DEFAULT_DEREFERENCE_ENTITIES_STATE;
}
/**
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1178840&r1=1178839&r2=1178840&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/resources/OSGI-INF/metatype/metatype.properties
Tue Oct 4 15:44:17 2011
@@ -57,3 +57,6 @@ org.apache.stanbol.enhancer.engines.keyw
org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.name=Default
Matching Language
org.apache.stanbol.enhancer.engines.keywordextraction.defaultMatchingLanguage.description=The
language used in addition to the language detected for the analysed text to
search for Entities. Typically this configuration is an empty string to search
for labels without any language defined, but for some data sets (such as
DBpedia.org) that add languages to any labels it might improve results to change
this configuration (e.g. to 'en' in the case of DBpedia.org).
+
+org.apache.stanbol.enhancer.engines.keywordextraction.dereference.name=Dereference
Entities
+org.apache.stanbol.enhancer.engines.keywordextraction.dereference.description=If
enabled, additional data for suggested Entities is included.
Modified:
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1178840&r1=1178839&r2=1178840&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
(original)
+++
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
Tue Oct 4 15:44:17 2011
@@ -89,6 +89,7 @@ public class ContentItemResource extends
// TODO make this configurable through a property
public static final UriRef THUMBNAIL = new
UriRef("http://dbpedia.org/ontology/thumbnail");
+ public static final UriRef DEPICTION = new
UriRef("http://xmlns.com/foaf/0.1/depiction");
public final Map<UriRef,String> defaultThumbnails = new
HashMap<UriRef,String>();
@@ -430,9 +431,17 @@ public class ContentItemResource extends
}
public String getThumbnailSrc() {
- Iterator<Triple> abstracts = entityProperties.filter(uri,
THUMBNAIL, null);
- while (abstracts.hasNext()) {
- Resource object = abstracts.next().getObject();
+ Iterator<Triple> thumbnails = entityProperties.filter(uri,
THUMBNAIL, null);
+ while (thumbnails.hasNext()) {
+ Resource object = thumbnails.next().getObject();
+ if (object instanceof UriRef) {
+ return ((UriRef) object).getUnicodeString();
+ }
+ }
+ //if no dbpedia ontology thumbnail was found, try the same with
foaf:depiction
+ thumbnails = entityProperties.filter(uri, DEPICTION, null);
+ while (thumbnails.hasNext()) {
+ Resource object = thumbnails.next().getObject();
if (object instanceof UriRef) {
return ((UriRef) object).getUnicodeString();
}