Author: ogrisel
Date: Fri Sep 30 17:25:29 2011
New Revision: 1177741
URL: http://svn.apache.org/viewvc?rev=1177741&view=rev
Log:
STANBOL-333: Add an option to NamedEntityTaggingEngine to fetch entity info
from the EntityHub
Removed:
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/cache/
Modified:
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
Modified:
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
---
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
(original)
+++
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
Fri Sep 30 17:25:29 2011
@@ -17,7 +17,6 @@
package org.apache.stanbol.contenthub.web.resource;
import static javax.ws.rs.core.MediaType.APPLICATION_FORM_URLENCODED;
-import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE;
import static javax.ws.rs.core.MediaType.MULTIPART_FORM_DATA;
import static javax.ws.rs.core.MediaType.TEXT_HTML;
import static javax.ws.rs.core.MediaType.TEXT_PLAIN_TYPE;
@@ -77,7 +76,6 @@ import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.stanbol.commons.web.base.ContextHelper;
import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
-import org.apache.stanbol.enhancer.jersey.cache.EntityCacheProvider;
import org.apache.stanbol.enhancer.jersey.resource.ContentItemResource;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -180,11 +178,6 @@ public class ContentHubRootResource exte
jobManager =
ContextHelper.getServiceFromContext(EnhancementJobManager.class, context);
tcManager = ContextHelper.getServiceFromContext(TcManager.class,
context);
serializer = ContextHelper.getServiceFromContext(Serializer.class,
context);
- EntityCacheProvider entityCacheProvider =
ContextHelper.getServiceFromContext(
- EntityCacheProvider.class, context);
- if (entityCacheProvider != null) {
- entityCache = entityCacheProvider.getEntityCache();
- }
if (store == null || tcManager == null) {
log.error("Missing either store={} or tcManager={}", store,
tcManager);
@@ -332,8 +325,7 @@ public class ContentHubRootResource exte
if (ci == null) {
throw new WebApplicationException(404);
}
- return new ContentItemResource(localId, ci, entityCache, uriInfo,
- tcManager, serializer, servletContext);
+ return new ContentItemResource(localId, ci, uriInfo, tcManager,
serializer, servletContext);
}
@GET
Modified:
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
Fri Sep 30 17:25:29 2011
@@ -33,6 +33,7 @@ import org.apache.clerezza.rdf.core.Lite
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -54,6 +55,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.Entityhub;
import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
@@ -113,6 +115,13 @@ public class NamedEntityTaggingEngine im
public static final String NAME_FIELD =
"org.apache.stanbol.enhancer.engines.entitytagging.nameField";
/**
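+ * Whether to dereference suggested entities, i.e. add the RDF representation of each suggested entity to the enhancement graph (enabled by default)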
+ */
+ @Property(boolValue = true)
+ public static final String DEREFERENCE_ENTITIES =
"org.apache.stanbol.enhancer.engines.entitytagging.dereference";
+
+
+ /**
* Service of the Entityhub that manages all the active referenced Site.
This Service is used to lookup the
* configured Referenced Site when we need to enhance a content item.
*/
@@ -182,6 +191,8 @@ public class NamedEntityTaggingEngine im
* The number of Suggestions to be added
*/
public Integer numSuggestions = 3;
+
+ public boolean dereferenceEntities = true;
/**
* The {@link OfflineMode} is used by Stanbol to indicate that no external
service should be referenced.
@@ -258,6 +269,9 @@ public class NamedEntityTaggingEngine im
Object nameField = config.get(NAME_FIELD);
this.nameField = nameField == null || nameField.toString().isEmpty() ?
NamespaceEnum.rdfs + "label"
: NamespaceEnum.getFullName(nameField.toString());
+ Object dereferenceEntities = config.get(DEREFERENCE_ENTITIES);
+ this.dereferenceEntities = state == null ? true : Boolean
+ .parseBoolean(dereferenceEntities.toString());
}
@Deactivate
@@ -439,6 +453,7 @@ public class NamedEntityTaggingEngine im
matches.add(guess);
}
}
+ RdfValueFactory factory = RdfValueFactory.getInstance();
//now write the results
for(int i=0;i<matches.size();i++){
Representation rep = matches.get(i).getRepresentation();
@@ -454,8 +469,11 @@ public class NamedEntityTaggingEngine im
log.debug("Adding {} to ContentItem {}", rep.getId(),
contentItemId);
EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory,
graph, contentItemId,
annotationsToRelate, rep, nameField);
+
+ if (dereferenceEntities) {
+ graph.addAll(factory.toRdfRepresentation(rep).getRdfGraph());
+ }
}
-
return results;
}
Modified:
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
(original)
+++
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
Fri Sep 30 17:25:29 2011
@@ -25,7 +25,6 @@ import static org.apache.stanbol.enhance
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LONG;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.NIE_PLAINTEXTCONTENT;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
@@ -42,7 +41,6 @@ import java.util.TreeMap;
import javax.servlet.ServletContext;
import javax.ws.rs.GET;
import javax.ws.rs.Produces;
-import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
@@ -69,7 +67,6 @@ import org.apache.clerezza.rdf.core.spar
import org.apache.clerezza.rdf.core.sparql.query.SelectQuery;
import org.apache.clerezza.rdf.utils.GraphNode;
import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -109,8 +106,6 @@ public class ContentItemResource extends
protected String serializationFormat = SupportedFormat.RDF_XML;
- protected final TripleCollection remoteEntityCache;
-
protected Collection<EntityExtractionSummary> people;
protected Collection<EntityExtractionSummary> organizations;
@@ -123,7 +118,6 @@ public class ContentItemResource extends
public ContentItemResource(String localId,
ContentItem ci,
- TripleCollection remoteEntityCache,
UriInfo uriInfo,
TcManager tcManager,
Serializer serializer,
@@ -133,7 +127,6 @@ public class ContentItemResource extends
this.uriInfo = uriInfo;
this.tcManager = tcManager;
this.serializer = serializer;
- this.remoteEntityCache = remoteEntityCache;
this.servletContext = servletContext;
if (localId != null) {
@@ -277,7 +270,7 @@ public class ContentItemResource extends
if (entityUri != null) {
String label = ((Literal)
mapping.get("entity_label")).getLexicalForm();
Double confidence = lf.createObject(Double.class,
(TypedLiteral) mapping.get("confidence"));
- Graph properties = new GraphNode(entityUri,
remoteEntityCache).getNodeContext();
+ Graph properties = new GraphNode(entityUri,
contentItem.getMetadata()).getNodeContext();
entity.addSuggestion(entityUri, label, confidence, properties);
}
}
@@ -494,24 +487,22 @@ public class ContentItemResource extends
*/
public String getPlacesAsJSON() throws ParseException,
UnsupportedEncodingException {
MGraph g = new SimpleMGraph();
- if (remoteEntityCache != null) {
- LiteralFactory lf = LiteralFactory.getInstance();
- for (EntityExtractionSummary p : getPlaceOccurrences()) {
- EntitySuggestion bestGuess = p.getBestGuess();
- if (bestGuess == null) {
- continue;
- }
- UriRef uri = new UriRef(bestGuess.getUri());
- Iterator<Triple> latitudes = remoteEntityCache.filter(uri,
GEO_LAT, null);
- if (latitudes.hasNext()) {
- g.add(latitudes.next());
- }
- Iterator<Triple> longitutes = remoteEntityCache.filter(uri,
GEO_LONG, null);
- if (longitutes.hasNext()) {
- g.add(longitutes.next());
- g.add(new TripleImpl(uri, Properties.RDFS_LABEL,
lf.createTypedLiteral(bestGuess
- .getLabel())));
- }
+ LiteralFactory lf = LiteralFactory.getInstance();
+ MGraph metadata = contentItem.getMetadata();
+ for (EntityExtractionSummary p : getPlaceOccurrences()) {
+ EntitySuggestion bestGuess = p.getBestGuess();
+ if (bestGuess == null) {
+ continue;
+ }
+ UriRef uri = new UriRef(bestGuess.getUri());
+ Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
+ if (latitudes.hasNext()) {
+ g.add(latitudes.next());
+ }
+ Iterator<Triple> longitutes = metadata.filter(uri, GEO_LONG, null);
+ if (longitutes.hasNext()) {
+ g.add(longitutes.next());
+ g.add(new TripleImpl(uri, Properties.RDFS_LABEL,
lf.createTypedLiteral(bestGuess.getLabel())));
}
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
Modified:
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
(original)
+++
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
Fri Sep 30 17:25:29 2011
@@ -41,13 +41,10 @@ import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.Response;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.access.TcManager;
-import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.clerezza.rdf.core.serializedform.Serializer;
import org.apache.stanbol.commons.web.base.ContextHelper;
import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
-import org.apache.stanbol.enhancer.jersey.cache.EntityCacheProvider;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -76,23 +73,11 @@ public class EnginesRootResource extends
protected Serializer serializer;
- protected TripleCollection entityCache;
-
// bind the job manager by looking it up from the servlet request context
public EnginesRootResource(@Context ServletContext context) {
jobManager =
ContextHelper.getServiceFromContext(EnhancementJobManager.class, context);
tcManager = ContextHelper.getServiceFromContext(TcManager.class,
context);
serializer = ContextHelper.getServiceFromContext(Serializer.class,
context);
- entityCache = new SimpleMGraph().getGraph();
- try {
- EntityCacheProvider entityCacheProvider =
ContextHelper.getServiceFromContext(
- EntityCacheProvider.class, context);
- if (entityCacheProvider != null) {
- entityCache = entityCacheProvider.getEntityCache();
- }
- } catch (NullPointerException e) {
- // service lookup can raise null pointer exception, fall back to
empty cache
- }
}
@GET
@@ -183,8 +168,8 @@ public class EnginesRootResource extends
MGraph graph = ci.getMetadata();
if (buildAjaxview) {
- ContentItemResource contentItemResource = new
ContentItemResource(null, ci, entityCache, uriInfo,
- tcManager, serializer, servletContext);
+ ContentItemResource contentItemResource = new
ContentItemResource(null, ci, uriInfo, tcManager,
+ serializer, servletContext);
contentItemResource.setRdfSerializationFormat(format);
Viewable ajaxView = new Viewable("/ajax/contentitem",
contentItemResource);
return Response.ok(ajaxView,TEXT_HTML).build();