Author: ogrisel
Date: Fri Sep 30 17:25:29 2011
New Revision: 1177741

URL: http://svn.apache.org/viewvc?rev=1177741&view=rev
Log:
STANBOL-333: Add an option to NamedEntityTaggingEngine to fetch entity info 
from the EntityHub

Removed:
    
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/cache/
Modified:
    
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
    
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
    
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
    
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java

Modified: 
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
 (original)
+++ 
incubator/stanbol/trunk/contenthub/web/src/main/java/org/apache/stanbol/contenthub/web/resource/ContentHubRootResource.java
 Fri Sep 30 17:25:29 2011
@@ -17,7 +17,6 @@
 package org.apache.stanbol.contenthub.web.resource;
 
 import static javax.ws.rs.core.MediaType.APPLICATION_FORM_URLENCODED;
-import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE;
 import static javax.ws.rs.core.MediaType.MULTIPART_FORM_DATA;
 import static javax.ws.rs.core.MediaType.TEXT_HTML;
 import static javax.ws.rs.core.MediaType.TEXT_PLAIN_TYPE;
@@ -77,7 +76,6 @@ import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.stanbol.commons.web.base.ContextHelper;
 import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
-import org.apache.stanbol.enhancer.jersey.cache.EntityCacheProvider;
 import org.apache.stanbol.enhancer.jersey.resource.ContentItemResource;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -180,11 +178,6 @@ public class ContentHubRootResource exte
         jobManager = 
ContextHelper.getServiceFromContext(EnhancementJobManager.class, context);
         tcManager = ContextHelper.getServiceFromContext(TcManager.class, 
context);
         serializer = ContextHelper.getServiceFromContext(Serializer.class, 
context);
-        EntityCacheProvider entityCacheProvider = 
ContextHelper.getServiceFromContext(
-            EntityCacheProvider.class, context);
-        if (entityCacheProvider != null) {
-            entityCache = entityCacheProvider.getEntityCache();
-        }
 
         if (store == null || tcManager == null) {
             log.error("Missing either store={} or tcManager={}", store, 
tcManager);
@@ -332,8 +325,7 @@ public class ContentHubRootResource exte
         if (ci == null) {
             throw new WebApplicationException(404);
         }
-        return new ContentItemResource(localId, ci, entityCache, uriInfo,
-                tcManager, serializer, servletContext);
+        return new ContentItemResource(localId, ci, uriInfo, tcManager, 
serializer, servletContext);
     }
 
     @GET

Modified: 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
 Fri Sep 30 17:25:29 2011
@@ -33,6 +33,7 @@ import org.apache.clerezza.rdf.core.Lite
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
@@ -54,6 +55,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
 import org.apache.stanbol.entityhub.servicesapi.Entityhub;
 import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
 import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
@@ -113,6 +115,13 @@ public class NamedEntityTaggingEngine im
     public static final String NAME_FIELD = 
"org.apache.stanbol.enhancer.engines.entitytagging.nameField";
 
     /**
+     * Whether to add the RDF data of each suggested entity to the enhancement graph (default: true)
+     */
+    @Property(boolValue = true)
+    public static final String DEREFERENCE_ENTITIES = 
"org.apache.stanbol.enhancer.engines.entitytagging.dereference";
+
+    
+    /**
      * Service of the Entityhub that manages all the active referenced Site. 
This Service is used to lookup the
      * configured Referenced Site when we need to enhance a content item.
      */
@@ -182,6 +191,8 @@ public class NamedEntityTaggingEngine im
      * The number of Suggestions to be added
      */
     public Integer numSuggestions = 3;
+    
+    public boolean dereferenceEntities = true;
 
     /**
      * The {@link OfflineMode} is used by Stanbol to indicate that no external 
service should be referenced.
@@ -258,6 +269,9 @@ public class NamedEntityTaggingEngine im
         Object nameField = config.get(NAME_FIELD);
         this.nameField = nameField == null || nameField.toString().isEmpty() ? 
NamespaceEnum.rdfs + "label"
                 : NamespaceEnum.getFullName(nameField.toString());
+        Object dereferenceEntities = config.get(DEREFERENCE_ENTITIES);
+        this.dereferenceEntities = state == null ? true : Boolean
+                .parseBoolean(dereferenceEntities.toString());
     }
 
     @Deactivate
@@ -439,6 +453,7 @@ public class NamedEntityTaggingEngine im
                 matches.add(guess);
             }
         }
+        RdfValueFactory factory = RdfValueFactory.getInstance();
         //now write the results
         for(int i=0;i<matches.size();i++){
             Representation rep = matches.get(i).getRepresentation();
@@ -454,8 +469,11 @@ public class NamedEntityTaggingEngine im
             log.debug("Adding {} to ContentItem {}", rep.getId(), 
contentItemId);
             EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory, 
graph, contentItemId,
                 annotationsToRelate, rep, nameField);
+
+            if (dereferenceEntities) {
+                graph.addAll(factory.toRdfRepresentation(rep).getRdfGraph());
+            }
         }
-        
         return results;
     }
 

Modified: 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
 Fri Sep 30 17:25:29 2011
@@ -25,7 +25,6 @@ import static org.apache.stanbol.enhance
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LONG;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.NIE_PLAINTEXTCONTENT;
 
-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
@@ -42,7 +41,6 @@ import java.util.TreeMap;
 import javax.servlet.ServletContext;
 import javax.ws.rs.GET;
 import javax.ws.rs.Produces;
-import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.UriInfo;
 
@@ -69,7 +67,6 @@ import org.apache.clerezza.rdf.core.spar
 import org.apache.clerezza.rdf.core.sparql.query.SelectQuery;
 import org.apache.clerezza.rdf.utils.GraphNode;
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
 import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -109,8 +106,6 @@ public class ContentItemResource extends
 
     protected String serializationFormat = SupportedFormat.RDF_XML;
 
-    protected final TripleCollection remoteEntityCache;
-
     protected Collection<EntityExtractionSummary> people;
 
     protected Collection<EntityExtractionSummary> organizations;
@@ -123,7 +118,6 @@ public class ContentItemResource extends
 
     public ContentItemResource(String localId,
                                ContentItem ci,
-                               TripleCollection remoteEntityCache,
                                UriInfo uriInfo,
                                TcManager tcManager,
                                Serializer serializer,
@@ -133,7 +127,6 @@ public class ContentItemResource extends
         this.uriInfo = uriInfo;
         this.tcManager = tcManager;
         this.serializer = serializer;
-        this.remoteEntityCache = remoteEntityCache;
         this.servletContext = servletContext;
 
         if (localId != null) {
@@ -277,7 +270,7 @@ public class ContentItemResource extends
             if (entityUri != null) {
                 String label = ((Literal) 
mapping.get("entity_label")).getLexicalForm();
                 Double confidence = lf.createObject(Double.class, 
(TypedLiteral) mapping.get("confidence"));
-                Graph properties = new GraphNode(entityUri, 
remoteEntityCache).getNodeContext();
+                Graph properties = new GraphNode(entityUri, 
contentItem.getMetadata()).getNodeContext();
                 entity.addSuggestion(entityUri, label, confidence, properties);
             }
         }
@@ -494,24 +487,22 @@ public class ContentItemResource extends
      */
     public String getPlacesAsJSON() throws ParseException, 
UnsupportedEncodingException {
         MGraph g = new SimpleMGraph();
-        if (remoteEntityCache != null) {
-            LiteralFactory lf = LiteralFactory.getInstance();
-            for (EntityExtractionSummary p : getPlaceOccurrences()) {
-                EntitySuggestion bestGuess = p.getBestGuess();
-                if (bestGuess == null) {
-                    continue;
-                }
-                UriRef uri = new UriRef(bestGuess.getUri());
-                Iterator<Triple> latitudes = remoteEntityCache.filter(uri, 
GEO_LAT, null);
-                if (latitudes.hasNext()) {
-                    g.add(latitudes.next());
-                }
-                Iterator<Triple> longitutes = remoteEntityCache.filter(uri, 
GEO_LONG, null);
-                if (longitutes.hasNext()) {
-                    g.add(longitutes.next());
-                    g.add(new TripleImpl(uri, Properties.RDFS_LABEL, 
lf.createTypedLiteral(bestGuess
-                            .getLabel())));
-                }
+        LiteralFactory lf = LiteralFactory.getInstance();
+        MGraph metadata = contentItem.getMetadata();
+        for (EntityExtractionSummary p : getPlaceOccurrences()) {
+            EntitySuggestion bestGuess = p.getBestGuess();
+            if (bestGuess == null) {
+                continue;
+            }
+            UriRef uri = new UriRef(bestGuess.getUri());
+            Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
+            if (latitudes.hasNext()) {
+                g.add(latitudes.next());
+            }
+            Iterator<Triple> longitutes = metadata.filter(uri, GEO_LONG, null);
+            if (longitutes.hasNext()) {
+                g.add(longitutes.next());
+                g.add(new TripleImpl(uri, Properties.RDFS_LABEL, 
lf.createTypedLiteral(bestGuess.getLabel())));
             }
         }
         ByteArrayOutputStream out = new ByteArrayOutputStream();

Modified: 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java?rev=1177741&r1=1177740&r2=1177741&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/EnginesRootResource.java
 Fri Sep 30 17:25:29 2011
@@ -41,13 +41,10 @@ import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.Response;
 
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.access.TcManager;
-import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.clerezza.rdf.core.serializedform.Serializer;
 import org.apache.stanbol.commons.web.base.ContextHelper;
 import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
-import org.apache.stanbol.enhancer.jersey.cache.EntityCacheProvider;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -76,23 +73,11 @@ public class EnginesRootResource extends
 
     protected Serializer serializer;
 
-    protected TripleCollection entityCache;
-
     // bind the job manager by looking it up from the servlet request context
     public EnginesRootResource(@Context ServletContext context) {
         jobManager = 
ContextHelper.getServiceFromContext(EnhancementJobManager.class, context);
         tcManager = ContextHelper.getServiceFromContext(TcManager.class, 
context);
         serializer = ContextHelper.getServiceFromContext(Serializer.class, 
context);
-        entityCache = new SimpleMGraph().getGraph();
-        try {
-            EntityCacheProvider entityCacheProvider = 
ContextHelper.getServiceFromContext(
-                EntityCacheProvider.class, context);
-            if (entityCacheProvider != null) {
-                entityCache = entityCacheProvider.getEntityCache();
-            }
-        } catch (NullPointerException e) {
-            // service lookup can raise null pointer exception, fall back to 
empty cache
-        }
     }
 
     @GET
@@ -183,8 +168,8 @@ public class EnginesRootResource extends
         MGraph graph = ci.getMetadata();
 
         if (buildAjaxview) {
-            ContentItemResource contentItemResource = new 
ContentItemResource(null, ci, entityCache, uriInfo,
-                    tcManager, serializer, servletContext);
+            ContentItemResource contentItemResource = new 
ContentItemResource(null, ci, uriInfo, tcManager,
+                    serializer, servletContext);
             contentItemResource.setRdfSerializationFormat(format);
             Viewable ajaxView = new Viewable("/ajax/contentitem", 
contentItemResource);
             return Response.ok(ajaxView,TEXT_HTML).build();


Reply via email to