Author: rwesten
Date: Thu May 17 11:28:41 2012
New Revision: 1339557

URL: http://svn.apache.org/viewvc?rev=1339557&view=rev
Log:
STANBOL-613

* merged changes to EnhancementEngines related to STANBOL-613 from the CELI 
enhancement engine branch back to trunk

STANBOL-617

* Adapted Zemanta EnhancementEngine to comply with the new rules for 
TopicAnnotations

Modified:
    
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
    
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
    incubator/stanbol/trunk/enhancer/engines/langid/pom.xml
    
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
    
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
    
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
    
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
    
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
 Thu May 17 11:28:41 2012
@@ -63,6 +63,7 @@ public class EnhancementRDFUtils {
      * @param entity
      *            the related entity
      * @param nameField the field used to extract the name
+     * @param lang the preferred language to include
      */
     public static UriRef writeEntityAnnotation(EnhancementEngine engine,
                                                LiteralFactory literalFactory,
@@ -70,7 +71,8 @@ public class EnhancementRDFUtils {
                                                UriRef contentItemId,
                                                Collection<NonLiteral> 
relatedEnhancements,
                                                Representation rep,
-                                               String nameField) {
+                                               String nameField, 
+                                               String lang) {
         // 1. check if the returned Entity does has a label -> if not return 
null
         // add labels (set only a single label. Use "en" if available!
         Text label = null;
@@ -81,7 +83,7 @@ public class EnhancementRDFUtils {
                 label = actLabel;
             } else {
                 //use startWith to match also en-GB and en-US ...
-                if (actLabel.getLanguage() != null && 
actLabel.getLanguage().startsWith("en")) {
+                if (actLabel.getLanguage() != null && 
actLabel.getLanguage().startsWith(lang)) {
                     label = actLabel;
                 }
             }

Modified: 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
 Thu May 17 11:28:41 2012
@@ -49,6 +49,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -61,6 +62,7 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
 import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
 import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
 import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
@@ -126,6 +128,11 @@ public class NamedEntityTaggingEngine 
 
     @Property(intValue=0)
     public static final String SERVICE_RANKING = Constants.SERVICE_RANKING;
+    /**
+     * The default language for labels included in the enhancement metadata
+     * (if not available for the parsed content).
+     */
+    private static final String DEFAULT_LANGUAGE = "en";
     
     /**
      * Service of the Entityhub that manages all the active referenced Site. 
This Service is used to lookup the
@@ -154,6 +161,7 @@ public class NamedEntityTaggingEngine 
     public static final Integer defaultOrder = ORDERING_EXTRACTION_ENHANCEMENT;
 
 
+
     /**
      * State if text annotations of type {@link 
OntologicalClasses#DBPEDIA_PERSON} are enhanced by this engine
      */
@@ -319,8 +327,11 @@ public class NamedEntityTaggingEngine 
         LiteralFactory literalFactory = LiteralFactory.getInstance();
         // Retrieve the existing text annotations (requires read lock)
         Map<NamedEntity,List<UriRef>> textAnnotations = new 
HashMap<NamedEntity,List<UriRef>>();
+        //the language extracted for the parsed content or NULL if not 
available
+        String contentLangauge;
         ci.getLock().readLock().lock();
         try {
+            contentLangauge = EnhancementEngineHelper.getLanguage(ci);
             for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, 
TechnicalClasses.ENHANCER_TEXTANNOTATION); it
                     .hasNext();) {
                 UriRef uri = (UriRef) it.next().getSubject();
@@ -346,7 +357,7 @@ public class NamedEntityTaggingEngine 
         for (Entry<NamedEntity,List<UriRef>> entry : 
textAnnotations.entrySet()) {
             try {
                 List<Entity> entitySuggestions = computeEntityRecommentations(
-                    site, entry.getKey(),entry.getValue());
+                    site, entry.getKey(),entry.getValue(),contentLangauge);
                 if(entitySuggestions != null && !entitySuggestions.isEmpty()){
                     suggestions.put(entry.getKey(), entitySuggestions);
                 }
@@ -366,7 +377,10 @@ public class NamedEntityTaggingEngine 
                 for(Entity suggestion : entitySuggestions.getValue()){
                     log.debug("Add Suggestion {} for {}", suggestion.getId(), 
entitySuggestions.getKey());
                     EnhancementRDFUtils.writeEntityAnnotation(this, 
literalFactory, graph, ci.getUri(),
-                        annotationsToRelate, suggestion.getRepresentation(), 
nameField);
+                        annotationsToRelate, suggestion.getRepresentation(), 
nameField,
+                        //TODO: maybe we want labels in a different language 
than the
+                        //      language of the content (e.g. Accept-Language 
header)?!
+                        contentLangauge == null ? DEFAULT_LANGUAGE : 
contentLangauge);
                     if (dereferenceEntities) {
                         entityData.put(suggestion.getId(), 
suggestion.getRepresentation());
                     }
@@ -391,13 +405,15 @@ public class NamedEntityTaggingEngine 
      * @param contentItemId the id of the contentItem
      * @param textAnnotation the text annotation to enhance
      * @param subsumedAnnotations other text annotations for the same entity 
+     * @param language the language of the analyzed text or <code>null</code>
+     * if not available.
      * @return the suggested {@link Entity entities}
      * @throws EntityhubException On any Error while looking up Entities via
      * the Entityhub
      */
     protected final List<Entity> computeEntityRecommentations(ReferencedSite 
site,
             NamedEntity namedEntity,
-            List<UriRef> subsumedAnnotations) throws EntityhubException {
+            List<UriRef> subsumedAnnotations, String language) throws 
EntityhubException {
         // First get the required properties for the parsed textAnnotation
         // ... and check the values
 
@@ -406,7 +422,16 @@ public class NamedEntityTaggingEngine 
                 entityhub.getQueryFactory().createFieldQuery() : 
                     site.getQueryFactory().createFieldQuery();
         // replace spaces with plus to create an AND search for all words in 
the name!
-        query.setConstraint(nameField, new 
TextConstraint(namedEntity.getName()));// name.replace(' ', '+')));
+        Constraint labelConstraint;
+        //TODO: make case sensitivity configurable
+        boolean casesensitive = false;
+        if(language != null){
+            //search labels in the language and without language
+            labelConstraint = new 
TextConstraint(namedEntity.getName(),casesensitive,language,null);
+        } else {
+            labelConstraint = new 
TextConstraint(namedEntity.getName(),casesensitive);
+        }
+        query.setConstraint(nameField, labelConstraint);
         if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
             if (personState) {
                 if (personType != null) {
@@ -457,7 +482,7 @@ public class NamedEntityTaggingEngine 
             boolean found = false;
             while(labels.hasNext() && !found){
                 Text label = labels.next();
-                if(label.getLanguage() == null || 
label.getLanguage().startsWith("en")){
+                if(label.getLanguage() == null || (language != null && 
label.getLanguage().startsWith(language))){
                     
if(label.getText().equalsIgnoreCase(namedEntity.getName())){
                         found = true;
                     }

Modified: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 Thu May 17 11:28:41 2012
@@ -463,32 +463,36 @@ public class KeywordLinkingEngine 
         }
     }
     /**
-     * Extracts the language of the parsed ContentItem from the metadata
+     * Extracts the language of the parsed ContentItem by using
+     * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and "en" as
+     * default.
      * @param ci the content item
      * @return the language
      */
     private String extractLanguage(ContentItem ci) {
-        MGraph metadata = ci.getMetadata();
-        Iterator<Triple> langaugeEnhancementCreatorTriples = 
-            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
-        if(langaugeEnhancementCreatorTriples.hasNext()){
-            String lang = EnhancementEngineHelper.getString(metadata, 
-                langaugeEnhancementCreatorTriples.next().getSubject(), 
-                Properties.DC_LANGUAGE);
-            if(lang != null){
-                return lang;
-            } else {
-                log.warn("Unable to extract language for ContentItem %s! The 
Enhancement of the %s is missing the %s property",
-                    new 
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
-                log.warn(" ... return 'en' as default");
-                return "en";
-            }
+        String lang = EnhancementEngineHelper.getLanguage(ci);
+//        if(lang != null){
+//        MGraph metadata = ci.getMetadata();
+//        Iterator<Triple> langaugeEnhancementCreatorTriples = 
+//            metadata.filter(null, Properties.DC_CREATOR, 
LANG_ID_ENGINE_NAME);
+//        if(langaugeEnhancementCreatorTriples.hasNext()){
+//            String lang = EnhancementEngineHelper.getString(metadata, 
+//                langaugeEnhancementCreatorTriples.next().getSubject(), 
+//                Properties.DC_LANGUAGE);
+        if(lang != null){
+            return lang;
         } else {
-            log.warn("Unable to extract language for ContentItem %s! Is the %s 
active?",
-                
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+            log.warn("Unable to extract language for ContentItem %s! The 
Enhancement of the %s is missing the %s property",
+                new 
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
             log.warn(" ... return 'en' as default");
             return "en";
         }
+//        } else {
+//            log.warn("Unable to extract language for ContentItem %s! Is the 
%s active?",
+//                
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+//            log.warn(" ... return 'en' as default");
+//            return "en";
+//        }
     }
 
     

Modified: incubator/stanbol/trunk/enhancer/engines/langid/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/langid/pom.xml?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/langid/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/langid/pom.xml Thu May 17 11:28:41 
2012
@@ -113,6 +113,18 @@
     </dependency>
 
     <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>

Modified: 
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
 Thu May 17 11:28:41 2012
@@ -17,6 +17,8 @@
 package org.apache.stanbol.enhancer.engines.langid;
 
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -34,6 +36,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Property;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -73,7 +76,11 @@ public class LangIdEnhancementEngine 
 
     /**
      * The default value for the Execution of this Engine. Currently set to
-     * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
+     * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+     * NOTE: this information is used by the default and weighed {@link Chain}
+     * implementation to determine the processing order of 
+     * {@link EnhancementEngine}s. Other {@link Chain} implementation do not
+     * use this information.
      */
     public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
 
@@ -162,6 +169,7 @@ public class LangIdEnhancementEngine 
         try {
             UriRef textEnhancement = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
             g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new 
PlainLiteralImpl(language)));
+            g.add(new TripleImpl(textEnhancement, DC_TYPE, 
DCTERMS_LINGUISTIC_SYSTEM));
         } finally {
             ci.getLock().writeLock().unlock();
         }

Modified: 
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
 Thu May 17 11:28:41 2012
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.langid.core;
 
+import static junit.framework.Assert.assertEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 
@@ -35,16 +36,21 @@ import org.junit.Test;
  */
 public class LangIdTest {
 
+    private static final String TEST_FILE_NAME = "en.txt";
     /**
-     * This contains the text categorizer to test.
+     * This contains the text used for testing
      */
-  
+    private static String text;
     /**
      * This initializes the text categorizer.
      */
     @BeforeClass
     public static void oneTimeSetUp() throws IOException {
         LanguageIdentifier.initProfiles();
+        InputStream in = LangIdTest.class.getClassLoader().getResourceAsStream(
+            TEST_FILE_NAME);
+        assertNotNull("failed to load resource " + TEST_FILE_NAME, in);
+        text = IOUtils.toString(in);
     }
 
     /**
@@ -54,16 +60,8 @@ public class LangIdTest {
      */
     @Test
     public void testLangId() throws IOException {
-        String testFileName = "en.txt";
-
-        InputStream in = this.getClass().getClassLoader().getResourceAsStream(
-                testFileName);
-        assertNotNull("failed to load resource " + testFileName, in);
-
-        String text = IOUtils.toString(in);
         LanguageIdentifier tc = new LanguageIdentifier(text);
         String language = tc.getLanguage();
         assertEquals("en", language);
     }
-
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
 Thu May 17 11:28:41 2012
@@ -257,7 +257,7 @@ public class OpenCalaisEngine 
 
     public int canEnhance(ContentItem ci) throws EngineException {
         if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null){
-            String language = getMetadataLanguage(ci.getMetadata(), null);
+            String language = EnhancementEngineHelper.getLanguage(ci);
             if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
                 log.info("OpenCalais can not process ContentItem {} because "
                     + "language {} is not supported (supported: {})",
@@ -320,7 +320,7 @@ public class OpenCalaisEngine 
     public void createEnhancements(Collection<CalaisEntityOccurrence> occs, 
ContentItem ci) {
         LiteralFactory literalFactory = LiteralFactory.getInstance();
         final Language language; // used for plain literals representing parts 
fo the content
-        String langString = getMetadataLanguage(ci.getMetadata(), null);
+        String langString = EnhancementEngineHelper.getLanguage(ci);
         if(langString != null && !langString.isEmpty()){
             language = new Language(langString);
         } else {
@@ -593,25 +593,6 @@ public class OpenCalaisEngine 
                 urlConn.getInputStream(), responseEncoding);
     }
 
-    public String getMetadataLanguage(MGraph model, NonLiteral subj) {
-        Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
-        if (it.hasNext()) {
-            Resource langNode = it.next().getObject();
-            return getLexicalForm(langNode);
-        }
-        return null;
-    }
-
-    public String getLexicalForm(Resource res) {
-        if (res == null) {
-            return null;
-        } else if (res instanceof Literal) {
-            return ((Literal) res).getLexicalForm();
-        } else {
-            return res.toString();
-        }
-    }
-
     /**
      * The activate method.
      *

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 Thu May 17 11:28:41 2012
@@ -473,31 +473,34 @@ public class NEREngineCore implements En
      */
     public static final Literal LANG_ID_ENGINE_NAME = 
LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine");
     /**
-     * Extracts the language of the parsed ContentItem from the metadata
+     * Extracts the language of the parsed ContentItem by using
+     * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and 
+     * {@link #defaultLang} as default
      * @param ci the content item
      * @return the language
      */
     private String extractLanguage(ContentItem ci) {
-        MGraph metadata = ci.getMetadata();
-        Iterator<Triple> langaugeEnhancementCreatorTriples = 
-            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
-        if(langaugeEnhancementCreatorTriples.hasNext()){
-            String lang = EnhancementEngineHelper.getString(metadata, 
-                langaugeEnhancementCreatorTriples.next().getSubject(), 
-                Properties.DC_LANGUAGE);
-            if(lang != null){
-                return lang;
-            } else {
-                log.info("Unable to extract language for ContentItem %s! The 
Enhancement of the %s is missing the %s property",
-                    new 
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
-                log.info(" ... return '{}' as default",defaultLang);
-                return defaultLang;
-            }
+        String lang = EnhancementEngineHelper.getLanguage(ci);
+//        MGraph metadata = ci.getMetadata();
+//        Iterator<Triple> langaugeEnhancementCreatorTriples = 
+//            metadata.filter(null, Properties.DC_CREATOR, 
LANG_ID_ENGINE_NAME);
+//        if(langaugeEnhancementCreatorTriples.hasNext()){
+//            String lang = EnhancementEngineHelper.getString(metadata, 
+//                langaugeEnhancementCreatorTriples.next().getSubject(), 
+//                Properties.DC_LANGUAGE);
+        if(lang != null){
+            return lang;
         } else {
-            log.info("Unable to extract language for ContentItem {}! Is the {} 
active?",
-                
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+            log.info("Unable to extract language for ContentItem %s! The 
Enhancement of the %s is missing the %s property",
+                new 
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
             log.info(" ... return '{}' as default",defaultLang);
             return defaultLang;
         }
+//        } else {
+//            log.info("Unable to extract language for ContentItem {}! Is the 
{} active?",
+//                
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+//            log.info(" ... return '{}' as default",defaultLang);
+//            return defaultLang;
+//        }
     }
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
 Thu May 17 11:28:41 2012
@@ -16,7 +16,10 @@
  */
 package org.apache.stanbol.enhancer.engines.zemanta.impl;
 
+import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
+import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTopicEnhancement;
 import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
@@ -73,6 +76,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.osgi.framework.BundleContext;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
@@ -232,6 +236,8 @@ public class ZemantaEnhancementEngine 
     
     protected void processCategories(MGraph results, MGraph enhancements, 
UriRef ciId) {
         Iterator<Triple> categories = results.filter(null, RDF_TYPE, 
ZemantaOntologyEnum.Category.getUri());
+        //add the root Text annotation as soon as the first TopicAnnotation is 
added.
+        UriRef textAnnotation = null;
         while (categories.hasNext()) {
             NonLiteral category = categories.next().getSubject();
             log.debug("process category " + category);
@@ -245,8 +251,16 @@ public class ZemantaEnhancementEngine 
                 if (categorisationScheme != null && 
categorisationScheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) {
                     String categoryTitle = 
EnhancementEngineHelper.getString(results, target, 
ZemantaOntologyEnum.title.getUri());
                     if (categoryTitle != null) {
-                        //now write the Stanbol Enhancer entity enhancement
-                        UriRef categoryEnhancement = 
EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
+                        if(textAnnotation == null){
+                            //this is the first category ... create the 
TextAnnotation used
+                            //to link all fise:TopicAnnotations
+                            textAnnotation = 
createTextEnhancement(enhancements, this, ciId);
+                            enhancements.add(new 
TripleImpl(textAnnotation,DC_TYPE,SKOS_CONCEPT));
+                        }
+                        //now write the TopicAnnotation
+                        UriRef categoryEnhancement = 
createTopicEnhancement(enhancements, this, ciId);
+                        //make related to the EntityAnnotation
+                        enhancements.add(new TripleImpl(categoryEnhancement, 
DC_RELATION, textAnnotation));
                         //write the title
                         enhancements.add(new TripleImpl(categoryEnhancement, 
ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(categoryTitle)));
                         //write the reference
@@ -256,14 +270,16 @@ public class ZemantaEnhancementEngine 
                         }
                         //write the confidence
                         if (confidence != null) {
-                            enhancements.add(
-                                    new TripleImpl(categoryEnhancement, 
ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
+                            enhancements.add(new 
TripleImpl(categoryEnhancement, ENHANCER_CONFIDENCE, 
+                                
literalFactory.createTypedLiteral(confidence)));
                         }
-                        //we need to write the entity type and the dc:type
+                        //we need to write the fise:entity-type
+                        //as of STANBOL-617 we use now both the 
zemanta:Category AND the skos:Concept
+                        //type. dc:type is no longer used as this is only used 
by fise:TextAnnotations
                         // see 
http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories
                         // for more Information
-                        enhancements.add(new TripleImpl(categoryEnhancement, 
DC_TYPE, ENHANCER_CATEGORY));
-                        //Use the Zemanta Category as type for the referred 
Entity
+                        enhancements.add(new TripleImpl(categoryEnhancement, 
ENHANCER_ENTITY_TYPE, SKOS_CONCEPT));
+                        //Use also Zemanta Category as type for the referred 
Entity
                         enhancements.add(new TripleImpl(categoryEnhancement, 
ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri()));
                     } else {
                         log.warn("Unable to process category " + category + " 
because no title is present");

Modified: 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
 Thu May 17 11:28:41 2012
@@ -120,6 +120,8 @@ public class ZemantaEnhancementEngineTes
         log.info(textAnnoNum + " TextAnnotations found ...");
         int entityAnnoNum = 
EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(),expectedValues);
         log.info(entityAnnoNum + " EntityAnnotations found ...");
+        int topicAnnoNum = 
EnhancementStructureHelper.validateAllTopicAnnotations(ci.getMetadata(),expectedValues);
+        log.info(entityAnnoNum + " TopicAnnotations found ...");
     }
 
     public static void main(String[] args) throws Exception{


Reply via email to