Author: rwesten
Date: Sat Feb 25 16:05:32 2012
New Revision: 1293633

URL: http://svn.apache.org/viewvc?rev=1293633&view=rev
Log:
Fixes STANBOL-509: All EnhancementEngines that create TextAnnotations now use 
PlainLiterals; also added a check for that to the integration tests

Modified:
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
    
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
    
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
    
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/DefaultChainTest.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1293633&r1=1293632&r2=1293633&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 Sat Feb 25 16:05:32 2012
@@ -387,6 +387,10 @@ public class KeywordLinkingEngine 
      * @param language
      */
     private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> 
linkedEntities, String language) {
+        Language languageObject = null;
+        if(language != null && !language.isEmpty()){
+            languageObject = new Language(language);
+        }
         MGraph metadata = ci.getMetadata();
         for(LinkedEntity linkedEntity : linkedEntities){
             Collection<UriRef> textAnnotations = new 
ArrayList<UriRef>(linkedEntity.getOccurrences().size());
@@ -402,10 +406,10 @@ public class KeywordLinkingEngine 
                     literalFactory.createTypedLiteral(occurrence.getEnd())));
                 metadata.add(new TripleImpl(textAnnotation, 
                     Properties.ENHANCER_SELECTION_CONTEXT, 
-                    
literalFactory.createTypedLiteral(occurrence.getContext())));
+                    new 
PlainLiteralImpl(occurrence.getContext(),languageObject)));
                 metadata.add(new TripleImpl(textAnnotation, 
                     Properties.ENHANCER_SELECTED_TEXT, 
-                    
literalFactory.createTypedLiteral(occurrence.getSelectedText())));
+                    new 
PlainLiteralImpl(occurrence.getSelectedText(),languageObject)));
                 metadata.add(new TripleImpl(textAnnotation, 
                     Properties.ENHANCER_CONFIDENCE, 
                     
literalFactory.createTypedLiteral(linkedEntity.getScore())));

Modified: 
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1293633&r1=1293632&r2=1293633&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
 Sat Feb 25 16:05:32 2012
@@ -50,6 +50,7 @@ import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.clerezza.rdf.core.Graph;
+import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
@@ -58,6 +59,7 @@ import org.apache.clerezza.rdf.core.Reso
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.access.TcManager;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.Parser;
@@ -317,6 +319,13 @@ public class OpenCalaisEngine 
      */
     public void createEnhancements(Collection<CalaisEntityOccurrence> occs, 
ContentItem ci) {
         LiteralFactory literalFactory = LiteralFactory.getInstance();
+        final Language language; // used for plain literals representing parts 
of the content
+        String langString = getMetadataLanguage(ci.getMetadata(), null);
+        if(langString != null && !langString.isEmpty()){
+            language = new Language(langString);
+        } else {
+            language = null;
+        }
         //TODO create TextEnhancement (form, start, end, type?) and 
EntityAnnotation (id, name, type)
         HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, 
UriRef>();
         for (CalaisEntityOccurrence occ : occs) {
@@ -326,14 +335,14 @@ public class OpenCalaisEngine 
             model.add(new TripleImpl(textAnnotation, DC_TYPE, occ.type));
             // for autotagger use the name instead of the matched term (that 
might be a pronoun!)
             if (onlyNERMode) {
-                model.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTED_TEXT,literalFactory.createTypedLiteral(occ.name)));
+                model.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTED_TEXT,new PlainLiteralImpl(occ.name,language)));
             }
             else {
-                model.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTED_TEXT, literalFactory.createTypedLiteral(occ.exact)));
+                model.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occ.exact,language)));
             }
             model.add(new TripleImpl(textAnnotation, ENHANCER_START, 
literalFactory.createTypedLiteral(occ.offset)));
             model.add(new TripleImpl(textAnnotation, ENHANCER_END, 
literalFactory.createTypedLiteral(occ.offset + occ.length)));
-            model.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTION_CONTEXT, literalFactory.createTypedLiteral(occ.context)));
+            model.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.context,language)));
             //create EntityAnnotation only once but add a reference to the 
textAnnotation
             if (entityAnnotationMap.containsKey(occ.id)) {
                 model.add(new TripleImpl(entityAnnotationMap.get(occ.id), 
DC_RELATION, textAnnotation));

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1293633&r1=1293632&r2=1293633&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 Sat Feb 25 16:05:32 2012
@@ -46,11 +46,13 @@ import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.Span;
 
+import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
@@ -172,7 +174,7 @@ public class NEREngineCore implements En
                 if(nameFinderModel == null){
                     log.info("No NER Model for {} and language {} 
available!",typeLabel,language);
                 } else {
-                    findNamedEntities(ci, text, typeUri, typeLabel, 
nameFinderModel);
+                    findNamedEntities(ci, text, language, typeUri, typeLabel, 
nameFinderModel);
                 }
             }
         } catch (Exception e) {
@@ -182,6 +184,7 @@ public class NEREngineCore implements En
 
     protected void findNamedEntities(final ContentItem ci,
                                      final String text,
+                                     final String lang,
                                      final UriRef typeUri,
                                      final String typeLabel,
                                      final TokenNameFinderModel 
nameFinderModel) {
@@ -193,6 +196,12 @@ public class NEREngineCore implements En
             log.warn("NULL was parsed as text for content item " + 
ci.getUri().getUnicodeString() + "! -> call ignored");
             return;
         }
+        final Language language;
+        if(lang != null && !lang.isEmpty()){
+            language = new Language(lang);
+        } else {
+            language = null;
+        }
         log.debug("findNamedEntities typeUri={}, type={}, text=", 
                 new Object[]{ typeUri, typeLabel, StringUtils.abbreviate(text, 
100) });
         LiteralFactory literalFactory = LiteralFactory.getInstance();
@@ -211,10 +220,10 @@ public class NEREngineCore implements En
     
                 for (NameOccurrence occurrence : occurrences) {
                     UriRef textAnnotation = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
-                    g.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTED_TEXT, literalFactory
-                            .createTypedLiteral(name)));
-                    g.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTION_CONTEXT, literalFactory
-                            .createTypedLiteral(occurrence.context)));
+                    g.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTED_TEXT, 
+                        new PlainLiteralImpl(name, language)));
+                    g.add(new TripleImpl(textAnnotation, 
ENHANCER_SELECTION_CONTEXT, 
+                        new PlainLiteralImpl(occurrence.context, language)));
                     g.add(new TripleImpl(textAnnotation, DC_TYPE, typeUri));
                     g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, 
literalFactory
                             .createTypedLiteral(occurrence.confidence)));

Modified: 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java?rev=1293633&r1=1293632&r2=1293633&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
 Sat Feb 25 16:05:32 2012
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.zemanta.impl;
 
+import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
@@ -48,6 +49,7 @@ import org.apache.clerezza.rdf.core.NonL
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.commons.io.IOUtils;
@@ -277,27 +279,27 @@ public class ZemantaEnhancementEngine 
         Iterator<Triple> recognitions = results.filter(null, RDF_TYPE, 
ZemantaOntologyEnum.Recognition.getUri());
         while (recognitions.hasNext()) {
             NonLiteral recognition = recognitions.next().getSubject();
-            log.info("process recognition " + recognition);
+            log.debug("process recognition " + recognition);
             //first get everything we need for the textAnnotations
             Double confidence = parseConfidence(results, recognition);
-            log.info(" > confidence :" + confidence);
+            log.debug(" > confidence :" + confidence);
             String anchor = EnhancementEngineHelper.getString(results, 
recognition, ZemantaOntologyEnum.anchor.getUri());
-            log.info(" > anchor :" + anchor);
+            log.debug(" > anchor :" + anchor);
             Collection<NonLiteral> textAnnotations = 
processTextAnnotation(enhancements, text, ciId, anchor, confidence);
-            log.info(" > number of textAnnotations :" + 
textAnnotations.size());
+            log.debug(" > number of textAnnotations :" + 
textAnnotations.size());
 
             //second we need to create the EntityAnnotation that represent the
             //recognition
             NonLiteral object = EnhancementEngineHelper.getReference(results, 
recognition, ZemantaOntologyEnum.object.getUri());
-            log.info(" > object :" + object);
+            log.debug(" > object :" + object);
             //The targets represent the linked entities
             //  ... and yes there can be more of them!
             //TODO: can we create an EntityAnnotation with several referred 
entities?
             //      Should we use the owl:sameAs to decide that!
             Set<UriRef> sameAsSet = new HashSet<UriRef>();
-            for (Iterator<UriRef> sameAs = 
EnhancementEngineHelper.getReferences(results, object, 
ZemantaOntologyEnum.owlSameAs.getUri()); sameAs.hasNext(); 
sameAsSet.add(sameAs.next()))
+            for (Iterator<UriRef> sameAs = getReferences(results, object, 
ZemantaOntologyEnum.owlSameAs.getUri()); sameAs.hasNext(); 
sameAsSet.add(sameAs.next()))
                 ;
-            log.info(" > sameAs :" + sameAsSet);
+            log.debug(" > sameAs :" + sameAsSet);
             //now parse the targets and look if there are others than the one
             //merged by using sameAs
             Iterator<UriRef> targets = 
EnhancementEngineHelper.getReferences(results, object, 
ZemantaOntologyEnum.target.getUri());
@@ -305,12 +307,12 @@ public class ZemantaEnhancementEngine 
             while (targets.hasNext()) {
                 //the entityRef is the URL of the target
                 UriRef entity = targets.next();
-                log.info("    -  target :" + entity);
+                log.debug("    -  target :" + entity);
                 UriRef targetType = 
EnhancementEngineHelper.getReference(results, entity, 
ZemantaOntologyEnum.targetType.getUri());
-                log.info("       o type :" + targetType);
+                log.debug("       o type :" + targetType);
                 if 
(ZemantaOntologyEnum.targetType_RDF.getUri().equals(targetType)) {
                     String targetTitle = 
EnhancementEngineHelper.getString(results, entity, 
ZemantaOntologyEnum.title.getUri());
-                    log.info("       o title :" + targetTitle);
+                    log.debug("       o title :" + targetTitle);
                     if (sameAsSet.contains(entity)) {
                         if (title == null) {
                             title = targetTitle;
@@ -342,7 +344,7 @@ public class ZemantaEnhancementEngine 
                         new TripleImpl(entityEnhancement, 
ENHANCER_ENTITY_REFERENCE, entity));
             }
             enhancements.add(
-                    new TripleImpl(entityEnhancement, ENHANCER_ENTITY_LABEL, 
literalFactory.createTypedLiteral(title)));
+                    new TripleImpl(entityEnhancement, ENHANCER_ENTITY_LABEL, 
new PlainLiteralImpl(title)));
         }
     }
 
@@ -395,7 +397,7 @@ public class ZemantaEnhancementEngine 
     private Collection<NonLiteral> processTextAnnotation(MGraph enhancements, 
String text, UriRef ciId, String anchor, Double confidence) {
         Collection<NonLiteral> textAnnotations = new ArrayList<NonLiteral>();
         int anchorLength = anchor.length();
-        Literal anchorLiteral = literalFactory.createTypedLiteral(anchor);
+        Literal anchorLiteral = new PlainLiteralImpl(anchor);
         //first search for existing TextAnnotations for the anchor
         Map<Integer, Collection<NonLiteral>> existingTextAnnotationsMap = 
searchExistingTextAnnotations(enhancements, anchorLiteral);
 

Modified: 
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/DefaultChainTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/DefaultChainTest.java?rev=1293633&r1=1293632&r2=1293633&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/DefaultChainTest.java
 (original)
+++ 
incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/DefaultChainTest.java
 Sat Feb 25 16:05:32 2012
@@ -71,13 +71,16 @@ public class DefaultChainTest extends En
         .assertContentRegexp(
                 //check execution metadata
                 
"http://stanbol.apache.org/ontology/enhancer/executionMetadata#executionPart",
-                //check execution of metaxa & if executionPlan is incuded
+                //check execution of metaxa & if executionPlan is included
                 
"http://stanbol.apache.org/ontology/enhancer/executionplan#engine.*metaxa", 
                 "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine",
                 "http://purl.org/dc/terms/language.*en",
                 "http://fise.iks-project.eu/ontology/entity-label.*Paris",
                 
"http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore",
-                "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley"
+                "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley",
+                //the following two lines test the use of plain literals (see 
STANBOL-509)
+                "http://fise.iks-project.eu/ontology/selected-text.*\"Bob 
Marley\"@en",
+                
"http://fise.iks-project.eu/ontology/selection-context>.*people such as Bob 
Marley.\"@en"
                 )
         .generateDocumentation(
                 documentor,


Reply via email to