Author: rwesten
Date: Thu May 17 11:28:41 2012
New Revision: 1339557
URL: http://svn.apache.org/viewvc?rev=1339557&view=rev
Log:
STANBOL-613
* merged changes to EnhancementEngines related to STANBOL-613 from the CELI
enhancement engine branch back to trunk
STANBOL-617
* Adapted Zemanta EnhancementEngine to comply with the new rules for
TopicAnnotations
Modified:
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
incubator/stanbol/trunk/enhancer/engines/langid/pom.xml
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
Modified:
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
Thu May 17 11:28:41 2012
@@ -63,6 +63,7 @@ public class EnhancementRDFUtils {
* @param entity
* the related entity
* @param nameField the field used to extract the name
+ * @param lang the preferred language to include
*/
public static UriRef writeEntityAnnotation(EnhancementEngine engine,
LiteralFactory literalFactory,
@@ -70,7 +71,8 @@ public class EnhancementRDFUtils {
UriRef contentItemId,
Collection<NonLiteral>
relatedEnhancements,
Representation rep,
- String nameField) {
+ String nameField,
+ String lang) {
// 1. check if the returned Entity does has a label -> if not return
null
// add labels (set only a single label. Use "en" if available!
Text label = null;
@@ -81,7 +83,7 @@ public class EnhancementRDFUtils {
label = actLabel;
} else {
//use startWith to match also en-GB and en-US ...
- if (actLabel.getLanguage() != null &&
actLabel.getLanguage().startsWith("en")) {
+ if (actLabel.getLanguage() != null &&
actLabel.getLanguage().startsWith(lang)) {
label = actLabel;
}
}
Modified:
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
Thu May 17 11:28:41 2012
@@ -49,6 +49,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -61,6 +62,7 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
@@ -126,6 +128,11 @@ public class NamedEntityTaggingEngine
@Property(intValue=0)
public static final String SERVICE_RANKING = Constants.SERVICE_RANKING;
+ /**
+ * The default language for labels included in the enhancement metadata
+ * (if not available for the parsed content).
+ */
+ private static final String DEFAULT_LANGUAGE = "en";
/**
* Service of the Entityhub that manages all the active referenced Site.
This Service is used to lookup the
@@ -154,6 +161,7 @@ public class NamedEntityTaggingEngine
public static final Integer defaultOrder = ORDERING_EXTRACTION_ENHANCEMENT;
+
/**
* State if text annotations of type {@link
OntologicalClasses#DBPEDIA_PERSON} are enhanced by this engine
*/
@@ -319,8 +327,11 @@ public class NamedEntityTaggingEngine
LiteralFactory literalFactory = LiteralFactory.getInstance();
// Retrieve the existing text annotations (requires read lock)
Map<NamedEntity,List<UriRef>> textAnnotations = new
HashMap<NamedEntity,List<UriRef>>();
+ //the language extracted for the parsed content or NULL if not
available
+ String contentLangauge;
ci.getLock().readLock().lock();
try {
+ contentLangauge = EnhancementEngineHelper.getLanguage(ci);
for (Iterator<Triple> it = graph.filter(null, RDF_TYPE,
TechnicalClasses.ENHANCER_TEXTANNOTATION); it
.hasNext();) {
UriRef uri = (UriRef) it.next().getSubject();
@@ -346,7 +357,7 @@ public class NamedEntityTaggingEngine
for (Entry<NamedEntity,List<UriRef>> entry :
textAnnotations.entrySet()) {
try {
List<Entity> entitySuggestions = computeEntityRecommentations(
- site, entry.getKey(),entry.getValue());
+ site, entry.getKey(),entry.getValue(),contentLangauge);
if(entitySuggestions != null && !entitySuggestions.isEmpty()){
suggestions.put(entry.getKey(), entitySuggestions);
}
@@ -366,7 +377,10 @@ public class NamedEntityTaggingEngine
for(Entity suggestion : entitySuggestions.getValue()){
log.debug("Add Suggestion {} for {}", suggestion.getId(),
entitySuggestions.getKey());
EnhancementRDFUtils.writeEntityAnnotation(this,
literalFactory, graph, ci.getUri(),
- annotationsToRelate, suggestion.getRepresentation(),
nameField);
+ annotationsToRelate, suggestion.getRepresentation(),
nameField,
+ //TODO: maybe we want labels in a different language
than the
+ // language of the content (e.g. Accept-Language
header)?!
+ contentLangauge == null ? DEFAULT_LANGUAGE :
contentLangauge);
if (dereferenceEntities) {
entityData.put(suggestion.getId(),
suggestion.getRepresentation());
}
@@ -391,13 +405,15 @@ public class NamedEntityTaggingEngine
* @param contentItemId the id of the contentItem
* @param textAnnotation the text annotation to enhance
* @param subsumedAnnotations other text annotations for the same entity
+ * @param language the language of the analyzed text or <code>null</code>
+ * if not available.
* @return the suggested {@link Entity entities}
* @throws EntityhubException On any Error while looking up Entities via
* the Entityhub
*/
protected final List<Entity> computeEntityRecommentations(ReferencedSite
site,
NamedEntity namedEntity,
- List<UriRef> subsumedAnnotations) throws EntityhubException {
+ List<UriRef> subsumedAnnotations, String language) throws
EntityhubException {
// First get the required properties for the parsed textAnnotation
// ... and check the values
@@ -406,7 +422,16 @@ public class NamedEntityTaggingEngine
entityhub.getQueryFactory().createFieldQuery() :
site.getQueryFactory().createFieldQuery();
// replace spaces with plus to create an AND search for all words in
the name!
- query.setConstraint(nameField, new
TextConstraint(namedEntity.getName()));// name.replace(' ', '+')));
+ Constraint labelConstraint;
+ //TODO: make case sensitivity configurable
+ boolean casesensitive = false;
+ if(language != null){
+ //search labels in the language and without language
+ labelConstraint = new
TextConstraint(namedEntity.getName(),casesensitive,language,null);
+ } else {
+ labelConstraint = new
TextConstraint(namedEntity.getName(),casesensitive);
+ }
+ query.setConstraint(nameField, labelConstraint);
if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
if (personState) {
if (personType != null) {
@@ -457,7 +482,7 @@ public class NamedEntityTaggingEngine
boolean found = false;
while(labels.hasNext() && !found){
Text label = labels.next();
- if(label.getLanguage() == null ||
label.getLanguage().startsWith("en")){
+ if(label.getLanguage() == null || (language != null &&
label.getLanguage().startsWith(language))){
if(label.getText().equalsIgnoreCase(namedEntity.getName())){
found = true;
}
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
Thu May 17 11:28:41 2012
@@ -463,32 +463,36 @@ public class KeywordLinkingEngine
}
}
/**
- * Extracts the language of the parsed ContentItem from the metadata
+ * Extracts the language of the parsed ContentItem by using
+ * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and "en" as
+ * default.
* @param ci the content item
* @return the language
*/
private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples =
- metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
- if(langaugeEnhancementCreatorTriples.hasNext()){
- String lang = EnhancementEngineHelper.getString(metadata,
- langaugeEnhancementCreatorTriples.next().getSubject(),
- Properties.DC_LANGUAGE);
- if(lang != null){
- return lang;
- } else {
- log.warn("Unable to extract language for ContentItem %s! The
Enhancement of the %s is missing the %s property",
- new
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
- log.warn(" ... return 'en' as default");
- return "en";
- }
+ String lang = EnhancementEngineHelper.getLanguage(ci);
+// if(lang != null){
+// MGraph metadata = ci.getMetadata();
+// Iterator<Triple> langaugeEnhancementCreatorTriples =
+// metadata.filter(null, Properties.DC_CREATOR,
LANG_ID_ENGINE_NAME);
+// if(langaugeEnhancementCreatorTriples.hasNext()){
+// String lang = EnhancementEngineHelper.getString(metadata,
+// langaugeEnhancementCreatorTriples.next().getSubject(),
+// Properties.DC_LANGUAGE);
+ if(lang != null){
+ return lang;
} else {
- log.warn("Unable to extract language for ContentItem %s! Is the %s
active?",
-
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+ log.warn("Unable to extract language for ContentItem %s! The
Enhancement of the %s is missing the %s property",
+ new
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
log.warn(" ... return 'en' as default");
return "en";
}
+// } else {
+// log.warn("Unable to extract language for ContentItem %s! Is the
%s active?",
+//
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+// log.warn(" ... return 'en' as default");
+// return "en";
+// }
}
Modified: incubator/stanbol/trunk/enhancer/engines/langid/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/langid/pom.xml?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/langid/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/langid/pom.xml Thu May 17 11:28:41
2012
@@ -113,6 +113,18 @@
</dependency>
<dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
Modified:
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
Thu May 17 11:28:41 2012
@@ -17,6 +17,8 @@
package org.apache.stanbol.enhancer.engines.langid;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
import java.io.IOException;
import java.util.Collections;
@@ -34,6 +36,7 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -73,7 +76,11 @@ public class LangIdEnhancementEngine
/**
* The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
+ * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+ * NOTE: this information is used by the default and weighed {@link Chain}
+ * implementation to determine the processing order of
+ * {@link EnhancementEngine}s. Other {@link Chain} implementation do not
+ * use this information.
*/
public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
@@ -162,6 +169,7 @@ public class LangIdEnhancementEngine
try {
UriRef textEnhancement =
EnhancementEngineHelper.createTextEnhancement(ci, this);
g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new
PlainLiteralImpl(language)));
+ g.add(new TripleImpl(textEnhancement, DC_TYPE,
DCTERMS_LINGUISTIC_SYSTEM));
} finally {
ci.getLock().writeLock().unlock();
}
Modified:
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
Thu May 17 11:28:41 2012
@@ -16,6 +16,7 @@
*/
package org.apache.stanbol.enhancer.engines.langid.core;
+import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -35,16 +36,21 @@ import org.junit.Test;
*/
public class LangIdTest {
+ private static final String TEST_FILE_NAME = "en.txt";
/**
- * This contains the text categorizer to test.
+ * This contains the text used for testing
*/
-
+ private static String text;
/**
* This initializes the text categorizer.
*/
@BeforeClass
public static void oneTimeSetUp() throws IOException {
LanguageIdentifier.initProfiles();
+ InputStream in = LangIdTest.class.getClassLoader().getResourceAsStream(
+ TEST_FILE_NAME);
+ assertNotNull("failed to load resource " + TEST_FILE_NAME, in);
+ text = IOUtils.toString(in);
}
/**
@@ -54,16 +60,8 @@ public class LangIdTest {
*/
@Test
public void testLangId() throws IOException {
- String testFileName = "en.txt";
-
- InputStream in = this.getClass().getClassLoader().getResourceAsStream(
- testFileName);
- assertNotNull("failed to load resource " + testFileName, in);
-
- String text = IOUtils.toString(in);
LanguageIdentifier tc = new LanguageIdentifier(text);
String language = tc.getLanguage();
assertEquals("en", language);
}
-
}
Modified:
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
Thu May 17 11:28:41 2012
@@ -257,7 +257,7 @@ public class OpenCalaisEngine
public int canEnhance(ContentItem ci) throws EngineException {
if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null){
- String language = getMetadataLanguage(ci.getMetadata(), null);
+ String language = EnhancementEngineHelper.getLanguage(ci);
if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
log.info("OpenCalais can not process ContentItem {} because "
+ "language {} is not supported (supported: {})",
@@ -320,7 +320,7 @@ public class OpenCalaisEngine
public void createEnhancements(Collection<CalaisEntityOccurrence> occs,
ContentItem ci) {
LiteralFactory literalFactory = LiteralFactory.getInstance();
final Language language; // used for plain literals representing parts
fo the content
- String langString = getMetadataLanguage(ci.getMetadata(), null);
+ String langString = EnhancementEngineHelper.getLanguage(ci);
if(langString != null && !langString.isEmpty()){
language = new Language(langString);
} else {
@@ -593,25 +593,6 @@ public class OpenCalaisEngine
urlConn.getInputStream(), responseEncoding);
}
- public String getMetadataLanguage(MGraph model, NonLiteral subj) {
- Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource langNode = it.next().getObject();
- return getLexicalForm(langNode);
- }
- return null;
- }
-
- public String getLexicalForm(Resource res) {
- if (res == null) {
- return null;
- } else if (res instanceof Literal) {
- return ((Literal) res).getLexicalForm();
- } else {
- return res.toString();
- }
- }
-
/**
* The activate method.
*
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
Thu May 17 11:28:41 2012
@@ -473,31 +473,34 @@ public class NEREngineCore implements En
*/
public static final Literal LANG_ID_ENGINE_NAME =
LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine");
/**
- * Extracts the language of the parsed ContentItem from the metadata
+ * Extracts the language of the parsed ContentItem by using
+ * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and
+ * {@link #defaultLang} as default
* @param ci the content item
* @return the language
*/
private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples =
- metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
- if(langaugeEnhancementCreatorTriples.hasNext()){
- String lang = EnhancementEngineHelper.getString(metadata,
- langaugeEnhancementCreatorTriples.next().getSubject(),
- Properties.DC_LANGUAGE);
- if(lang != null){
- return lang;
- } else {
- log.info("Unable to extract language for ContentItem %s! The
Enhancement of the %s is missing the %s property",
- new
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
- log.info(" ... return '{}' as default",defaultLang);
- return defaultLang;
- }
+ String lang = EnhancementEngineHelper.getLanguage(ci);
+// MGraph metadata = ci.getMetadata();
+// Iterator<Triple> langaugeEnhancementCreatorTriples =
+// metadata.filter(null, Properties.DC_CREATOR,
LANG_ID_ENGINE_NAME);
+// if(langaugeEnhancementCreatorTriples.hasNext()){
+// String lang = EnhancementEngineHelper.getString(metadata,
+// langaugeEnhancementCreatorTriples.next().getSubject(),
+// Properties.DC_LANGUAGE);
+ if(lang != null){
+ return lang;
} else {
- log.info("Unable to extract language for ContentItem {}! Is the {}
active?",
-
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+ log.info("Unable to extract language for ContentItem %s! The
Enhancement of the %s is missing the %s property",
+ new
Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
log.info(" ... return '{}' as default",defaultLang);
return defaultLang;
}
+// } else {
+// log.info("Unable to extract language for ContentItem {}! Is the
{} active?",
+//
ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+// log.info(" ... return '{}' as default",defaultLang);
+// return defaultLang;
+// }
}
}
Modified:
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
Thu May 17 11:28:41 2012
@@ -16,7 +16,10 @@
*/
package org.apache.stanbol.enhancer.engines.zemanta.impl;
+import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
+import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTopicEnhancement;
import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
@@ -73,6 +76,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.osgi.framework.BundleContext;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
@@ -232,6 +236,8 @@ public class ZemantaEnhancementEngine
protected void processCategories(MGraph results, MGraph enhancements,
UriRef ciId) {
Iterator<Triple> categories = results.filter(null, RDF_TYPE,
ZemantaOntologyEnum.Category.getUri());
+ //add the root Text annotation as soon as the first TopicAnnotation is
added.
+ UriRef textAnnotation = null;
while (categories.hasNext()) {
NonLiteral category = categories.next().getSubject();
log.debug("process category " + category);
@@ -245,8 +251,16 @@ public class ZemantaEnhancementEngine
if (categorisationScheme != null &&
categorisationScheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) {
String categoryTitle =
EnhancementEngineHelper.getString(results, target,
ZemantaOntologyEnum.title.getUri());
if (categoryTitle != null) {
- //now write the Stanbol Enhancer entity enhancement
- UriRef categoryEnhancement =
EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
+ if(textAnnotation == null){
+ //this is the first category ... create the
TextAnnotation used
+ //to link all fise:TopicAnnotations
+ textAnnotation =
createTextEnhancement(enhancements, this, ciId);
+ enhancements.add(new
TripleImpl(textAnnotation,DC_TYPE,SKOS_CONCEPT));
+ }
+ //now write the TopicAnnotation
+ UriRef categoryEnhancement =
createTopicEnhancement(enhancements, this, ciId);
+ //make related to the EntityAnnotation
+ enhancements.add(new TripleImpl(categoryEnhancement,
DC_RELATION, textAnnotation));
//write the title
enhancements.add(new TripleImpl(categoryEnhancement,
ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(categoryTitle)));
//write the reference
@@ -256,14 +270,16 @@ public class ZemantaEnhancementEngine
}
//write the confidence
if (confidence != null) {
- enhancements.add(
- new TripleImpl(categoryEnhancement,
ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
+ enhancements.add(new
TripleImpl(categoryEnhancement, ENHANCER_CONFIDENCE,
+
literalFactory.createTypedLiteral(confidence)));
}
- //we need to write the entity type and the dc:type
+ //we need to write the fise:entity-type
+ //as of STANBOL-617 we use now both the
zemanta:Category AND the skos:Concept
+ //type. dc:type is no longer used as this is only used
by fise:TextAnnotations
// see
http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories
// for more Information
- enhancements.add(new TripleImpl(categoryEnhancement,
DC_TYPE, ENHANCER_CATEGORY));
- //Use the Zemanta Category as type for the referred
Entity
+ enhancements.add(new TripleImpl(categoryEnhancement,
ENHANCER_ENTITY_TYPE, SKOS_CONCEPT));
+ //Use also Zemanta Category as type for the referred
Entity
enhancements.add(new TripleImpl(categoryEnhancement,
ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri()));
} else {
log.warn("Unable to process category " + category + "
because no title is present");
Modified:
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java?rev=1339557&r1=1339556&r2=1339557&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
Thu May 17 11:28:41 2012
@@ -120,6 +120,8 @@ public class ZemantaEnhancementEngineTes
log.info(textAnnoNum + " TextAnnotations found ...");
int entityAnnoNum =
EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(),expectedValues);
log.info(entityAnnoNum + " EntityAnnotations found ...");
+ int topicAnnoNum =
EnhancementStructureHelper.validateAllTopicAnnotations(ci.getMetadata(),expectedValues);
+ log.info(entityAnnoNum + " TopicAnnotations found ...");
}
public static void main(String[] args) throws Exception{