Author: rwesten
Date: Fri May 25 11:34:33 2012
New Revision: 1342580
URL: http://svn.apache.org/viewvc?rev=1342580&view=rev
Log:
STANBOL-612: The KeywordLinkingEngine now validates the enhancement by using the
EnhancementStructureHelper utility. Also added validation for STANBOL-625.
Added:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
(contents, props changed)
- copied, changed from r1340995,
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
Modified:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1342580&r1=1342579&r2=1342580&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
Fri May 25 11:34:33 2012
@@ -240,7 +240,11 @@ public class KeywordLinkingEngine
unbind = "disableOfflineMode",
strategy = ReferenceStrategy.EVENT)
private OfflineMode offlineMode;
- private String referencedSiteName;
+ /**
+ * The name of the reference site ('local' or 'entityhub') if the
+ * Entityhub is used for enhancing
+ */
+ protected String referencedSiteName;
/**
* Called by the ConfigurationAdmin to bind the {@link #offlineMode} if
the service becomes available
Copied:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
(from r1340995,
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java)
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java?p2=incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java&p1=incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java&r1=1340995&r2=1342580&rev=1342580&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
Fri May 25 11:34:33 2012
@@ -14,9 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.stanbol.enhancer.engines.keywordextraction;
+package org.apache.stanbol.enhancer.engines.keywordextraction.engine;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
+import static
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
+import static
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEntityAnnotation;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@@ -24,15 +33,24 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import opennlp.tools.tokenize.SimpleTokenizer;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig;
import
org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import
org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine;
import
org.apache.stanbol.enhancer.engines.keywordextraction.impl.ClasspathDataFileProvider;
import
org.apache.stanbol.enhancer.engines.keywordextraction.impl.TestSearcherImpl;
import
org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntityLinker;
@@ -43,23 +61,31 @@ import org.apache.stanbol.enhancer.engin
import
org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl.OpenNlpAnalysedContentFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* TODO: convert this to an integration test!
* @author Rupert Westenthaler
*/
-public class TestTaxonomyLinker {
+public class KeywordLinkingEngineTest {
+
+ private final static Logger log =
LoggerFactory.getLogger(KeywordLinkingEngineTest.class);
/**
* The context for the tests (same as in TestOpenNLPEnhancementEngine)
@@ -72,10 +98,12 @@ public class TestTaxonomyLinker {
private static final ContentItemFactory ciFactory =
InMemoryContentItemFactory.getInstance();
+ private static final String TEST_REFERENCED_SITE_NAME = "dummRefSiteName";
+
static TestSearcherImpl searcher;
static ValueFactory factory = InMemoryValueFactory.getInstance();
private static OpenNLP openNLP;
-
+
public static final String NAME = NamespaceEnum.rdfs+"label";
public static final String TYPE = NamespaceEnum.rdf+"type";
public static final String REDIRECT = NamespaceEnum.rdfs+"seeAlso";
@@ -115,6 +143,16 @@ public class TestTaxonomyLinker {
rep.addNaturalText(NAME, "Otago");
rep.addReference(TYPE,
OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
searcher.addEntity(rep);
+ //add a 2nd Otago (Place and University)
+ rep = factory.createRepresentation("urn:test:Otago_Texas");
+ rep.addNaturalText(NAME, "Otago (Texas)");
+ rep.addNaturalText(NAME, "Otago");
+ rep.addReference(TYPE,
OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
+ searcher.addEntity(rep);
+ rep = factory.createRepresentation("urn:test:UniversityOfOtago_Texas");
+ rep.addNaturalText(NAME, "University of Otago (Texas)");
+ rep.addReference(TYPE,
OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString());
+ searcher.addEntity(rep);
}
@Before
@@ -132,7 +170,11 @@ public class TestTaxonomyLinker {
public static ContentItem getContentItem(final String id, final String
text) throws IOException {
return ciFactory.createContentItem(new UriRef(id),new
StringSource(text));
}
-
+ /**
+ * This tests the EntityLinker functionality (if the expected Entities
+ * are linked)
+ * @throws Exception
+ */
@Test
public void testTaxonomyLinker() throws Exception {
OpenNlpAnalysedContentFactory acf =
OpenNlpAnalysedContentFactory.getInstance(openNLP,
@@ -150,7 +192,7 @@ public class TestTaxonomyLinker {
expectedResults.put("New Zealand", new ArrayList<String>(
Arrays.asList("urn:test:NewZealand")));
expectedResults.put("University of Otago", new ArrayList<String>(
- Arrays.asList("urn:test:UniversityOfOtago")));
+
Arrays.asList("urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas")));
for(LinkedEntity linkedEntity : linker.getLinkedEntities().values()){
List<String> expectedSuggestions =
expectedResults.remove(linkedEntity.getSelectedText());
assertNotNull("LinkedEntity "+linkedEntity.getSelectedText()+
@@ -176,5 +218,81 @@ public class TestTaxonomyLinker {
}
}
}
-
+ /**
+ * This tests if the Enhancements created by the Engine conform to the
+ * rules defined for the Stanbol Enhancement Structure.
+ * @throws IOException
+ * @throws EngineException
+ */
+ @Test
+ public void testEngine() throws IOException, EngineException {
+ EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
+ linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
+ KeywordLinkingEngine engine =
KeywordLinkingEngine.createInstance(openNLP, searcher, new
TextAnalyzerConfig(),
+ linkerConfig);
+ engine.referencedSiteName = TEST_REFERENCED_SITE_NAME;
+ ContentItem ci = ciFactory.createContentItem(new
StringSource(TEST_TEXT));
+ //tells the engine that this is an English text
+ ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new
PlainLiteralImpl("en")));
+ //compute the enhancements
+ engine.computeEnhancements(ci);
+ //validate the enhancement results
+ Map<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+ expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
+
expectedValues.put(DC_CREATOR,LiteralFactory.getInstance().createTypedLiteral(
+ engine.getClass().getName()));
+ //validate created fise:TextAnnotations
+ int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(),
TEST_TEXT, expectedValues);
+ assertEquals("Four fise:TextAnnotations are expected by this Test", 4,
numTextAnnotations);
+ //validate created fise:EntityAnnotations
+ int numEntityAnnotations = validateAllEntityAnnotations(ci,
expectedValues);
+ assertEquals("Five fise:EntityAnnotations are expected by this Test",
5, numEntityAnnotations);
+ }
+ /**
+ * Similar to {@link
EnhancementStructureHelper#validateAllEntityAnnotations(org.apache.clerezza.rdf.core.TripleCollection,
Map)}
+ * but in addition checks fise:confidence [0..1] and entityhub:site
properties
+ * @param ci
+ * @param expectedValues
+ * @return
+ */
+ private static int validateAllEntityAnnotations(ContentItem ci,
Map<UriRef,Resource> expectedValues){
+ Iterator<Triple> entityAnnotationIterator =
ci.getMetadata().filter(null,
+ RDF_TYPE, ENHANCER_ENTITYANNOTATION);
+ int entityAnnotationCount = 0;
+ while (entityAnnotationIterator.hasNext()) {
+ UriRef entityAnnotation = (UriRef)
entityAnnotationIterator.next().getSubject();
+ // test if selected Text is added
+ validateEntityAnnotation(ci.getMetadata(), entityAnnotation,
expectedValues);
+ //validate also that the confidence is between [0..1]
+ Iterator<Triple> confidenceIterator =
ci.getMetadata().filter(entityAnnotation, ENHANCER_CONFIDENCE, null);
+ //NOTE: the fact that fise:confidence values are TypedLiterals of
type xsd:double
+ // is already validated at this point
+ // Also that there are only [0..1] confidence values
+ assertTrue("Expected fise:confidence value is missing
(entityAnnotation "
+ +entityAnnotation+")",confidenceIterator.hasNext());
+ Double confidence =
LiteralFactory.getInstance().createObject(Double.class,
+ (TypedLiteral)confidenceIterator.next().getObject());
+ assertTrue("fise:confidence MUST BE <= 1 (value= '"+confidence
+ + "',entityAnnotation " +entityAnnotation+")",
+ 1.0 >= confidence.doubleValue());
+ assertTrue("fise:confidence MUST BE >= 0 (value= '"+confidence
+ +"',entityAnnotation "+entityAnnotation+")",
+ 0.0 <= confidence.doubleValue());
+ //Test the entityhub:site property (STANBOL-625)
+ UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri());
+ Iterator<Triple> entitySiteIterator =
ci.getMetadata().filter(entityAnnotation,
+ ENTITYHUB_SITE, null);
+ assertTrue("Expected entityhub:site value is missing
(entityAnnotation "
+ +entityAnnotation+")",entitySiteIterator.hasNext());
+ Resource siteResource = entitySiteIterator.next().getObject();
+ assertTrue("entityhub:site values MUST BE Literals", siteResource
instanceof Literal);
+ assertEquals("'"+TEST_REFERENCED_SITE_NAME+"' is expected as "
+ + "entityhub:site value", TEST_REFERENCED_SITE_NAME,
+ ((Literal)siteResource).getLexicalForm());
+ assertFalse("entityhub:site MUST HAVE only a single value",
entitySiteIterator.hasNext());
+ entityAnnotationCount++;
+ }
+ return entityAnnotationCount;
+
+ }
}
Propchange:
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain