Author: ogrisel
Date: Tue Mar 27 12:56:42 2012
New Revision: 1305826
URL: http://svn.apache.org/viewvc?rev=1305826&view=rev
Log:
STANBOL-197: make it possible to initialize an empty model from a SKOS taxonomy
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/pom.xml
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README
Modified: incubator/stanbol/trunk/enhancer/engines/topic/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/pom.xml?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/pom.xml Tue Mar 27 12:56:42
2012
@@ -268,10 +268,18 @@
<artifactId>rdf.core</artifactId>
</dependency>
<dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.jena.parser</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
-
+ <dependency> <!-- contains the GraphNode -->
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.utils</artifactId>
+ </dependency>
</dependencies>
</project>
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Tue Mar 27 12:56:42 2012
@@ -26,6 +26,7 @@ import java.util.Date;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Hashtable;
+import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -33,9 +34,13 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
+import org.apache.clerezza.rdf.core.Graph;
import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.utils.GraphNode;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
@@ -643,7 +648,9 @@ public class TopicClassificationEngine e
request.add(newEntry);
}
}
- solrServer.request(request);
+ if (request.getDocuments() != null &&
request.getDocuments().size() > 0) {
+ solrServer.request(request);
+ }
} catch (Exception e) {
String msg = String.format("Error invalidating topics [%s] on Solr
Core '%s'",
StringUtils.join(conceptIds, ", "), solrCoreId);
@@ -1224,4 +1231,31 @@ public class TopicClassificationEngine e
}
return chainNames;
}
+
+    /**
+     * Imports the concepts described in an RDF graph into this classifier's model.
+     * <p>
+     * Every subject that is typed as {@code conceptClass} and is a {@link UriRef} is registered
+     * through {@code addConcept}, together with the URIs of the resources it references via
+     * {@code broaderProperty}. Subjects and broader values that are not URI references are
+     * skipped.
+     *
+     * @param graph the graph holding the taxonomy (for instance a parsed SKOS scheme)
+     * @param conceptClass the RDF type that marks a resource as a concept
+     * @param broaderProperty the property linking a concept to its broader concepts
+     * @return the number of concepts handed to {@code addConcept}
+     * @throws IllegalArgumentException if any argument is {@code null}
+     * @throws ClassifierException declared for the {@code addConcept} call
+     */
+    public int importConceptsFromGraph(Graph graph, UriRef conceptClass, UriRef broaderProperty)
+            throws ClassifierException {
+        if (graph == null || conceptClass == null || broaderProperty == null) {
+            // graph.filter(...) treats a null pattern element as a wildcard (see the null
+            // subject below): a null conceptClass would silently import every typed resource
+            // instead of failing, so reject null arguments up front.
+            throw new IllegalArgumentException(
+                "graph, conceptClass and broaderProperty must not be null");
+        }
+        int importedCount = 0;
+        Iterator<Triple> conceptIterator = graph.filter(null,
+            org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, conceptClass);
+        while (conceptIterator.hasNext()) {
+            Triple conceptTriple = conceptIterator.next();
+            if (!(conceptTriple.getSubject() instanceof UriRef)) {
+                // only URI-identified subjects provide a concept id string
+                continue;
+            }
+            UriRef conceptUri = (UriRef) conceptTriple.getSubject();
+            GraphNode node = new GraphNode(conceptUri, graph);
+            List<String> broaderConcepts = new ArrayList<String>();
+            // TODO: use OWL property inference on sub-properties here instead of explicit
+            // property filter
+            Iterator<GraphNode> broaderIterator = node.getObjectNodes(broaderProperty);
+            while (broaderIterator.hasNext()) {
+                Resource node2 = broaderIterator.next().getNode();
+                if (node2 instanceof UriRef) {
+                    broaderConcepts.add(((UriRef) node2).getUnicodeString());
+                }
+            }
+            addConcept(conceptUri.getUnicodeString(), broaderConcepts);
+            importedCount++;
+        }
+        return importedCount;
+    }
}
\ No newline at end of file
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Tue Mar 27 12:56:42 2012
@@ -34,6 +34,10 @@ import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
+import org.apache.clerezza.rdf.core.Graph;
+import org.apache.clerezza.rdf.core.serializedform.Parser;
+import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
+import org.apache.clerezza.rdf.jena.parser.JenaParserProvider;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
@@ -44,6 +48,8 @@ import org.apache.solr.client.solrj.resp
import org.apache.solr.common.params.CommonParams;
import org.apache.stanbol.commons.solr.utils.StreamQueryRequest;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.topic.ClassificationReport;
import org.apache.stanbol.enhancer.topic.ClassifierException;
import org.apache.stanbol.enhancer.topic.EmbeddedSolrHelper;
@@ -155,6 +161,17 @@ public class TopicEngineTest extends Emb
}
@Test
+    public void testImportModelFromSKOS() throws Exception {
+        // Parse the sample SKOS scheme shipped with the test resources
+        Parser parser = Parser.getInstance();
+        parser.bindParsingProvider(new JenaParserProvider());
+        Graph graph = parser.parse(getClass().getResourceAsStream("/sample-scheme.skos.rdf.xml"),
+            SupportedFormat.RDF_XML);
+        int imported = classifier.importConceptsFromGraph(graph, OntologicalClasses.SKOS_CONCEPT,
+            Properties.SKOS_BROADER);
+        // The scheme declares four skos:Concept resources; expected value goes first so that a
+        // failure message reports expected/actual the right way round
+        assertEquals(4, imported);
+    }
+
+ @Test
public void testProgrammaticThesaurusConstruction() throws Exception {
// Register the roots of the taxonomy
classifier.addConcept("http://example.com/topics/root1", null);
@@ -555,4 +572,5 @@ public class TopicEngineTest extends Emb
config.put(SolrTrainingSet.MODIFICATION_DATE_FIELD, "modification_dt");
return config;
}
+
}
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README
(original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README
Tue Mar 27 12:56:42 2012
@@ -15,4 +15,6 @@ limitations under the License.
The following files are provided under the Apache License, Version 2.0:
-classifier/topics_abstracts_snippet.tsv
+sample-scheme.skos.rdf.xml
+
+
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml?rev=1305826&view=auto
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml
(added)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml
Tue Mar 27 12:56:42 2012
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<rdf:RDF xmlns="http://example.com/ns#" xmlns:ex="http://example.com/ns#"
+ xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#ConceptScheme"
/>
+ <skos:hasTopConcept>
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/100">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ </rdf:Description>
+ </skos:hasTopConcept>
+
+ <skos:hasTopConcept>
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/200">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ </rdf:Description>
+ </skos:hasTopConcept>
+
+ </rdf:Description>
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/100">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ <skos:prefLabel xml:lang="en-GB">Root Topic 01
+ </skos:prefLabel>
+ <skos:definition xml:lang="en-GB">The first top level
+ topic
+ of the "someconceptscheme" controlled vocabulary.
+ </skos:definition>
+
+ </rdf:Description>
+
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/200">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ <skos:prefLabel xml:lang="en-GB">Root Topic 02
+ </skos:prefLabel>
+ <skos:definition xml:lang="en-GB">The second top level
+ topic of the "someconceptscheme" controlled vocabulary.
+ </skos:definition>
+
+ </rdf:Description>
+
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/010">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ <skos:prefLabel xml:lang="en-GB">Nested topic 1
+ </skos:prefLabel>
+ <skos:definition xml:lang="en-GB">A subtopic of the first
+ top level node.
+ </skos:definition>
+
+ <skos:broader>
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/100">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ </rdf:Description>
+ </skos:broader>
+
+ </rdf:Description>
+
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/020">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ <skos:prefLabel xml:lang="en-GB">Nested topic 2
+ </skos:prefLabel>
+ <skos:definition xml:lang="en-GB">Another nested topic.
+ </skos:definition>
+
+ <skos:broader>
+ <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/200">
+ <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+ </rdf:Description>
+ </skos:broader>
+
+ </rdf:Description>
+
+</rdf:RDF>