Author: ogrisel
Date: Tue Mar 27 12:56:42 2012
New Revision: 1305826

URL: http://svn.apache.org/viewvc?rev=1305826&view=rev
Log:
STANBOL-197: make it possible to initialize an empty model from a SKOS taxonomy

Added:
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml
Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/pom.xml
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README

Modified: incubator/stanbol/trunk/enhancer/engines/topic/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/pom.xml?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/pom.xml Tue Mar 27 12:56:42 2012
@@ -268,10 +268,18 @@
       <artifactId>rdf.core</artifactId>
     </dependency>
     <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.jena.parser</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
     </dependency>
-
+    <dependency> <!--  contains the GraphNode -->
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.utils</artifactId>
+    </dependency>
   </dependencies>
 
 </project>

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Tue Mar 27 12:56:42 2012
@@ -26,6 +26,7 @@ import java.util.Date;
 import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.Hashtable;
+import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -33,9 +34,13 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.UUID;
 
+import org.apache.clerezza.rdf.core.Graph;
 import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.utils.GraphNode;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.felix.scr.annotations.Activate;
@@ -643,7 +648,9 @@ public class TopicClassificationEngine e
                     request.add(newEntry);
                 }
             }
-            solrServer.request(request);
+            if (request.getDocuments() != null && request.getDocuments().size() > 0) {
+                solrServer.request(request);
+            }
         } catch (Exception e) {
             String msg = String.format("Error invalidating topics [%s] on Solr Core '%s'",
                 StringUtils.join(conceptIds, ", "), solrCoreId);
@@ -1224,4 +1231,31 @@ public class TopicClassificationEngine e
         }
         return chainNames;
     }
+
+    public int importConceptsFromGraph(Graph graph, UriRef conceptClass, UriRef broaderProperty) throws ClassifierException {
+        int importedCount = 0;
+        Iterator<Triple> conceptIterator = graph.filter(null,
+            org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, conceptClass);
+        while (conceptIterator.hasNext()) {
+            Triple conceptTriple = conceptIterator.next();
+            if (!(conceptTriple.getSubject() instanceof UriRef)) {
+                continue;
+            }
+            UriRef conceptUri = (UriRef) conceptTriple.getSubject();
+            GraphNode node = new GraphNode(conceptUri, graph);
+            List<String> broaderConcepts = new ArrayList<String>();
+            // TODO: use OWL property inference on sub-properties here instead of explicit
+            // property filter
+            Iterator<GraphNode> broaderIterator = node.getObjectNodes(broaderProperty);
+            while (broaderIterator.hasNext()) {
+                Resource node2 = broaderIterator.next().getNode();
+                if (node2 instanceof UriRef) {
+                    broaderConcepts.add(((UriRef) node2).getUnicodeString());
+                }
+            }
+            addConcept(conceptUri.getUnicodeString(), broaderConcepts);
+            importedCount++;
+        }
+        return importedCount;
+    }
 }
\ No newline at end of file

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Tue Mar 27 12:56:42 2012
@@ -34,6 +34,10 @@ import java.util.Map;
 import java.util.Random;
 import java.util.TreeMap;
 
+import org.apache.clerezza.rdf.core.Graph;
+import org.apache.clerezza.rdf.core.serializedform.Parser;
+import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
+import org.apache.clerezza.rdf.jena.parser.JenaParserProvider;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
@@ -44,6 +48,8 @@ import org.apache.solr.client.solrj.resp
 import org.apache.solr.common.params.CommonParams;
 import org.apache.stanbol.commons.solr.utils.StreamQueryRequest;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.topic.ClassificationReport;
 import org.apache.stanbol.enhancer.topic.ClassifierException;
 import org.apache.stanbol.enhancer.topic.EmbeddedSolrHelper;
@@ -155,6 +161,17 @@ public class TopicEngineTest extends Emb
     }
 
     @Test
+    public void testImportModelFromSKOS() throws Exception {
+        Parser parser = Parser.getInstance();
+        parser.bindParsingProvider(new JenaParserProvider());
+        Graph graph = parser.parse(getClass().getResourceAsStream("/sample-scheme.skos.rdf.xml"),
+            SupportedFormat.RDF_XML);
+        int imported = classifier.importConceptsFromGraph(graph, OntologicalClasses.SKOS_CONCEPT,
+            Properties.SKOS_BROADER);
+        assertEquals(imported, 4);
+    }
+    
+    @Test
     public void testProgrammaticThesaurusConstruction() throws Exception {
         // Register the roots of the taxonomy
         classifier.addConcept("http://example.com/topics/root1", null);
@@ -555,4 +572,5 @@ public class TopicEngineTest extends Emb
         config.put(SolrTrainingSet.MODIFICATION_DATE_FIELD, "modification_dt");
         return config;
     }
+
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README?rev=1305826&r1=1305825&r2=1305826&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/README Tue Mar 27 12:56:42 2012
@@ -15,4 +15,6 @@ limitations under the License.
 
 The following files are provided under the Apache License, Version 2.0:
 
-classifier/topics_abstracts_snippet.tsv
+sample-scheme.skos.rdf.xml
+
+

Added: 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml?rev=1305826&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml (added)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/sample-scheme.skos.rdf.xml Tue Mar 27 12:56:42 2012
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<rdf:RDF xmlns="http://example.com/ns#" xmlns:ex="http://example.com/ns#"
+  xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+
+  <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/">
+    <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#ConceptScheme" />
+    <skos:HasTopConcept>
+      <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/100">
+        <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+      </rdf:Description>
+    </skos:HasTopConcept>
+
+    <skos:HasTopConcept>
+      <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/200">
+        <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+      </rdf:Description>
+    </skos:HasTopConcept>
+
+  </rdf:Description>
+  <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/100">
+    <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+    <skos:prefLabel xml:lang="en-GB">Root Topic 01
+    </skos:prefLabel>
+    <skos:definition xml:lang="en-GB">The first top level
+      topic
+      of the "someconceptscheme" controlled vocabulary.
+    </skos:definition>
+
+  </rdf:Description>
+
+  <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/200">
+    <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+    <skos:prefLabel xml:lang="en-GB">Root Topic 02
+    </skos:prefLabel>
+    <skos:definition xml:lang="en-GB">The second top level
+      topic of the "someconceptscheme" controlled vocabulary.
+    </skos:definition>
+
+  </rdf:Description>
+
+  <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/010">
+    <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+    <skos:prefLabel xml:lang="en-GB">Nested topic 1
+    </skos:prefLabel>
+    <skos:definition xml:lang="en-GB">A subtopic of the first
+      top level node.
+    </skos:definition>
+
+    <skos:broader>
+      <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/100">
+        <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+      </rdf:Description>
+    </skos:broader>
+
+  </rdf:Description>
+
+  <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/020">
+    <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+    <skos:prefLabel xml:lang="en-GB">Nested topic 2
+    </skos:prefLabel>
+    <skos:definition xml:lang="en-GB">Another nested topic.
+    </skos:definition>
+
+    <skos:broader>
+      <rdf:Description rdf:about="http://example.com/ns#someconceptscheme/200">
+        <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept" />
+      </rdf:Description>
+    </skos:broader>
+
+  </rdf:Description>
+
+</rdf:RDF>


Reply via email to