Hi, when I see things like
+ * Author: Matthew Horridge<br> + * The University Of Manchester<br> + * Bio-Health Informatics Group<br> + * Date: 26-Apr-2007<br> I always ask myself whether Matthew knows that his code is contributed here. Have you ensured that we can add the Apache License to this? Best, - Fabian 2011/7/15 <[email protected]>: > Author: alexdma > Date: Fri Jul 15 16:36:16 2011 > New Revision: 1147221 > > URL: http://svn.apache.org/viewvc?rev=1147221&view=rev > Log: > STANBOL-285 : > - Added URI util to stanbol/owl (TODO: centralise uri management utilities?) > > Added: > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/ > > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/AutoIRIMapper.java > > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/URIUtils.java > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/ > > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestArchives.java > > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestUriUtils.java > incubator/stanbol/trunk/owl/src/test/resources/ > incubator/stanbol/trunk/owl/src/test/resources/ontologies/ > incubator/stanbol/trunk/owl/src/test/resources/ontologies/archivetest2.owl > incubator/stanbol/trunk/owl/src/test/resources/ontologies/ontoarchive.zip > (with props) > Removed: > > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owlapi/trasformation/ > > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owlapi/trasformation/ > Modified: > incubator/stanbol/trunk/owl/pom.xml > > Modified: incubator/stanbol/trunk/owl/pom.xml > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/pom.xml?rev=1147221&r1=1147220&r2=1147221&view=diff > ============================================================================== > --- incubator/stanbol/trunk/owl/pom.xml (original) > +++ incubator/stanbol/trunk/owl/pom.xml Fri Jul 15 16:36:16 2011 > @@ -135,7 +135,6 @@ > 
<artifactId>jettison</artifactId> > </dependency> > > - > <!-- OWL deps --> > <dependency> > <groupId>com.hp.hpl.jena</groupId> > @@ -183,6 +182,16 @@ > <groupId>javax.servlet</groupId> > <artifactId>servlet-api</artifactId> > </dependency> > + > + <!-- Misc deps --> > + <dependency> > + <groupId>org.apache.commons</groupId> > + <artifactId>commons-compress</artifactId> > + </dependency> > + <dependency> > + <groupId>commons-io</groupId> > + <artifactId>commons-io</artifactId> > + </dependency> > > <!-- Diagnostics deps --> > <dependency> > > Added: > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/AutoIRIMapper.java > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/AutoIRIMapper.java?rev=1147221&view=auto > ============================================================================== > --- > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/AutoIRIMapper.java > (added) > +++ > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/AutoIRIMapper.java > Fri Jul 15 16:36:16 2011 > @@ -0,0 +1,363 @@ > +package org.apache.stanbol.owl.util; > + > +import java.io.BufferedInputStream; > +import java.io.BufferedReader; > +import java.io.File; > +import java.io.FileInputStream; > +import java.io.FileNotFoundException; > +import java.io.IOException; > +import java.io.InputStream; > +import java.io.InputStreamReader; > +import java.util.Collections; > +import java.util.HashMap; > +import java.util.HashSet; > +import java.util.Map; > +import java.util.Set; > +import java.util.StringTokenizer; > +import java.util.zip.GZIPInputStream; > + > +import javax.xml.parsers.ParserConfigurationException; > +import javax.xml.parsers.SAXParser; > +import javax.xml.parsers.SAXParserFactory; > + > +import org.apache.commons.compress.archivers.ArchiveEntry; > +import org.apache.commons.compress.archivers.ArchiveException; > +import 
org.apache.commons.compress.archivers.ArchiveInputStream; > +import org.apache.commons.compress.archivers.ArchiveStreamFactory; > +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; > +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; > +import > org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; > +import org.apache.commons.io.FilenameUtils; > +import org.semanticweb.owlapi.model.IRI; > +import org.semanticweb.owlapi.model.OWLOntologyIRIMapper; > +import org.semanticweb.owlapi.model.OWLRuntimeException; > +import org.semanticweb.owlapi.vocab.Namespaces; > +import org.semanticweb.owlapi.vocab.OWLXMLVocabulary; > +import org.xml.sax.Attributes; > +import org.xml.sax.SAXException; > +import org.xml.sax.helpers.DefaultHandler; > + > +/** > + * Author: Matthew Horridge<br> > + * The University Of Manchester<br> > + * Bio-Health Informatics Group<br> > + * Date: 26-Apr-2007<br> > + * <br> > + * <p/> > + * A mapper which given a root folder attempts to automatically discover and > map files to ontologies. The > + * mapper is only capable of mapping ontologies in RDF/XML and OWL/XML > (other serialisations are not > + * supported). 
> + */ > +public class AutoIRIMapper extends DefaultHandler implements > OWLOntologyIRIMapper { > + > + private Set<String> fileExtensions; > + > + private File root; > + > + private boolean mapped; > + > + private boolean recursive; > + > + private Map<String,OntologyRootElementHandler> handlerMap; > + > + private File currentFile; > + > + private Map<IRI,IRI> ontologyIRI2PhysicalURIMap; > + > + private Map<String,IRI> oboFileMap; > + > + private SAXParserFactory parserFactory; > + > + private static final String[] DEFAULT_EXTENSIONS = new String[] {"owl", > "xml", "rdf", "omn"}; > + > + public AutoIRIMapper(File rootDirectory, boolean recursive) { > + this(rootDirectory, DEFAULT_EXTENSIONS, recursive); > + } > + > + /** > + * Creates an auto-mapper which examines ontologies that reside in the > specified root folder (and possibly > + * sub-folders). > + * > + * @param rootDirectory > + * The root directory which should be searched for ontologies. > + * @param recursive > + * Sub directories will be searched recursively if > <code>true</code>. > + */ > + public AutoIRIMapper(File rootDirectory, String[] fileExts, boolean > recursive) { > + this.root = rootDirectory; > + this.recursive = recursive; > + ontologyIRI2PhysicalURIMap = new HashMap<IRI,IRI>(); > + oboFileMap = new HashMap<String,IRI>(); > + fileExtensions = new HashSet<String>(); > + for (String ext : fileExts) > + fileExtensions.add(ext); > + mapped = false; > + handlerMap = new HashMap<String,OntologyRootElementHandler>(); > + handlerMap.put(Namespaces.RDF + "RDF", new > RDFXMLOntologyRootElementHandler()); > + handlerMap.put(OWLXMLVocabulary.ONTOLOGY.toString(), new > OWLXMLOntologyRootElementHandler()); > + parserFactory = SAXParserFactory.newInstance(); > + parserFactory.setNamespaceAware(true); > + } > + > + /** > + * The mapper only examines files that have specified file extensions. > This method returns the file > + * extensions that cause a file to be examined. 
> + * > + * @return A <code>Set</code> of file extensions. > + */ > + public Set<String> getFileExtensions() { > + return fileExtensions; > + } > + > + /** > + * Sets the extensions of files that are to be examined for ontological > content. (By default the > + * extensions are, owl, xml and rdf). Only files that have the specified > extensions will be examined to > + * see if they contain ontologies. > + */ > + public void setFileExtensions(Set<String> extensions) { > + this.fileExtensions.clear(); > + this.fileExtensions.addAll(extensions); > + } > + > + /** > + * Gets the set of ontology IRIs that this mapper has found > + * > + * @return A <code>Set</code> of ontology (logical) URIs > + */ > + public Set<IRI> getOntologyIRIs() { > + if (!mapped) { > + mapFiles(); > + } > + return new HashSet<IRI>(ontologyIRI2PhysicalURIMap.keySet()); > + } > + > + public void update() { > + mapFiles(); > + } > + > + public IRI getDocumentIRI(IRI ontologyIRI) { > + if (!mapped) { > + mapFiles(); > + } > + if (ontologyIRI.toString().endsWith(".obo")) { > + String path = ontologyIRI.toURI().getPath(); > + if (path != null) { > + int lastSepIndex = path.lastIndexOf('/'); > + String name = path.substring(lastSepIndex + 1, > path.length()); > + IRI documentIRI = oboFileMap.get(name); > + if (documentIRI != null) { > + return documentIRI; > + } > + } > + } > + return ontologyIRI2PhysicalURIMap.get(ontologyIRI); > + } > + > + private void mapFiles() { > + mapped = true; > + ontologyIRI2PhysicalURIMap.clear(); > + processFile(root); > + } > + > + private void processFile(File f) { > + if (f.isHidden()) { > + return; > + } > + File[] files = null; > + > + if (f.isDirectory()) > + files = f.listFiles(); > + else { > + > + try { > + ArchiveInputStream ais = new > ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(new > FileInputStream(f))); > + > + //getArchiveInputStream(f.getName(), new > FileInputStream(f)); > + ArchiveEntry entry; > + while ((entry = 
ais.getNextEntry())!=null) { > + > + > + System.out.println(entry); > + > + > + }; > + > + > + > + } catch (FileNotFoundException e) { > + // TODO Auto-generated catch block > + e.printStackTrace(); > + } catch (IOException e) { > + // TODO Auto-generated catch block > + e.printStackTrace(); > + } catch (ArchiveException e) { > + // TODO Auto-generated catch block > + e.printStackTrace(); > + } > + } > + > + > + if (files != null) { > + for (File file : files) { > + if (file.isDirectory() && recursive) { > + processFile(file); > + } else { > + // boolean parsedFile = false; > + if (file.getName().endsWith(".obo")) { > + oboFileMap.put(file.getName(), IRI.create(file)); > + } else if (file.getName().endsWith(".omn")) { > + parseManchesterSyntaxFile(file); > + } else { > + for (String ext : fileExtensions) { > + if (file.getName().endsWith(ext)) { > + parseFile(file); > + // parsedFile = true; > + break; > + } > + } > + } > + } > + } > + } > + } > + > + private void parseFile(File file) { > + try { > + InputStream is = new BufferedInputStream(new > FileInputStream(file)); > + try { > + currentFile = file; > + SAXParser parser = parserFactory.newSAXParser(); > + parser.parse(is, this); > + } catch (ParserConfigurationException e) { > + throw new OWLRuntimeException(e); > + } catch (SAXException e) { > + // We simply aren't interested in any parsing problems - if > + // we can't parse a file, then we can't map it and we don't > + // care! > + } catch (IOException e) { > + // Again - these kinds of exceptions are of no interest to > us! > + } > + } catch (FileNotFoundException e) { > + // Don't care? 
> + } > + } > + > + private void parseManchesterSyntaxFile(File file) { > + BufferedReader br = null; > + try { > + // Ontology: <URI> > + br = new BufferedReader(new InputStreamReader(new > FileInputStream(file), "UTF-8")); > + String line; > + IRI ontologyIRI = null; > + while ((line = br.readLine()) != null) { > + StringTokenizer tokenizer = new StringTokenizer(line, " > \r\n", false); > + while (tokenizer.hasMoreTokens()) { > + String tok = tokenizer.nextToken(); > + if (tok.startsWith("<") && tok.endsWith(">")) { > + ontologyIRI = IRI.create(tok.substring(1, > tok.length() - 1)); > + ontologyIRI2PhysicalURIMap.put(ontologyIRI, > IRI.create(file)); > + break; > + } > + } > + if (ontologyIRI != null) { > + break; > + } > + } > + } catch (IOException e) { > + // Ignore - don't care > + } finally { > + try { > + br.close(); > + } catch (IOException e2) { > + // no operation > + } > + } > + } > + > + @Override > + @SuppressWarnings("unused") > + public void startElement(String uri, String localName, String qName, > Attributes attributes) throws SAXException { > + OntologyRootElementHandler handler = handlerMap.get(uri + localName); > + if (handler != null) { > + IRI ontologyIRI = handler.handle(attributes); > + if (ontologyIRI != null) { > + ontologyIRI2PhysicalURIMap.put(ontologyIRI, > IRI.create(currentFile)); > + } > + throw new SAXException(); > + } > + } > + > + @Override > + public String toString() { > + StringBuilder sb = new StringBuilder(); > + sb.append("AutoURIMapper: ("); > + sb.append(ontologyIRI2PhysicalURIMap.size()); > + sb.append(" ontologies)\n"); > + for (IRI iri : ontologyIRI2PhysicalURIMap.keySet()) { > + sb.append(" "); > + sb.append(iri.toQuotedString()); > + sb.append(" -> "); > + sb.append(ontologyIRI2PhysicalURIMap.get(iri)); > + sb.append("\n"); > + } > + return sb.toString(); > + } > + > + /** > + * A simple interface which extracts an ontology URI from a set of > element attributes. 
> + */ > + private interface OntologyRootElementHandler { > + > + /** > + * Gets the ontology URI. > + * > + * @param attributes > + * The attributes which will be examined for the ontology > URI. > + * @return The ontology URI or <code>null</code> if no ontology URI > could be found. > + */ > + IRI handle(Attributes attributes); > + } > + > + /** > + * A handler to handle RDF/XML files. The xml:base (if present) is taken > to be the ontology URI of the > + * ontology document being parsed. > + */ > + private static class RDFXMLOntologyRootElementHandler implements > OntologyRootElementHandler { > + > + public RDFXMLOntologyRootElementHandler() { > + > + } > + > + public IRI handle(Attributes attributes) { > + String baseValue = > attributes.getValue(Namespaces.XML.toString(), "base"); > + if (baseValue == null) { > + return null; > + } > + return IRI.create(baseValue); > + } > + } > + > + /** > + * A handler that can handle OWL/XML files. > + */ > + private static class OWLXMLOntologyRootElementHandler implements > OntologyRootElementHandler { > + > + public OWLXMLOntologyRootElementHandler() { > + > + } > + > + public IRI handle(Attributes attributes) { > + String ontURI = attributes.getValue(Namespaces.OWL.toString(), > "ontologyIRI"); > + if (ontURI == null) { > + ontURI = attributes.getValue(Namespaces.OWL.toString(), > "ontologyIRI"); > + } > + if (ontURI == null) { > + return null; > + } > + return IRI.create(ontURI); > + } > + } > + > + > + > +} > \ No newline at end of file > > Added: > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/URIUtils.java > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/URIUtils.java?rev=1147221&view=auto > ============================================================================== > --- > incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/URIUtils.java > (added) > +++ > 
incubator/stanbol/trunk/owl/src/main/java/org/apache/stanbol/owl/util/URIUtils.java > Fri Jul 15 16:36:16 2011 > @@ -0,0 +1,66 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > +package org.apache.stanbol.owl.util; > + > +import java.net.URI; > + > +import org.semanticweb.owlapi.model.IRI; > + > +public class URIUtils { > + > + public static IRI upOne(IRI iri) { > + return upOne(iri.toURI()); > + } > + > + /** > + * Removes either the fragment, or query, or last path component from a > URI, whatever it finds first. 
> + * > + * @param uri > + * @return > + */ > + public static IRI upOne(URI uri) { > + int index = -1; > + String tmpstr = uri.toString(); > + // Strip the fragment > + String frag = uri.getFragment(); > + if (frag != null && !frag.isEmpty()) index = tmpstr.length() - > frag.length() - 1; > + else > + // Strip the query > + { > + frag = uri.getQuery(); > + if (frag != null && !frag.isEmpty()) index = tmpstr.length() - > frag.length() - 1; > + else > + // Strip the slash part > + { > + frag = uri.getPath(); > + if (frag != null && !frag.isEmpty()) { > + int i = frag.lastIndexOf("/"); > + boolean trimslash = false; > + // If it ends with a slash, remove that too > + if (i == frag.length() - 1) { > + trimslash = true; > + frag = frag.substring(0, i); > + } > + index = tmpstr.length() - frag.length() + > frag.lastIndexOf("/") + (trimslash ? -1 : 0); > + } > + } > + } > + if (index >= 0) return IRI.create(tmpstr.substring(0, index)); > + else return IRI.create(uri); > + } > + > +} > > Added: > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestArchives.java > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestArchives.java?rev=1147221&view=auto > ============================================================================== > --- > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestArchives.java > (added) > +++ > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestArchives.java > Fri Jul 15 16:36:16 2011 > @@ -0,0 +1,22 @@ > +package org.apache.stanbol.owl.util; > + > +import java.io.File; > +import java.net.URI; > + > +import org.junit.Test; > +import org.semanticweb.owlapi.apibinding.OWLManager; > +import org.semanticweb.owlapi.model.OWLOntologyManager; > + > +public class TestArchives { > + > + @Test > + public void testIRIMapper() throws Exception { > + URI uri = > getClass().getResource("/ontologies/ontoarchive.zip").toURI(); > + File f = 
new File(uri); > + OWLOntologyManager mgr = OWLManager.createOWLOntologyManager(); > + AutoIRIMapper mapp = new AutoIRIMapper(f, true); > + mgr.addIRIMapper(mapp); > + mapp.update(); > + } > + > +} > > Added: > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestUriUtils.java > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestUriUtils.java?rev=1147221&view=auto > ============================================================================== > --- > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestUriUtils.java > (added) > +++ > incubator/stanbol/trunk/owl/src/test/java/org/apache/stanbol/owl/util/TestUriUtils.java > Fri Jul 15 16:36:16 2011 > @@ -0,0 +1,52 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > +package org.apache.stanbol.owl.util; > + > +import static org.junit.Assert.*; > + > +import org.junit.Test; > +import org.semanticweb.owlapi.model.IRI; > + > +public class TestUriUtils { > + > + private String _BASE = > "http://www.ontologydesignpatterns.org/registry/explanation"; > + > + private IRI iri_hash = IRI.create(_BASE + > ".owl#ExplanationSchemaCatalog"); > + > + private IRI iri_slash = IRI.create(_BASE + "/ExplanationSchemaCatalog"); > + > + private IRI iri_slash_end = IRI.create(_BASE + > "/ExplanationSchemaCatalog/"); > + > + private IRI iri_query = IRI.create(_BASE + "?arg1=value1&arg2=value2"); > + > + private IRI iri_slash_query = IRI.create(_BASE + > "/?arg1=value1&arg2=value2"); > + > + /** > + * Test that every IRI configuration is stripped as expected. > + * > + * @throws Exception > + */ > + @Test > + public void testUpOne() throws Exception { > + assertEquals(_BASE + ".owl", URIUtils.upOne(iri_hash).toString()); > + assertEquals(_BASE, URIUtils.upOne(iri_slash).toString()); > + assertEquals(_BASE, URIUtils.upOne(iri_slash_end).toString()); > + assertEquals(_BASE, URIUtils.upOne(iri_query).toString()); > + assertEquals(_BASE + "/", > URIUtils.upOne(iri_slash_query).toString()); > + } > + > +} > > Added: > incubator/stanbol/trunk/owl/src/test/resources/ontologies/archivetest2.owl > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/src/test/resources/ontologies/archivetest2.owl?rev=1147221&view=auto > ============================================================================== > --- > incubator/stanbol/trunk/owl/src/test/resources/ontologies/archivetest2.owl > (added) > +++ > incubator/stanbol/trunk/owl/src/test/resources/ontologies/archivetest2.owl > Fri Jul 15 16:36:16 2011 > @@ -0,0 +1,24 @@ > +<?xml version="1.0"?> > + > + > +<!DOCTYPE rdf:RDF [ > + <!ENTITY owl "http://www.w3.org/2002/07/owl#" > > + <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" > > + <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" > > + 
<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" > > +]> > + > + > +<rdf:RDF xmlns="http://www.semanticweb.org/ontologies/archivetest2.owl#" > + xml:base="http://www.semanticweb.org/ontologies/archivetest2.owl" > + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" > + xmlns:owl="http://www.w3.org/2002/07/owl#" > + xmlns:xsd="http://www.w3.org/2001/XMLSchema#" > + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> > + <owl:Ontology > rdf:about="http://www.semanticweb.org/ontologies/archivetest2.owl"/> > +</rdf:RDF> > + > + > + > +<!-- Generated by the OWL API (version 3.2.3.22702) > http://owlapi.sourceforge.net --> > + > > Added: > incubator/stanbol/trunk/owl/src/test/resources/ontologies/ontoarchive.zip > URL: > http://svn.apache.org/viewvc/incubator/stanbol/trunk/owl/src/test/resources/ontologies/ontoarchive.zip?rev=1147221&view=auto > ============================================================================== > Binary file - no diff available. > > Propchange: > incubator/stanbol/trunk/owl/src/test/resources/ontologies/ontoarchive.zip > ------------------------------------------------------------------------------ > svn:mime-type = application/octet-stream > > > -- Fabian
