Repository: incubator-commonsrdf Updated Branches: refs/heads/rdf4j-jena-compat [created] 84c703204
Always use RDFLoader but for Path support we'll always open the inputstream ourselves - both to support multiple providers and also to have more consistent handling of basePath for symlinked files. Project: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/commit/81b1be80 Tree: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/tree/81b1be80 Diff: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/diff/81b1be80 Branch: refs/heads/rdf4j-jena-compat Commit: 81b1be803e1a7d60fb272036e4bf596e7d8a8901 Parents: 9c66ae0 Author: Stian Soiland-Reyes <st...@apache.org> Authored: Mon Jun 20 14:38:07 2016 +0100 Committer: Stian Soiland-Reyes <st...@apache.org> Committed: Mon Jun 20 14:38:07 2016 +0100 ---------------------------------------------------------------------- .../commons/rdf/rdf4j/RDF4JParserBuilder.java | 74 +++++++++++++++----- 1 file changed, 56 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/81b1be80/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java ---------------------------------------------------------------------- diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java index 0c1aa12..d0b2c6c 100644 --- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java +++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java @@ -18,25 +18,39 @@ package org.apache.commons.rdf.rdf4j; import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Files; import java.nio.file.Path; import java.util.Optional; import java.util.function.Consumer; +import org.apache.commons.rdf.api.IRI; import org.apache.commons.rdf.api.Quad; import org.apache.commons.rdf.api.RDFParserBuilder; -import org.apache.commons.rdf.rdf4j.RDF4JDataset; -import org.apache.commons.rdf.rdf4j.RDF4JGraph; -import org.apache.commons.rdf.rdf4j.RDF4JTermFactory; +import org.apache.commons.rdf.api.RDFSyntax; import org.apache.commons.rdf.simple.AbstractRDFParserBuilder; import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.repository.util.RDFInserter; +import org.eclipse.rdf4j.repository.util.RDFLoader; +import org.eclipse.rdf4j.rio.ParserConfig; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFHandler; import org.eclipse.rdf4j.rio.RDFHandlerException; -import org.eclipse.rdf4j.rio.RDFParser; import org.eclipse.rdf4j.rio.Rio; import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler; +/** + * RDF4J-based parser. + * <p> + * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD}, + * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES}, + * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE} + * - additional syntaxes can be supported by including the corresponding + * <em>rdf4j-rio-*</em> module on the classpath. + * + */ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFParserBuilder { private final class AddToQuadConsumer extends AbstractRDFHandler { @@ -74,7 +88,7 @@ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFP throws org.eclipse.rdf4j.rio.RDFHandlerException { model.add(st); } - + @Override public void handleNamespace(String prefix, String uri) throws RDFHandlerException { model.setNamespace(prefix, uri); @@ -100,19 +114,43 @@ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFP @Override protected void parseSynchronusly() throws IOException, RDFParseException { - if (getContentType().isPresent()) { - Rio.getParserFormatForMIMEType(getContentType().get()); - } - Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType); - Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString) - .flatMap(Rio::getParserFormatForFileName); - RDFFormat format = formatByMimeType.orElse( - formatByFilename.orElseThrow(() -> new RDFParseException("Unrecognized or missing content type"))); - - RDFParser parser = Rio.createParser(format); - - parser.setRDFHandler(makeRDFHandler()); + String base = getBase().map(IRI::getIRIString).orElse(null); + + ParserConfig parserConfig = new ParserConfig(); + // TODO: Should we need to set anything? + RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory()); + RDFHandler rdfHandler = makeRDFHandler(); + if (getSourceFile().isPresent()) { + // NOTE: While we could have used + // loader.load(sourcePath.toFile() + // if the path fs provider == FileSystems.getDefault(), + // that RDFLoader method does not use absolute path + // as the base URI, so to be consistent + // we'll always do it with our own input stream + // + // That means we may have to guess format by extensions: + Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString) + .flatMap(Rio::getParserFormatForFileName); + // TODO: for the excited.. what about the extension after following symlinks? + + RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null)); + try (InputStream in = Files.newInputStream(getSourceFile().get())) { + loader.load(in, base, format, rdfHandler); + } + } else if (getSourceIri().isPresent()) { + try { + // TODO: Handle international IRIs properly + // (Unicode support for for hostname, path and query) + URL url = new URL(getSourceIri().get().getIRIString()); + // TODO: This probably does not support https:// -> http:// redirections + loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler()); + } catch (MalformedURLException ex) { + throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex); + } + } + // must be getSourceInputStream then, this is guaranteed by super.checkSource(); + loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler); } protected RDFHandler makeRDFHandler() { @@ -124,7 +162,7 @@ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFP if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) { // One of us, we can add them as Statements directly RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get(); - if (dataset.asRepository().isPresent()) { + if (dataset.asRepository().isPresent()) { return new RDFInserter(dataset.asRepository().get().getConnection()); } if (dataset.asModel().isPresent()) {