[
https://issues.apache.org/jira/browse/COMMONSRDF-35?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15543905#comment-15543905
]
ASF GitHub Bot commented on COMMONSRDF-35:
------------------------------------------
Github user ansell commented on a diff in the pull request:
https://github.com/apache/incubator-commonsrdf/pull/24#discussion_r81661560
--- Diff:
rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java
---
@@ -0,0 +1,206 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rdf.rdf4j.experimental;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+
+import org.apache.commons.rdf.api.IRI;
+import org.apache.commons.rdf.api.Quad;
+import org.apache.commons.rdf.api.RDFSyntax;
+import org.apache.commons.rdf.experimental.RDFParser;
+import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI;
+import org.apache.commons.rdf.rdf4j.RDF4JDataset;
+import org.apache.commons.rdf.rdf4j.RDF4JGraph;
+import org.apache.commons.rdf.rdf4j.RDF4JTermFactory;
+import org.apache.commons.rdf.simple.experimental.AbstractRDFParser;
+import org.eclipse.rdf4j.model.Model;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.repository.util.RDFInserter;
+import org.eclipse.rdf4j.repository.util.RDFLoader;
+import org.eclipse.rdf4j.rio.ParserConfig;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFHandler;
+import org.eclipse.rdf4j.rio.RDFHandlerException;
+import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
+
+/**
+ * RDF4J-based parser.
+ * <p>
+ * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
+ * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
+ * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link
RDFSyntax#TURTLE}
+ * - additional syntaxes can be supported by including the corresponding
+ * <em>rdf4j-rio-*</em> module on the classpath.
+ *
+ */
+public class RDF4JParser extends AbstractRDFParser<RDF4JParser> implements
RDFParser {
+
+ private final class AddToQuadConsumer extends AbstractRDFHandler {
+ private final Consumer<Quad> quadTarget;
+
+ private AddToQuadConsumer(Consumer<Quad> quadTarget) {
+ this.quadTarget = quadTarget;
+ }
+
+ public void handleStatement(org.eclipse.rdf4j.model.Statement
st)
+ throws
org.eclipse.rdf4j.rio.RDFHandlerException {
+ // TODO: if getRdfTermFactory() is a non-rdf4j factory,
should
+ // we use factory.createQuad() instead?
+ // Unsure what is the promise of setting
getRdfTermFactory() --
+ // does it go all the way down to creating BlankNode,
IRI and
+ // Literal?
+ quadTarget.accept(rdf4jTermFactory.asQuad(st));
+ // Performance note:
+ // Graph/Quad.add should pick up again our
+ // RDF4JGraphLike.asStatement()
+ // and avoid double conversion.
+ // Additionally the RDF4JQuad and RDF4JTriple
implementations
+ // are lazily converting subj/obj/pred/graph.s
+ }
+ }
+
+ private final static class AddToModel extends AbstractRDFHandler {
+ private final Model model;
+
+ public AddToModel(Model model) {
+ this.model = model;
+ }
+
+ public void handleStatement(org.eclipse.rdf4j.model.Statement
st)
+ throws
org.eclipse.rdf4j.rio.RDFHandlerException {
+ model.add(st);
+ }
+
+ @Override
+ public void handleNamespace(String prefix, String uri) throws
RDFHandlerException {
+ model.setNamespace(prefix, uri);
+ }
+ }
+
+ private RDF4JTermFactory rdf4jTermFactory;
+
+ @Override
+ protected RDF4JTermFactory createRDFTermFactory() {
+ return new RDF4JTermFactory();
+ }
+
+ @Override
+ protected RDF4JParser prepareForParsing() throws IOException,
IllegalStateException {
+ RDF4JParser c = prepareForParsing();
+ // Ensure we have an RDF4JTermFactory for conversion.
+ // We'll make a new one if user has provided a non-RDF4J factory
+ c.rdf4jTermFactory = (RDF4JTermFactory)
getRdfTermFactory().filter(RDF4JTermFactory.class::isInstance)
+ .orElseGet(c::createRDFTermFactory);
+ return c;
+ }
+
+ @Override
+ protected void parseSynchronusly() throws IOException {
+ Optional<RDFFormat> formatByMimeType =
getContentType().flatMap(Rio::getParserFormatForMIMEType);
+ String base = getBase().map(IRI::getIRIString).orElse(null);
+
+ ParserConfig parserConfig = new ParserConfig();
+ // TODO: Should we need to set anything?
+ RDFLoader loader = new RDFLoader(parserConfig,
rdf4jTermFactory.getValueFactory());
+ RDFHandler rdfHandler = makeRDFHandler();
+ if (getSourceFile().isPresent()) {
+ // NOTE: While we could have used
+ // loader.load(sourcePath.toFile()
+ // if the path fs provider == FileSystems.getDefault(),
+ // that RDFLoader method does not use absolute path
+ // as the base URI, so to be consistent
+ // we'll always do it with our own input stream
+ //
+ // That means we may have to guess format by
extensions:
+ Optional<RDFFormat> formatByFilename =
getSourceFile().map(Path::getFileName).map(Path::toString)
+
.flatMap(Rio::getParserFormatForFileName);
+ // TODO: for the excited.. what about the extension
after following symlinks?
+
+ RDFFormat format =
formatByMimeType.orElse(formatByFilename.orElse(null));
+ try (InputStream in =
Files.newInputStream(getSourceFile().get())) {
+ loader.load(in, base, format, rdfHandler);
+ }
+ } else if (getSourceIri().isPresent()) {
+ try {
+ // TODO: Handle international IRIs properly
+ // (Unicode support for for hostname, path and
query)
+ URL url = new
URL(getSourceIri().get().getIRIString());
+ // TODO: This probably does not support
https:// -> http:// redirections
+ loader.load(url, base,
formatByMimeType.orElse(null), makeRDFHandler());
+ } catch (MalformedURLException ex) {
+ throw new IOException("Can't handle source URL:
" + getSourceIri().get(), ex);
+ }
+ }
+ // must be getSourceInputStream then, this is guaranteed by
super.checkSource();
+ loader.load(getSourceInputStream().get(), base,
formatByMimeType.orElse(null), rdfHandler);
+ }
+
+ protected RDFHandler makeRDFHandler() {
+
+ // TODO: Can we join the below DF4JDataset and RDF4JGraph cases
--- End diff --
You only need to support `Dataset`, as `Model`/`Repository`/`Sail` are all
based on `Quad`s rather than `Triple`s, per my comment earlier in the review
about all `Statement`s being `Quad`s
> rdf4j integration
> -----------------
>
> Key: COMMONSRDF-35
> URL: https://issues.apache.org/jira/browse/COMMONSRDF-35
> Project: Apache Commons RDF
> Issue Type: New Feature
> Reporter: Stian Soiland-Reyes
> Assignee: Stian Soiland-Reyes
> Labels: integration, rdf4j, sesame
> Fix For: 0.3.0
>
>
> Add a new rdf4j module with implementation for Eclipse rdf4j
> See https://github.com/apache/incubator-commonsrdf/tree/rdf4j/rdf4j
> A legacy sesame branch could then be added by mainly copy/paste and change
> the import
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)