[
https://issues.apache.org/jira/browse/COMMONSRDF-35?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15553822#comment-15553822
]
ASF GitHub Bot commented on COMMONSRDF-35:
------------------------------------------
Github user ansell commented on a diff in the pull request:
https://github.com/apache/incubator-commonsrdf/pull/24#discussion_r82311764
--- Diff:
rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java
---
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rdf.rdf4j.experimental;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+
+import org.apache.commons.rdf.api.IRI;
+import org.apache.commons.rdf.api.Quad;
+import org.apache.commons.rdf.api.RDFSyntax;
+import org.apache.commons.rdf.experimental.RDFParser;
+import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI;
+import org.apache.commons.rdf.rdf4j.RDF4JDataset;
+import org.apache.commons.rdf.rdf4j.RDF4JGraph;
+import org.apache.commons.rdf.rdf4j.RDF4JTermFactory;
+import org.apache.commons.rdf.simple.experimental.AbstractRDFParser;
+import org.eclipse.rdf4j.model.Model;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.repository.util.RDFInserter;
+import org.eclipse.rdf4j.repository.util.RDFLoader;
+import org.eclipse.rdf4j.rio.ParserConfig;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFHandler;
+import org.eclipse.rdf4j.rio.RDFHandlerException;
+import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
+
+/**
+ * RDF4J-based parser.
+ * <p>
+ * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
+ * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
+ * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link
RDFSyntax#TURTLE}
+ * - additional syntaxes can be supported by including the corresponding
+ * <em>rdf4j-rio-*</em> module on the classpath.
+ *
+ */
+public class RDF4JParser extends AbstractRDFParser<RDF4JParser> implements
RDFParser {
+
+ private final class AddToQuadConsumer extends AbstractRDFHandler {
+ private final Consumer<Quad> quadTarget;
+
+ private AddToQuadConsumer(Consumer<Quad> quadTarget) {
+ this.quadTarget = quadTarget;
+ }
+
+ public void handleStatement(org.eclipse.rdf4j.model.Statement
st)
+ throws
org.eclipse.rdf4j.rio.RDFHandlerException {
+ // TODO: if getRdfTermFactory() is a non-rdf4j factory,
should
+ // we use factory.createQuad() instead?
+ // Unsure what is the promise of setting
getRdfTermFactory() --
+ // does it go all the way down to creating BlankNode,
IRI and
+ // Literal?
+ quadTarget.accept(rdf4jTermFactory.asQuad(st));
+ // Performance note:
+ // Graph/Quad.add should pick up again our
+ // RDF4JGraphLike.asStatement()
+ // and avoid double conversion.
+ // Additionally the RDF4JQuad and RDF4JTriple
implementations
+ // are lazily converting subj/obj/pred/graph.s
+ }
+ }
+
+ private final static class AddToModel extends AbstractRDFHandler {
+ private final Model model;
+
+ public AddToModel(Model model) {
+ this.model = model;
+ }
+
+ public void handleStatement(org.eclipse.rdf4j.model.Statement
st)
+ throws
org.eclipse.rdf4j.rio.RDFHandlerException {
+ model.add(st);
+ }
+
+ @Override
+ public void handleNamespace(String prefix, String uri) throws
RDFHandlerException {
+ model.setNamespace(prefix, uri);
+ }
+ }
+
+ private RDF4JTermFactory rdf4jTermFactory;
+ private ParserConfig parserConfig = new ParserConfig();
+
+ @Override
+ protected RDF4JTermFactory createRDFTermFactory() {
+ return new RDF4JTermFactory();
+ }
+
+ @Override
+ protected RDF4JParser prepareForParsing() throws IOException,
IllegalStateException {
+ RDF4JParser c = prepareForParsing();
+ // Ensure we have an RDF4JTermFactory for conversion.
+ // We'll make a new one if user has provided a non-RDF4J factory
+ c.rdf4jTermFactory = (RDF4JTermFactory)
getRdfTermFactory().filter(RDF4JTermFactory.class::isInstance)
+ .orElseGet(c::createRDFTermFactory);
+ return c;
+ }
+
+ @Override
+ protected void parseSynchronusly() throws IOException {
+ Optional<RDFFormat> formatByMimeType =
getContentType().flatMap(Rio::getParserFormatForMIMEType);
+ String base = getBase().map(IRI::getIRIString).orElse(null);
+
+ ParserConfig parserConfig = getParserConfig();
+ // TODO: Should we need to set anything?
+ RDFLoader loader = new RDFLoader(parserConfig,
rdf4jTermFactory.getValueFactory());
+ RDFHandler rdfHandler = makeRDFHandler();
+ if (getSourceFile().isPresent()) {
+ // NOTE: While we could have used
+ // loader.load(sourcePath.toFile()
+ // if the path fs provider == FileSystems.getDefault(),
+ // that RDFLoader method does not use absolute path
+ // as the base URI, so to be consistent
+ // we'll always do it with our own input stream
+ //
+ // That means we may have to guess format by
extensions:
+ Optional<RDFFormat> formatByFilename =
getSourceFile().map(Path::getFileName).map(Path::toString)
+
.flatMap(Rio::getParserFormatForFileName);
+ // TODO: for the excited.. what about the extension
after following symlinks?
+
+ RDFFormat format =
formatByMimeType.orElse(formatByFilename.orElse(null));
+ try (InputStream in =
Files.newInputStream(getSourceFile().get())) {
+ loader.load(in, base, format, rdfHandler);
+ }
+ } else if (getSourceIri().isPresent()) {
+ try {
+ // TODO: Handle international IRIs properly
+ // (Unicode support for for hostname, path and
query)
+ URL url = new
URL(getSourceIri().get().getIRIString());
+ // TODO: This probably does not support
https:// -> http:// redirections
+ loader.load(url, base,
formatByMimeType.orElse(null), makeRDFHandler());
+ } catch (MalformedURLException ex) {
+ throw new IOException("Can't handle source URL:
" + getSourceIri().get(), ex);
+ }
+ }
+ // must be getSourceInputStream then, this is guaranteed by
super.checkSource();
+ loader.load(getSourceInputStream().get(), base,
formatByMimeType.orElse(null), rdfHandler);
+ }
+
+ /**
+ * Get the RDF4J {@link ParserConfig} to use.
+ * <p>
+ * If no parser config is set, the default configuration is provided.
+ * <p>
+ * <strong>Note:</strong> The parser config is mutable - changes in the
+ * returned config is reflected in this instance of the parser.
+ * To avoid mutation, create a new {@link ParserConfig} and set
--- End diff --
I am a little confused about why you feel the need to comment on this
here. If they inject another instance, any other thread can still see it, just
as it can see the original instance.
It may be better to avoid encouraging multi-threaded use of this class, as
doing so isn't necessary as a long-term pattern and creates many more issues
than it seems to be worth.
I haven't encountered multi-threaded parsers often, but if you actually
intend this to be a multi-threaded parser, the parserConfig variable
must be "volatile" to comply with the Java memory model; otherwise, changes to
the variable made through setParserConfig are not guaranteed to be visible to
other threads.
> rdf4j integration
> -----------------
>
> Key: COMMONSRDF-35
> URL: https://issues.apache.org/jira/browse/COMMONSRDF-35
> Project: Apache Commons RDF
> Issue Type: New Feature
> Reporter: Stian Soiland-Reyes
> Assignee: Stian Soiland-Reyes
> Labels: integration, rdf4j, sesame
> Fix For: 0.3.0
>
>
> Add a new rdf4j module with an implementation for Eclipse rdf4j.
> See https://github.com/apache/incubator-commonsrdf/tree/rdf4j/rdf4j
> A legacy sesame branch could then be added mainly by copy/paste and changing
> the imports.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)