Github user ansell commented on a diff in the pull request:

    https://github.com/apache/incubator-commonsrdf/pull/24#discussion_r82311764
  
    --- Diff: 
rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java 
---
    @@ -0,0 +1,232 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.commons.rdf.rdf4j.experimental;
    +
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.net.MalformedURLException;
    +import java.net.URL;
    +import java.nio.file.Files;
    +import java.nio.file.Path;
    +import java.util.Optional;
    +import java.util.function.Consumer;
    +import java.util.stream.Stream;
    +
    +import org.apache.commons.rdf.api.IRI;
    +import org.apache.commons.rdf.api.Quad;
    +import org.apache.commons.rdf.api.RDFSyntax;
    +import org.apache.commons.rdf.experimental.RDFParser;
    +import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI;
    +import org.apache.commons.rdf.rdf4j.RDF4JDataset;
    +import org.apache.commons.rdf.rdf4j.RDF4JGraph;
    +import org.apache.commons.rdf.rdf4j.RDF4JTermFactory;
    +import org.apache.commons.rdf.simple.experimental.AbstractRDFParser;
    +import org.eclipse.rdf4j.model.Model;
    +import org.eclipse.rdf4j.model.Resource;
    +import org.eclipse.rdf4j.repository.util.RDFInserter;
    +import org.eclipse.rdf4j.repository.util.RDFLoader;
    +import org.eclipse.rdf4j.rio.ParserConfig;
    +import org.eclipse.rdf4j.rio.RDFFormat;
    +import org.eclipse.rdf4j.rio.RDFHandler;
    +import org.eclipse.rdf4j.rio.RDFHandlerException;
    +import org.eclipse.rdf4j.rio.Rio;
    +import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
    +
    +/**
    + * RDF4J-based parser.
    + * <p>
    + * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
    + * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
    + * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link 
RDFSyntax#TURTLE}
    + * - additional syntaxes can be supported by including the corresponding
    + * <em>rdf4j-rio-*</em> module on the classpath.
    + *
    + */
    +public class RDF4JParser extends AbstractRDFParser<RDF4JParser> implements 
RDFParser {
    +
    +   /**
    +    * RDF handler that converts every parsed RDF4J statement into a Commons
    +    * RDF {@link Quad} and hands it to the wrapped consumer.
    +    * <p>
    +    * This is an inner (non-static) class because the conversion uses the
    +    * enclosing parser's <code>rdf4jTermFactory</code>.
    +    */
    +   private final class AddToQuadConsumer extends AbstractRDFHandler {
    +           private final Consumer<Quad> target;
    +
    +           private AddToQuadConsumer(Consumer<Quad> quadTarget) {
    +                   this.target = quadTarget;
    +           }
    +
    +           @Override
    +           public void handleStatement(org.eclipse.rdf4j.model.Statement statement) throws RDFHandlerException {
    +                   // TODO: if getRdfTermFactory() is a non-rdf4j factory, should
    +                   // we use factory.createQuad() instead?
    +                   // Unsure what the promise of setting getRdfTermFactory() is --
    +                   // does it go all the way down to creating BlankNode, IRI and
    +                   // Literal?
    +                   final Quad converted = rdf4jTermFactory.asQuad(statement);
    +                   target.accept(converted);
    +                   // Performance note:
    +                   // Graph/Quad.add should pick up our
    +                   // RDF4JGraphLike.asStatement() again and so avoid a double
    +                   // conversion. Additionally, the RDF4JQuad and RDF4JTriple
    +                   // implementations convert subject/predicate/object/graph
    +                   // lazily.
    +           }
    +   }
    +
    +   /**
    +    * RDF handler that stores every parsed statement, and every namespace
    +    * prefix declaration, directly in an RDF4J {@link Model}.
    +    */
    +   private static final class AddToModel extends AbstractRDFHandler {
    +           private final Model target;
    +
    +           public AddToModel(Model model) {
    +                   this.target = model;
    +           }
    +
    +           @Override
    +           public void handleStatement(org.eclipse.rdf4j.model.Statement statement) throws RDFHandlerException {
    +                   target.add(statement);
    +           }
    +
    +           @Override
    +           public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
    +                   target.setNamespace(prefix, uri);
    +           }
    +   }
    +
    +   // RDF4J term factory used to convert parsed statements and to supply the
    +   // loader's value factory; assigned in prepareForParsing().
    +   private RDF4JTermFactory rdf4jTermFactory;
    +   // RDF4J parser settings; starts out as a fresh ParserConfig.
    +   private ParserConfig parserConfig = new ParserConfig();
    +
    +   /**
    +    * Create the term factory for this parser.
    +    *
    +    * @return a new {@link RDF4JTermFactory}
    +    */
    +   @Override
    +   protected RDF4JTermFactory createRDFTermFactory() {
    +           return new RDF4JTermFactory();
    +   }
    +
    +   /**
    +    * Prepare the parser instance that will perform the parsing, making sure
    +    * it has an {@link RDF4JTermFactory} available for converting RDF4J
    +    * statements.
    +    * <p>
    +    * If the configured term factory is already an {@link RDF4JTermFactory}
    +    * it is reused; otherwise (no factory set, or a non-RDF4J factory) a new
    +    * one is created with {@link #createRDFTermFactory()}.
    +    *
    +    * @return the prepared parser, as returned by the superclass
    +    * @throws IOException
    +    *             if thrown by the superclass preparation
    +    * @throws IllegalStateException
    +    *             if thrown by the superclass preparation
    +    */
    +   @Override
    +   protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException {
    +           // Must be super.prepareForParsing(): an unqualified call would invoke
    +           // this override again and recurse until StackOverflowError.
    +           RDF4JParser c = super.prepareForParsing();
    +           // Ensure we have an RDF4JTermFactory for conversion.
    +           // We'll make a new one if user has provided a non-RDF4J factory
    +           c.rdf4jTermFactory = (RDF4JTermFactory) getRdfTermFactory().filter(RDF4JTermFactory.class::isInstance)
    +                           .orElseGet(c::createRDFTermFactory);
    +           return c;
    +   }
    +
    +   /**
    +    * Parse the configured source with an RDF4J {@link RDFLoader}, sending the
    +    * parsed statements to the handler from <code>makeRDFHandler()</code>.
    +    * <p>
    +    * Exactly one source is loaded: the source file if present, otherwise the
    +    * source IRI if present, otherwise the source input stream. The RDF format
    +    * is looked up from the content type; for a source file the file name is
    +    * used as a fallback. If no format is found, <code>null</code> is passed
    +    * on to the loader.
    +    *
    +    * @throws IOException
    +    *             if the source IRI is not a valid URL, or if reading the
    +    *             source fails
    +    */
    +   @Override
    +   protected void parseSynchronusly() throws IOException {
    +           Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType);
    +           // base is null when no base IRI has been set
    +           String base = getBase().map(IRI::getIRIString).orElse(null);
    +
    +           ParserConfig parserConfig = getParserConfig();
    +           // TODO: Should we need to set anything?
    +           RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory());
    +           RDFHandler rdfHandler = makeRDFHandler();
    +
    +           // The branches below are mutually exclusive, so the source is loaded
    +           // once and the input stream Optional is only dereferenced when
    +           // neither a file nor an IRI has been set.
    +           if (getSourceFile().isPresent()) {
    +                   // NOTE: While we could have used
    +                   // loader.load(sourcePath.toFile()
    +                   // if the path fs provider == FileSystems.getDefault(),
    +                   // that RDFLoader method does not use absolute path
    +                   // as the base URI, so to be consistent
    +                   // we'll always do it with our own input stream
    +                   //
    +                   // That means we may have to guess format by extensions:
    +                   Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
    +                                   .flatMap(Rio::getParserFormatForFileName);
    +                   // TODO: for the excited.. what about the extension after
    +                   // following symlinks?
    +
    +                   RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null));
    +                   try (InputStream in = Files.newInputStream(getSourceFile().get())) {
    +                           loader.load(in, base, format, rdfHandler);
    +                   }
    +           } else if (getSourceIri().isPresent()) {
    +                   try {
    +                           // TODO: Handle international IRIs properly
    +                           // (Unicode support for hostname, path and query)
    +                           URL url = new URL(getSourceIri().get().getIRIString());
    +                           // TODO: This probably does not support
    +                           // https:// -> http:// redirections
    +                           loader.load(url, base, formatByMimeType.orElse(null), rdfHandler);
    +                   } catch (MalformedURLException ex) {
    +                           throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex);
    +                   }
    +           } else {
    +                   // must be getSourceInputStream then, this is guaranteed by
    +                   // super.checkSource();
    +                   loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler);
    +           }
    +   }
    +
    +   /**
    +    * Get the RDF4J {@link ParserConfig} to use.
    +    * <p>
    +    * If no parser config is set, the default configuration is provided.
    +    * <p>
    +    * <strong>Note:</strong> The parser config is mutable - changes to the
    +    * returned config are reflected in this instance of the parser.
    +    * To avoid mutation, create a new {@link ParserConfig} and set
    --- End diff --
    
    I am a little confused about why you feel you need to comment about this 
here. If they inject another instance, any other thread can still see it just 
like they can the original instance.
    
    It may be better to avoid encouraging multi-threaded use of this class, as 
it isn't necessary to do so in a long term pattern and creates many more issues 
than it seems to be worth.
    
    I haven't encountered multi-threaded parsers often, but if you are actually 
intending for this to be a multi-threaded parser, the parserConfig variable 
must be "volatile" to comply with the Java memory model; otherwise changes to 
the variable made via setParserConfig are not guaranteed to be visible to other 
threads.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

Reply via email to