RDFParser moved to org.apache.commons.rdf.experimental
Project: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/commit/a189f91e Tree: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/tree/a189f91e Diff: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/diff/a189f91e Branch: refs/heads/COMMONSRDF-46 Commit: a189f91efba570f0039c6187e612f7d42aeb2f8d Parents: 313fbf7 Author: Stian Soiland-Reyes <st...@apache.org> Authored: Tue Sep 27 17:29:28 2016 +0100 Committer: Stian Soiland-Reyes <st...@apache.org> Committed: Tue Sep 27 18:42:28 2016 +0100 ---------------------------------------------------------------------- .../org/apache/commons/rdf/api/RDFParser.java | 479 ---------------- .../commons/rdf/experimental/RDFParser.java | 489 +++++++++++++++++ .../commons/rdf/experimental/package-info.java | 34 ++ .../apache/commons/rdf/jena/JenaRDFParser.java | 103 ---- .../rdf/jena/experimental/JenaRDFParser.java | 105 ++++ .../rdf/jena/experimental/package-info.java | 34 ++ .../commons/rdf/jena/TestRDFParserBuilder.java | 3 +- .../rdf/jsonldjava/JsonLdParserBuilder.java | 157 ------ .../jsonldjava/experimental/JsonLdParser.java | 160 ++++++ .../jsonldjava/experimental/package-info.java | 34 ++ .../rdf/jsonldjava/JsonLdParserBuilderTest.java | 7 +- .../apache/commons/rdf/rdf4j/RDF4JParser.java | 194 ------- .../rdf/rdf4j/experimental/RDF4JParser.java | 197 +++++++ .../rdf/rdf4j/experimental/package-info.java | 34 ++ .../apache/commons/rdf/rdf4j/package-info.java | 6 +- .../commons/rdf/simple/AbstractRDFParser.java | 541 ------------------ .../commons/rdf/simple/RDFParseException.java | 2 +- .../simple/experimental/AbstractRDFParser.java | 543 +++++++++++++++++++ .../rdf/simple/experimental/package-info.java | 34 ++ .../simple/AbstractRDFParserBuilderTest.java | 3 +- .../rdf/simple/DummyRDFParserBuilder.java | 3 +- 21 files changed, 1678 insertions(+), 1484 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/api/src/main/java/org/apache/commons/rdf/api/RDFParser.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/commons/rdf/api/RDFParser.java b/api/src/main/java/org/apache/commons/rdf/api/RDFParser.java deleted file mode 100644 index 72c7e61..0000000 --- a/api/src/main/java/org/apache/commons/rdf/api/RDFParser.java +++ /dev/null @@ -1,479 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rdf.api; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.util.Optional; -import java.util.concurrent.Future; -import java.util.function.Consumer; - -/** - * Parse an RDF source into a target (e.g. a Graph/Dataset). - * <p> - * This interface follows the - * <a href="https://en.wikipedia.org/wiki/Builder_pattern">Builder pattern</a>, - * allowing to set parser settings like {@link #contentType(RDFSyntax)} and - * {@link #base(IRI)}. 
A caller MUST call one of the <code>source</code> methods - * (e.g. {@link #source(IRI)}, {@link #source(Path)}, - * {@link #source(InputStream)}), and MUST call one of the <code>target</code> - * methods (e.g. {@link #target(Consumer)}, {@link #target(Dataset)}, - * {@link #target(Graph)}) before calling {@link #parse()} on the returned - * RDFParser - however methods can be called in any order. - * <p> - * The call to {@link #parse()} returns a {@link Future}, allowing asynchronous - * parse operations. Callers are recommended to check {@link Future#get()} to - * ensure parsing completed successfully, or catch exceptions thrown during - * parsing. - * <p> - * Setting a method that has already been set will override any existing value - * in the returned builder - regardless of the parameter type (e.g. - * {@link #source(IRI)} will override a previous {@link #source(Path)}. Settings - * can be unset by passing <code>null</code> - note that this may - * require casting, e.g. <code>contentType( (RDFSyntax) null )</code> - * to undo a previous call to {@link #contentType(RDFSyntax)}. - * <p> - * It is undefined if a RDFParser is mutable or thread-safe, so callers - * should always use the returned modified RDFParser from the builder - * methods. The builder may return itself after modification, - * or a cloned builder with the modified settings applied. - * Implementations are however encouraged to be immutable, - * thread-safe and document this. As an example starting point, see - * {@link org.apache.commons.rdf.simple.AbstractRDFParserBuilder}. - * <p> - * Example usage: - * </p> - * - * <pre> - * Graph g1 = rDFTermFactory.createGraph(); - * new ExampleRDFParserBuilder() - * .source(Paths.get("/tmp/graph.ttl")) - * .contentType(RDFSyntax.TURTLE) - * .target(g1) - * .parse().get(30, TimeUnit.Seconds); - * </pre> - * - */ -public interface RDFParser { - - /** - * The result of {@link RDFParser#parse()} indicating - * parsing completed. 
- * <p> - * This is a marker interface that may be subclassed to include - * parser details, e.g. warning messages or triple counts. - */ - public interface ParseResult { - } - - /** - * Specify which {@link RDFTermFactory} to use for generating - * {@link RDFTerm}s. - * <p> - * This option may be used together with {@link #target(Graph)} to - * override the implementation's default factory and graph. - * <p> - * <strong>Warning:</strong> Using the same {@link RDFTermFactory} for - * multiple {@link #parse()} calls may accidentally merge - * {@link BlankNode}s having the same label, as the parser may - * use the {@link RDFTermFactory#createBlankNode(String)} method - * from the parsed blank node labels. - * - * @see #target(Graph) - * @param rdfTermFactory - * {@link RDFTermFactory} to use for generating RDFTerms. - * @return An {@link RDFParser} that will use the specified - * rdfTermFactory - */ - RDFParser rdfTermFactory(RDFTermFactory rdfTermFactory); - - /** - * Specify the content type of the RDF syntax to parse. - * <p> - * This option can be used to select the RDFSyntax of the source, overriding - * any <code>Content-Type</code> headers or equivalent. - * <p> - * The character set of the RDFSyntax is assumed to be - * {@link StandardCharsets#UTF_8} unless overridden within the document - * (e.g. <?xml version="1.0" encoding="iso-8859-1"?></code> in - * {@link RDFSyntax#RDFXML}). - * <p> - * This method will override any contentType set with - * {@link #contentType(String)}. - * - * @see #contentType(String) - * @param rdfSyntax - * An {@link RDFSyntax} to parse the source according to, e.g. - * {@link RDFSyntax#TURTLE}. - * @throws IllegalArgumentException - * If this RDFParser does not support the specified - * RDFSyntax. - * @return An {@link RDFParser} that will use the specified content - * type. - */ - RDFParser contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException; - - /** - * Specify the content type of the RDF syntax to parse. 
- * <p> - * This option can be used to select the RDFSyntax of the source, overriding - * any <code>Content-Type</code> headers or equivalent. - * <p> - * The content type MAY include a <code>charset</code> parameter if the RDF - * media types permit it; the default charset is - * {@link StandardCharsets#UTF_8} unless overridden within the document. - * <p> - * This method will override any contentType set with - * {@link #contentType(RDFSyntax)}. - * - * @see #contentType(RDFSyntax) - * @param contentType - * A content-type string, e.g. <code>application/ld+json</code> - * or <code>text/turtle;charset="UTF-8"</code> as specified by - * <a href="https://tools.ietf.org/html/rfc7231#section-3.1.1.1"> - * RFC7231</a>. - * @return An {@link RDFParser} that will use the specified content - * type. - * @throws IllegalArgumentException - * If the contentType has an invalid syntax, or this - * RDFParser does not support the specified contentType. - */ - RDFParser contentType(String contentType) throws IllegalArgumentException; - - /** - * Specify a {@link Graph} to add parsed triples to. - * <p> - * If the source supports datasets (e.g. the {@link #contentType(RDFSyntax)} - * set has {@link RDFSyntax#supportsDataset} is true)), then only quads in - * the <em>default graph</em> will be added to the Graph as {@link Triple}s. - * <p> - * It is undefined if any triples are added to the specified {@link Graph} - * if {@link #parse()} throws any exceptions. (However implementations are - * free to prevent this using transaction mechanisms or similar). If - * {@link Future#get()} does not indicate an exception, the parser - * implementation SHOULD have inserted all parsed triples to the specified - * graph. - * <p> - * Calling this method will override any earlier targets set with - * {@link #target(Graph)}, {@link #target(Consumer)} or - * {@link #target(Dataset)}. 
- * <p> - * The default implementation of this method calls {@link #target(Consumer)} - * with a {@link Consumer} that does {@link Graph#add(Triple)} with - * {@link Quad#asTriple()} if the quad is in the default graph. - * - * @param graph - * The {@link Graph} to add triples to. - * @return An {@link RDFParser} that will insert triples into the - * specified graph. - */ - default RDFParser target(Graph graph) { - return target(q -> { - if (! q.getGraphName().isPresent()) { - graph.add(q.asTriple()); - } - }); - } - - /** - * Specify a {@link Dataset} to add parsed quads to. - * <p> - * It is undefined if any quads are added to the specified - * {@link Dataset} if {@link #parse()} throws any exceptions. - * (However implementations are free to prevent this using transaction - * mechanisms or similar). On the other hand, if {@link #parse()} - * does not indicate an exception, the - * implementation SHOULD have inserted all parsed quads - * to the specified dataset. - * <p> - * Calling this method will override any earlier targets set with - * {@link #target(Graph)}, {@link #target(Consumer)} or {@link #target(Dataset)}. - * <p> - * The default implementation of this method calls {@link #target(Consumer)} - * with a {@link Consumer} that does {@link Dataset#add(Quad)}. - * - * @param dataset - * The {@link Dataset} to add quads to. - * @return An {@link RDFParser} that will insert triples into the - * specified dataset. - */ - default RDFParser target(Dataset dataset) { - return target(dataset::add); - } - - /** - * Specify a consumer for parsed quads. - * <p> - * The quads will include triples in all named graphs of the parsed - * source, including any triples in the default graph. - * When parsing a source format which do not support datasets, all quads - * delivered to the consumer will be in the default graph - * (e.g. 
their {@link Quad#getGraphName()} will be - * as {@link Optional#empty()}), while for a source - * <p> - * It is undefined if any quads are consumed if {@link #parse()} throws any - * exceptions. On the other hand, if {@link #parse()} does not indicate an - * exception, the implementation SHOULD have produced all parsed quads to - * the specified consumer. - * <p> - * Calling this method will override any earlier targets set with - * {@link #target(Graph)}, {@link #target(Consumer)} or - * {@link #target(Dataset)}. - * <p> - * The consumer is not assumed to be thread safe - only one - * {@link Consumer#accept(Object)} is delivered at a time for a given - * {@link RDFParser#parse()} call. - * <p> - * This method is typically called with a functional consumer, for example: - * <pre> - * List<Quad> quads = new ArrayList<Quad>; - * parserBuilder.target(quads::add).parse(); - * </pre> - * - * @param consumer - * A {@link Consumer} of {@link Quad}s - * @return An {@link RDFParser} that will call the consumer for into - * the specified dataset. - */ - RDFParser target(Consumer<Quad> consumer); - - /** - * Specify a base IRI to use for parsing any relative IRI references. - * <p> - * Setting this option will override any protocol-specific base IRI (e.g. - * <code>Content-Location</code> header) or the {@link #source(IRI)} IRI, - * but does not override any base IRIs set within the source document (e.g. - * <code>@base</code> in Turtle documents). - * <p> - * If the source is in a syntax that does not support relative IRI - * references (e.g. {@link RDFSyntax#NTRIPLES}), setting the - * <code>base</code> has no effect. - * <p> - * This method will override any base IRI set with {@link #base(String)}. - * - * @see #base(String) - * @param base - * An absolute IRI to use as a base. - * @return An {@link RDFParser} that will use the specified base IRI. - */ - RDFParser base(IRI base); - - /** - * Specify a base IRI to use for parsing any relative IRI references. 
- * <p> - * Setting this option will override any protocol-specific base IRI (e.g. - * <code>Content-Location</code> header) or the {@link #source(IRI)} IRI, - * but does not override any base IRIs set within the source document (e.g. - * <code>@base</code> in Turtle documents). - * <p> - * If the source is in a syntax that does not support relative IRI - * references (e.g. {@link RDFSyntax#NTRIPLES}), setting the - * <code>base</code> has no effect. - * <p> - * This method will override any base IRI set with {@link #base(IRI)}. - * - * @see #base(IRI) - * @param base - * An absolute IRI to use as a base. - * @return An {@link RDFParser} that will use the specified base IRI. - * @throws IllegalArgumentException - * If the base is not a valid absolute IRI string - */ - RDFParser base(String base) throws IllegalArgumentException; - - /** - * Specify a source {@link InputStream} to parse. - * <p> - * The source set will not be read before the call to {@link #parse()}. - * <p> - * The InputStream will not be closed after parsing. The InputStream does - * not need to support {@link InputStream#markSupported()}. - * <p> - * The parser might not consume the complete stream (e.g. an RDF/XML parser - * may not read beyond the closing tag of - * <code></rdf:Description></code>). - * <p> - * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} - * SHOULD be set before calling {@link #parse()}. - * <p> - * The character set is assumed to be {@link StandardCharsets#UTF_8} unless - * the {@link #contentType(String)} specifies otherwise or the document - * declares its own charset (e.g. RDF/XML with a - * <code><?xml encoding="iso-8859-1"></code> header). - * <p> - * The {@link #base(IRI)} or {@link #base(String)} MUST be set before - * calling {@link #parse()}, unless the RDF syntax does not permit relative - * IRIs (e.g. {@link RDFSyntax#NTRIPLES}). 
- * <p> - * This method will override any source set with {@link #source(IRI)}, - * {@link #source(Path)} or {@link #source(String)}. - * - * @param inputStream - * An InputStream to consume - * @return An {@link RDFParser} that will use the specified source. - */ - RDFParser source(InputStream inputStream); - - /** - * Specify a source file {@link Path} to parse. - * <p> - * The source set will not be read before the call to {@link #parse()}. - * <p> - * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} - * SHOULD be set before calling {@link #parse()}. - * <p> - * The character set is assumed to be {@link StandardCharsets#UTF_8} unless - * the {@link #contentType(String)} specifies otherwise or the document - * declares its own charset (e.g. RDF/XML with a - * <code><?xml encoding="iso-8859-1"></code> header). - * <p> - * The {@link #base(IRI)} or {@link #base(String)} MAY be set before calling - * {@link #parse()}, otherwise {@link Path#toUri()} will be used as the base - * IRI. - * <p> - * This method will override any source set with {@link #source(IRI)}, - * {@link #source(InputStream)} or {@link #source(String)}. - * - * @param file - * A Path for a file to parse - * @return An {@link RDFParser} that will use the specified source. - */ - RDFParser source(Path file); - - /** - * Specify an absolute source {@link IRI} to retrieve and parse. - * <p> - * The source set will not be read before the call to {@link #parse()}. - * <p> - * If this builder does not support the given IRI protocol (e.g. - * <code>urn:uuid:ce667463-c5ab-4c23-9b64-701d055c4890</code>), this method - * should succeed, while the {@link #parse()} should throw an - * {@link IOException}. - * <p> - * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} MAY - * be set before calling {@link #parse()}, in which case that type MAY be - * used for content negotiation (e.g. <code>Accept</code> header in HTTP), - * and SHOULD be used for selecting the RDFSyntax. 
- * <p> - * The character set is assumed to be {@link StandardCharsets#UTF_8} unless - * the protocol's equivalent of <code>Content-Type</code> specifies - * otherwise or the document declares its own charset (e.g. RDF/XML with a - * <code><?xml encoding="iso-8859-1"></code> header). - * <p> - * The {@link #base(IRI)} or {@link #base(String)} MAY be set before calling - * {@link #parse()}, otherwise the source IRI will be used as the base IRI. - * <p> - * This method will override any source set with {@link #source(Path)}, - * {@link #source(InputStream)} or {@link #source(String)}. - * - * @param iri - * An IRI to retrieve and parse - * @return An {@link RDFParser} that will use the specified source. - */ - RDFParser source(IRI iri); - - /** - * Specify an absolute source IRI to retrieve and parse. - * <p> - * The source set will not be read before the call to {@link #parse()}. - * <p> - * If this builder does not support the given IRI (e.g. - * <code>urn:uuid:ce667463-c5ab-4c23-9b64-701d055c4890</code>), this method - * should succeed, while the {@link #parse()} should throw an - * {@link IOException}. - * <p> - * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} MAY - * be set before calling {@link #parse()}, in which case that type MAY be - * used for content negotiation (e.g. <code>Accept</code> header in HTTP), - * and SHOULD be used for selecting the RDFSyntax. - * <p> - * The character set is assumed to be {@link StandardCharsets#UTF_8} unless - * the protocol's equivalent of <code>Content-Type</code> specifies - * otherwise or the document declares its own charset (e.g. RDF/XML with a - * <code><?xml encoding="iso-8859-1"></code> header). - * <p> - * The {@link #base(IRI)} or {@link #base(String)} MAY be set before calling - * {@link #parse()}, otherwise the source IRI will be used as the base IRI. - * <p> - * This method will override any source set with {@link #source(Path)}, - * {@link #source(InputStream)} or {@link #source(IRI)}. 
- * - * @param iri - * An IRI to retrieve and parse - * @return An {@link RDFParser} that will use the specified source. - * @throws IllegalArgumentException - * If the base is not a valid absolute IRI string - * - */ - RDFParser source(String iri) throws IllegalArgumentException; - - /** - * Parse the specified source. - * <p> - * A source method (e.g. {@link #source(InputStream)}, {@link #source(IRI)}, - * {@link #source(Path)}, {@link #source(String)} or an equivalent subclass - * method) MUST have been called before calling this method, otherwise an - * {@link IllegalStateException} will be thrown. - * <p> - * A target method (e.g. {@link #target(Consumer)}, {@link #target(Dataset)}, - * {@link #target(Graph)} or an equivalent subclass method) MUST have been - * called before calling parse(), otherwise an - * {@link IllegalStateException} will be thrown. - * <p> - * It is undefined if this method is thread-safe, however the - * {@link RDFParser} may be reused (e.g. setting a different source) - * as soon as the {@link Future} has been returned from this method. - * <p> - * The RDFParser SHOULD perform the parsing as an asynchronous - * operation, and return the {@link Future} as soon as preliminary checks - * (such as validity of the {@link #source(IRI)} and - * {@link #contentType(RDFSyntax)} settings) have finished. The future - * SHOULD not mark {@link Future#isDone()} before parsing is complete. A - * synchronous implementation MAY be blocking on the <code>parse()</code> - * call and return a Future that is already {@link Future#isDone()}. - * <p> - * The returned {@link Future} contains a {@link ParseResult}. - * Implementations may subclass this interface to provide any - * parser details, e.g. list of warnings. <code>null</code> is a - * possible return value if no details are available, but - * parsing succeeded. - * <p> - * If an exception occurs during parsing, (e.g. 
{@link IOException} or - * {@link org.apache.commons.rdf.simple.RDFParseException}), - * it should be indicated as the - * {@link java.util.concurrent.ExecutionException#getCause()} in the - * {@link java.util.concurrent.ExecutionException} thrown on - * {@link Future#get()}. - * - * @return A Future that will return the populated {@link Graph} when the - * parsing has finished. - * @throws IOException - * If an error occurred while starting to read the source (e.g. - * file not found, unsupported IRI protocol). Note that IO - * errors during parsing would instead be the - * {@link java.util.concurrent.ExecutionException#getCause()} of - * the {@link java.util.concurrent.ExecutionException} thrown on - * {@link Future#get()}. - * @throws IllegalStateException - * If the builder is in an invalid state, e.g. a - * <code>source</code> has not been set. - */ - Future<? extends ParseResult> parse() throws IOException, IllegalStateException; -} http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/api/src/main/java/org/apache/commons/rdf/experimental/RDFParser.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/commons/rdf/experimental/RDFParser.java b/api/src/main/java/org/apache/commons/rdf/experimental/RDFParser.java new file mode 100644 index 0000000..39b7253 --- /dev/null +++ b/api/src/main/java/org/apache/commons/rdf/experimental/RDFParser.java @@ -0,0 +1,489 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rdf.experimental; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Optional; +import java.util.concurrent.Future; +import java.util.function.Consumer; + +import org.apache.commons.rdf.api.BlankNode; +import org.apache.commons.rdf.api.Dataset; +import org.apache.commons.rdf.api.Graph; +import org.apache.commons.rdf.api.IRI; +import org.apache.commons.rdf.api.Quad; +import org.apache.commons.rdf.api.RDFSyntax; +import org.apache.commons.rdf.api.RDFTerm; +import org.apache.commons.rdf.api.RDFTermFactory; +import org.apache.commons.rdf.api.Triple; + +/** + * Parse an RDF source into a target (e.g. a Graph/Dataset). + * <p> + * This interface follows the + * <a href="https://en.wikipedia.org/wiki/Builder_pattern">Builder pattern</a>, + * allowing to set parser settings like {@link #contentType(RDFSyntax)} and + * {@link #base(IRI)}. A caller MUST call one of the <code>source</code> methods + * (e.g. {@link #source(IRI)}, {@link #source(Path)}, + * {@link #source(InputStream)}), and MUST call one of the <code>target</code> + * methods (e.g. {@link #target(Consumer)}, {@link #target(Dataset)}, + * {@link #target(Graph)}) before calling {@link #parse()} on the returned + * RDFParser - however methods can be called in any order. + * <p> + * The call to {@link #parse()} returns a {@link Future}, allowing asynchronous + * parse operations. 
Callers are recommended to check {@link Future#get()} to + * ensure parsing completed successfully, or catch exceptions thrown during + * parsing. + * <p> + * Setting a method that has already been set will override any existing value + * in the returned builder - regardless of the parameter type (e.g. + * {@link #source(IRI)} will override a previous {@link #source(Path)}). Settings + * can be unset by passing <code>null</code> - note that this may + * require casting, e.g. <code>contentType( (RDFSyntax) null )</code> + * to undo a previous call to {@link #contentType(RDFSyntax)}. + * <p> + * It is undefined if a RDFParser is mutable or thread-safe, so callers + * should always use the returned modified RDFParser from the builder + * methods. The builder may return itself after modification, + * or a cloned builder with the modified settings applied. + * Implementations are however encouraged to be immutable, + * thread-safe and document this. As an example starting point, see + * {@link org.apache.commons.rdf.simple.experimental.AbstractRDFParser}. + * <p> + * Example usage: + * </p> + * + * <pre> + * Graph g1 = rDFTermFactory.createGraph(); + * new ExampleRDFParserBuilder() + * .source(Paths.get("/tmp/graph.ttl")) + * .contentType(RDFSyntax.TURTLE) + * .target(g1) + * .parse().get(30, TimeUnit.SECONDS); + * </pre> + * + */ +public interface RDFParser { + + /** + * The result of {@link RDFParser#parse()} indicating + * parsing completed. + * <p> + * This is a marker interface that may be subclassed to include + * parser details, e.g. warning messages or triple counts. + */ + public interface ParseResult { + } + + /** + * Specify which {@link RDFTermFactory} to use for generating + * {@link RDFTerm}s. + * <p> + * This option may be used together with {@link #target(Graph)} to + * override the implementation's default factory and graph. 
+ * <p> + * <strong>Warning:</strong> Using the same {@link RDFTermFactory} for + * multiple {@link #parse()} calls may accidentally merge + * {@link BlankNode}s having the same label, as the parser may + * use the {@link RDFTermFactory#createBlankNode(String)} method + * from the parsed blank node labels. + * + * @see #target(Graph) + * @param rdfTermFactory + * {@link RDFTermFactory} to use for generating RDFTerms. + * @return An {@link RDFParser} that will use the specified + * rdfTermFactory + */ + RDFParser rdfTermFactory(RDFTermFactory rdfTermFactory); + + /** + * Specify the content type of the RDF syntax to parse. + * <p> + * This option can be used to select the RDFSyntax of the source, overriding + * any <code>Content-Type</code> headers or equivalent. + * <p> + * The character set of the RDFSyntax is assumed to be + * {@link StandardCharsets#UTF_8} unless overridden within the document + * (e.g. <code><?xml version="1.0" encoding="iso-8859-1"?></code> in + * {@link RDFSyntax#RDFXML}). + * <p> + * This method will override any contentType set with + * {@link #contentType(String)}. + * + * @see #contentType(String) + * @param rdfSyntax + * An {@link RDFSyntax} to parse the source according to, e.g. + * {@link RDFSyntax#TURTLE}. + * @throws IllegalArgumentException + * If this RDFParser does not support the specified + * RDFSyntax. + * @return An {@link RDFParser} that will use the specified content + * type. + */ + RDFParser contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException; + + /** + * Specify the content type of the RDF syntax to parse. + * <p> + * This option can be used to select the RDFSyntax of the source, overriding + * any <code>Content-Type</code> headers or equivalent. + * <p> + * The content type MAY include a <code>charset</code> parameter if the RDF + * media types permit it; the default charset is + * {@link StandardCharsets#UTF_8} unless overridden within the document. 
+ * <p> + * This method will override any contentType set with + * {@link #contentType(RDFSyntax)}. + * + * @see #contentType(RDFSyntax) + * @param contentType + * A content-type string, e.g. <code>application/ld+json</code> + * or <code>text/turtle;charset="UTF-8"</code> as specified by + * <a href="https://tools.ietf.org/html/rfc7231#section-3.1.1.1"> + * RFC7231</a>. + * @return An {@link RDFParser} that will use the specified content + * type. + * @throws IllegalArgumentException + * If the contentType has an invalid syntax, or this + * RDFParser does not support the specified contentType. + */ + RDFParser contentType(String contentType) throws IllegalArgumentException; + + /** + * Specify a {@link Graph} to add parsed triples to. + * <p> + * If the source supports datasets (e.g. the syntax set with {@link #contentType(RDFSyntax)} + * has {@link RDFSyntax#supportsDataset} set to true), then only quads in + * the <em>default graph</em> will be added to the Graph as {@link Triple}s. + * <p> + * It is undefined if any triples are added to the specified {@link Graph} + * if {@link #parse()} throws any exceptions. (However implementations are + * free to prevent this using transaction mechanisms or similar). If + * {@link Future#get()} does not indicate an exception, the parser + * implementation SHOULD have inserted all parsed triples to the specified + * graph. + * <p> + * Calling this method will override any earlier targets set with + * {@link #target(Graph)}, {@link #target(Consumer)} or + * {@link #target(Dataset)}. + * <p> + * The default implementation of this method calls {@link #target(Consumer)} + * with a {@link Consumer} that does {@link Graph#add(Triple)} with + * {@link Quad#asTriple()} if the quad is in the default graph. + * + * @param graph + * The {@link Graph} to add triples to. + * @return An {@link RDFParser} that will insert triples into the + * specified graph. + */ + default RDFParser target(Graph graph) { + return target(q -> { + if (! 
q.getGraphName().isPresent()) { + graph.add(q.asTriple()); + } + }); + } + + /** + * Specify a {@link Dataset} to add parsed quads to. + * <p> + * It is undefined if any quads are added to the specified + * {@link Dataset} if {@link #parse()} throws any exceptions. + * (However implementations are free to prevent this using transaction + * mechanisms or similar). On the other hand, if {@link #parse()} + * does not indicate an exception, the + * implementation SHOULD have inserted all parsed quads + * to the specified dataset. + * <p> + * Calling this method will override any earlier targets set with + * {@link #target(Graph)}, {@link #target(Consumer)} or {@link #target(Dataset)}. + * <p> + * The default implementation of this method calls {@link #target(Consumer)} + * with a {@link Consumer} that does {@link Dataset#add(Quad)}. + * + * @param dataset + * The {@link Dataset} to add quads to. + * @return An {@link RDFParser} that will insert quads into the + * specified dataset. + */ + default RDFParser target(Dataset dataset) { + return target(dataset::add); + } + + /** + * Specify a consumer for parsed quads. + * <p> + * The quads will include triples in all named graphs of the parsed + * source, including any triples in the default graph. + * When parsing a source format which does not support datasets, all quads + * delivered to the consumer will be in the default graph + * (e.g. their {@link Quad#getGraphName()} will be + * {@link Optional#empty()}), while a source format that supports datasets may also yield quads in named graphs. + * <p> + * It is undefined if any quads are consumed if {@link #parse()} throws any + * exceptions. On the other hand, if {@link #parse()} does not indicate an + * exception, the implementation SHOULD have produced all parsed quads to + * the specified consumer. + * <p> + * Calling this method will override any earlier targets set with + * {@link #target(Graph)}, {@link #target(Consumer)} or + * {@link #target(Dataset)}. 
+ * <p> + * The consumer is not assumed to be thread safe - only one + * {@link Consumer#accept(Object)} is delivered at a time for a given + * {@link RDFParser#parse()} call. + * <p> + * This method is typically called with a functional consumer, for example: + * <pre> + * List<Quad> quads = new ArrayList<Quad>(); + * parserBuilder.target(quads::add).parse(); + * </pre> + * + * @param consumer + * A {@link Consumer} of {@link Quad}s + * @return An {@link RDFParser} that will call the consumer for each + * parsed quad. + */ + RDFParser target(Consumer<Quad> consumer); + + /** + * Specify a base IRI to use for parsing any relative IRI references. + * <p> + * Setting this option will override any protocol-specific base IRI (e.g. + * <code>Content-Location</code> header) or the {@link #source(IRI)} IRI, + * but does not override any base IRIs set within the source document (e.g. + * <code>@base</code> in Turtle documents). + * <p> + * If the source is in a syntax that does not support relative IRI + * references (e.g. {@link RDFSyntax#NTRIPLES}), setting the + * <code>base</code> has no effect. + * <p> + * This method will override any base IRI set with {@link #base(String)}. + * + * @see #base(String) + * @param base + * An absolute IRI to use as a base. + * @return An {@link RDFParser} that will use the specified base IRI. + */ + RDFParser base(IRI base); + + /** + * Specify a base IRI to use for parsing any relative IRI references. + * <p> + * Setting this option will override any protocol-specific base IRI (e.g. + * <code>Content-Location</code> header) or the {@link #source(IRI)} IRI, + * but does not override any base IRIs set within the source document (e.g. + * <code>@base</code> in Turtle documents). + * <p> + * If the source is in a syntax that does not support relative IRI + * references (e.g. {@link RDFSyntax#NTRIPLES}), setting the + * <code>base</code> has no effect. + * <p> + * This method will override any base IRI set with {@link #base(IRI)}. 
+ * + * @see #base(IRI) + * @param base + * An absolute IRI to use as a base. + * @return An {@link RDFParser} that will use the specified base IRI. + * @throws IllegalArgumentException + * If the base is not a valid absolute IRI string + */ + RDFParser base(String base) throws IllegalArgumentException; + + /** + * Specify a source {@link InputStream} to parse. + * <p> + * The source set will not be read before the call to {@link #parse()}. + * <p> + * The InputStream will not be closed after parsing. The InputStream does + * not need to support {@link InputStream#markSupported()}. + * <p> + * The parser might not consume the complete stream (e.g. an RDF/XML parser + * may not read beyond the closing tag of + * <code></rdf:Description></code>). + * <p> + * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} + * SHOULD be set before calling {@link #parse()}. + * <p> + * The character set is assumed to be {@link StandardCharsets#UTF_8} unless + * the {@link #contentType(String)} specifies otherwise or the document + * declares its own charset (e.g. RDF/XML with a + * <code><?xml encoding="iso-8859-1"></code> header). + * <p> + * The {@link #base(IRI)} or {@link #base(String)} MUST be set before + * calling {@link #parse()}, unless the RDF syntax does not permit relative + * IRIs (e.g. {@link RDFSyntax#NTRIPLES}). + * <p> + * This method will override any source set with {@link #source(IRI)}, + * {@link #source(Path)} or {@link #source(String)}. + * + * @param inputStream + * An InputStream to consume + * @return An {@link RDFParser} that will use the specified source. + */ + RDFParser source(InputStream inputStream); + + /** + * Specify a source file {@link Path} to parse. + * <p> + * The source set will not be read before the call to {@link #parse()}. + * <p> + * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} + * SHOULD be set before calling {@link #parse()}. 
+ * <p> + * The character set is assumed to be {@link StandardCharsets#UTF_8} unless + * the {@link #contentType(String)} specifies otherwise or the document + * declares its own charset (e.g. RDF/XML with a + * <code><?xml encoding="iso-8859-1"></code> header). + * <p> + * The {@link #base(IRI)} or {@link #base(String)} MAY be set before calling + * {@link #parse()}, otherwise {@link Path#toUri()} will be used as the base + * IRI. + * <p> + * This method will override any source set with {@link #source(IRI)}, + * {@link #source(InputStream)} or {@link #source(String)}. + * + * @param file + * A Path for a file to parse + * @return An {@link RDFParser} that will use the specified source. + */ + RDFParser source(Path file); + + /** + * Specify an absolute source {@link IRI} to retrieve and parse. + * <p> + * The source set will not be read before the call to {@link #parse()}. + * <p> + * If this builder does not support the given IRI protocol (e.g. + * <code>urn:uuid:ce667463-c5ab-4c23-9b64-701d055c4890</code>), this method + * should succeed, while the {@link #parse()} should throw an + * {@link IOException}. + * <p> + * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} MAY + * be set before calling {@link #parse()}, in which case that type MAY be + * used for content negotiation (e.g. <code>Accept</code> header in HTTP), + * and SHOULD be used for selecting the RDFSyntax. + * <p> + * The character set is assumed to be {@link StandardCharsets#UTF_8} unless + * the protocol's equivalent of <code>Content-Type</code> specifies + * otherwise or the document declares its own charset (e.g. RDF/XML with a + * <code><?xml encoding="iso-8859-1"></code> header). + * <p> + * The {@link #base(IRI)} or {@link #base(String)} MAY be set before calling + * {@link #parse()}, otherwise the source IRI will be used as the base IRI. 
+ * <p> + * This method will override any source set with {@link #source(Path)}, + * {@link #source(InputStream)} or {@link #source(String)}. + * + * @param iri + * An IRI to retrieve and parse + * @return An {@link RDFParser} that will use the specified source. + */ + RDFParser source(IRI iri); + + /** + * Specify an absolute source IRI to retrieve and parse. + * <p> + * The source set will not be read before the call to {@link #parse()}. + * <p> + * If this builder does not support the given IRI (e.g. + * <code>urn:uuid:ce667463-c5ab-4c23-9b64-701d055c4890</code>), this method + * should succeed, while the {@link #parse()} should throw an + * {@link IOException}. + * <p> + * The {@link #contentType(RDFSyntax)} or {@link #contentType(String)} MAY + * be set before calling {@link #parse()}, in which case that type MAY be + * used for content negotiation (e.g. <code>Accept</code> header in HTTP), + * and SHOULD be used for selecting the RDFSyntax. + * <p> + * The character set is assumed to be {@link StandardCharsets#UTF_8} unless + * the protocol's equivalent of <code>Content-Type</code> specifies + * otherwise or the document declares its own charset (e.g. RDF/XML with a + * <code><?xml encoding="iso-8859-1"></code> header). + * <p> + * The {@link #base(IRI)} or {@link #base(String)} MAY be set before calling + * {@link #parse()}, otherwise the source IRI will be used as the base IRI. + * <p> + * This method will override any source set with {@link #source(Path)}, + * {@link #source(InputStream)} or {@link #source(IRI)}. + * + * @param iri + * An IRI to retrieve and parse + * @return An {@link RDFParser} that will use the specified source. + * @throws IllegalArgumentException + * If the iri is not a valid absolute IRI string + * + */ + RDFParser source(String iri) throws IllegalArgumentException; + + /** + * Parse the specified source. + * <p> + * A source method (e.g. 
{@link #source(InputStream)}, {@link #source(IRI)}, + * {@link #source(Path)}, {@link #source(String)} or an equivalent subclass + * method) MUST have been called before calling this method, otherwise an + * {@link IllegalStateException} will be thrown. + * <p> + * A target method (e.g. {@link #target(Consumer)}, {@link #target(Dataset)}, + * {@link #target(Graph)} or an equivalent subclass method) MUST have been + * called before calling parse(), otherwise an + * {@link IllegalStateException} will be thrown. + * <p> + * It is undefined if this method is thread-safe, however the + * {@link RDFParser} may be reused (e.g. setting a different source) + * as soon as the {@link Future} has been returned from this method. + * <p> + * The RDFParser SHOULD perform the parsing as an asynchronous + * operation, and return the {@link Future} as soon as preliminary checks + * (such as validity of the {@link #source(IRI)} and + * {@link #contentType(RDFSyntax)} settings) have finished. The future + * SHOULD NOT mark {@link Future#isDone()} before parsing is complete. A + * synchronous implementation MAY be blocking on the <code>parse()</code> + * call and return a Future that is already {@link Future#isDone()}. + * <p> + * The returned {@link Future} contains a {@link ParseResult}. + * Implementations may subclass this interface to provide any + * parser details, e.g. list of warnings. <code>null</code> is a + * possible return value if no details are available, but + * parsing succeeded. + * <p> + * If an exception occurs during parsing, (e.g. {@link IOException} or + * {@link org.apache.commons.rdf.simple.RDFParseException}), + * it should be indicated as the + * {@link java.util.concurrent.ExecutionException#getCause()} in the + * {@link java.util.concurrent.ExecutionException} thrown on + * {@link Future#get()}. + * + * @return A Future that will return the {@link ParseResult} when the + * parsing has finished. 
+ * @throws IOException + * If an error occurred while starting to read the source (e.g. + * file not found, unsupported IRI protocol). Note that IO + * errors during parsing would instead be the + * {@link java.util.concurrent.ExecutionException#getCause()} of + * the {@link java.util.concurrent.ExecutionException} thrown on + * {@link Future#get()}. + * @throws IllegalStateException + * If the builder is in an invalid state, e.g. a + * <code>source</code> has not been set. + */ + Future<? extends ParseResult> parse() throws IOException, IllegalStateException; +} http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/api/src/main/java/org/apache/commons/rdf/experimental/package-info.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/commons/rdf/experimental/package-info.java b/api/src/main/java/org/apache/commons/rdf/experimental/package-info.java new file mode 100644 index 0000000..5f24ddc --- /dev/null +++ b/api/src/main/java/org/apache/commons/rdf/experimental/package-info.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Experimental Commons RDF features. 
+ * <p> + * Interfaces/classes in this package should be considered <strong>at + * risk</strong>; they might change or be removed in the next minor update of + * Commons RDF. + * <p> + * When class/interface has stabilized, it will move to the + * {@link org.apache.commons.rdf.api} package. + * <p> + * <ul> + * <li>{@link RDFParser} - a builder-like interface for parsing RDF to a + * {@link org.apache.commons.rdf.api.Graph} or + * {@link org.apache.commons.rdf.api.Dataset}.</li> + * </ul> + */ +package org.apache.commons.rdf.experimental; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jena/src/main/java/org/apache/commons/rdf/jena/JenaRDFParser.java ---------------------------------------------------------------------- diff --git a/jena/src/main/java/org/apache/commons/rdf/jena/JenaRDFParser.java b/jena/src/main/java/org/apache/commons/rdf/jena/JenaRDFParser.java deleted file mode 100644 index 9690811..0000000 --- a/jena/src/main/java/org/apache/commons/rdf/jena/JenaRDFParser.java +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.rdf.jena; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.util.function.Consumer; - -import org.apache.commons.rdf.api.IRI; -import org.apache.commons.rdf.api.QuadLike; -import org.apache.commons.rdf.api.RDFParser; -import org.apache.commons.rdf.api.RDFTerm; -import org.apache.commons.rdf.api.RDFTermFactory; -import org.apache.commons.rdf.api.TripleLike; -import org.apache.commons.rdf.simple.AbstractRDFParser; -import org.apache.jena.graph.Graph; -import org.apache.jena.riot.Lang; -import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.system.StreamRDFLib; - -public class JenaRDFParser extends AbstractRDFParser<JenaRDFParser> implements RDFParser { - - private Consumer<TripleLike<RDFTerm, RDFTerm, RDFTerm>> generalizedConsumerTriple; - private Consumer<QuadLike<RDFTerm, RDFTerm, RDFTerm, RDFTerm>> generalizedConsumerQuad; - - protected RDFTermFactory createRDFTermFactory() { - return new JenaRDFTermFactory(); - } - - public JenaRDFParser targetGeneralizedTriple(Consumer<TripleLike<RDFTerm,RDFTerm,RDFTerm>> consumer) { - JenaRDFParser c = this.clone(); - c.resetTarget(); - c.generalizedConsumerTriple = consumer; - return c; - } - - public JenaRDFParser targetGeneralizedQuad(Consumer<QuadLike<RDFTerm,RDFTerm,RDFTerm,RDFTerm>> consumer) { - JenaRDFParser c = this.clone(); - c.resetTarget(); - c.generalizedConsumerQuad = consumer; - return c; - } - - @Override - protected void resetTarget() { - super.resetTarget(); - this.generalizedConsumerTriple = null; - this.generalizedConsumerQuad = null; - } - - @Override - protected void parseSynchronusly() throws IOException { - StreamRDF dest; - if (getTargetGraph().isPresent() && getTargetGraph().get() instanceof JenaGraph) { - Graph jenaGraph = ((JenaGraph) getTargetGraph().get()).asJenaGraph(); - dest = StreamRDFLib.graph(jenaGraph); - } else if (generalizedConsumerQuad != 
null) { - dest = getJenaFactory().streamJenaToGeneralizedQuad(generalizedConsumerQuad); - } else if (generalizedConsumerTriple != null) { - dest = getJenaFactory().streamJenaToGeneralizedTriple(generalizedConsumerTriple); - } else { - dest = JenaRDFTermFactory.streamJenaToCommonsRDF(getRdfTermFactory().get(), getTarget()); - } - - Lang lang = getContentTypeSyntax().flatMap(JenaRDFTermFactory::rdfSyntaxToLang).orElse(null); - String baseStr = getBase().map(IRI::getIRIString).orElse(null); - - if (getSourceIri().isPresent()) { - RDFDataMgr.parse(dest, getSourceIri().get().toString(), baseStr, lang, null); - } else if (getSourceFile().isPresent()) { - try (InputStream s = Files.newInputStream(getSourceFile().get())) { - RDFDataMgr.parse(dest, s, baseStr, lang, null); - } - } else { - RDFDataMgr.parse(dest, getSourceInputStream().get(), baseStr, lang, null); - } - } - - private JenaRDFTermFactory getJenaFactory() { - return (JenaRDFTermFactory) getRdfTermFactory() - .filter(JenaRDFTermFactory.class::isInstance) - .orElseGet(this::createRDFTermFactory); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jena/src/main/java/org/apache/commons/rdf/jena/experimental/JenaRDFParser.java ---------------------------------------------------------------------- diff --git a/jena/src/main/java/org/apache/commons/rdf/jena/experimental/JenaRDFParser.java b/jena/src/main/java/org/apache/commons/rdf/jena/experimental/JenaRDFParser.java new file mode 100644 index 0000000..873f1cf --- /dev/null +++ b/jena/src/main/java/org/apache/commons/rdf/jena/experimental/JenaRDFParser.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.rdf.jena.experimental; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.util.function.Consumer; + +import org.apache.commons.rdf.api.IRI; +import org.apache.commons.rdf.api.QuadLike; +import org.apache.commons.rdf.api.RDFTerm; +import org.apache.commons.rdf.api.RDFTermFactory; +import org.apache.commons.rdf.api.TripleLike; +import org.apache.commons.rdf.experimental.RDFParser; +import org.apache.commons.rdf.jena.JenaGraph; +import org.apache.commons.rdf.jena.JenaRDFTermFactory; +import org.apache.commons.rdf.simple.experimental.AbstractRDFParser; +import org.apache.jena.graph.Graph; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.system.StreamRDFLib; + +public class JenaRDFParser extends AbstractRDFParser<JenaRDFParser> implements RDFParser { + + private Consumer<TripleLike<RDFTerm, RDFTerm, RDFTerm>> generalizedConsumerTriple; + private Consumer<QuadLike<RDFTerm, RDFTerm, RDFTerm, RDFTerm>> generalizedConsumerQuad; + + protected RDFTermFactory createRDFTermFactory() { + return new JenaRDFTermFactory(); + } + + public JenaRDFParser targetGeneralizedTriple(Consumer<TripleLike<RDFTerm,RDFTerm,RDFTerm>> consumer) { + JenaRDFParser c = this.clone(); + c.resetTarget(); + c.generalizedConsumerTriple = 
consumer; + return c; + } + + public JenaRDFParser targetGeneralizedQuad(Consumer<QuadLike<RDFTerm,RDFTerm,RDFTerm,RDFTerm>> consumer) { + JenaRDFParser c = this.clone(); + c.resetTarget(); + c.generalizedConsumerQuad = consumer; + return c; + } + + @Override + protected void resetTarget() { + super.resetTarget(); + this.generalizedConsumerTriple = null; + this.generalizedConsumerQuad = null; + } + + @Override + protected void parseSynchronusly() throws IOException { + StreamRDF dest; + if (getTargetGraph().isPresent() && getTargetGraph().get() instanceof JenaGraph) { + Graph jenaGraph = ((JenaGraph) getTargetGraph().get()).asJenaGraph(); + dest = StreamRDFLib.graph(jenaGraph); + } else if (generalizedConsumerQuad != null) { + dest = getJenaFactory().streamJenaToGeneralizedQuad(generalizedConsumerQuad); + } else if (generalizedConsumerTriple != null) { + dest = getJenaFactory().streamJenaToGeneralizedTriple(generalizedConsumerTriple); + } else { + dest = JenaRDFTermFactory.streamJenaToCommonsRDF(getRdfTermFactory().get(), getTarget()); + } + + Lang lang = getContentTypeSyntax().flatMap(JenaRDFTermFactory::rdfSyntaxToLang).orElse(null); + String baseStr = getBase().map(IRI::getIRIString).orElse(null); + + if (getSourceIri().isPresent()) { + RDFDataMgr.parse(dest, getSourceIri().get().toString(), baseStr, lang, null); + } else if (getSourceFile().isPresent()) { + try (InputStream s = Files.newInputStream(getSourceFile().get())) { + RDFDataMgr.parse(dest, s, baseStr, lang, null); + } + } else { + RDFDataMgr.parse(dest, getSourceInputStream().get(), baseStr, lang, null); + } + } + + private JenaRDFTermFactory getJenaFactory() { + return (JenaRDFTermFactory) getRdfTermFactory() + .filter(JenaRDFTermFactory.class::isInstance) + .orElseGet(this::createRDFTermFactory); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jena/src/main/java/org/apache/commons/rdf/jena/experimental/package-info.java 
---------------------------------------------------------------------- diff --git a/jena/src/main/java/org/apache/commons/rdf/jena/experimental/package-info.java b/jena/src/main/java/org/apache/commons/rdf/jena/experimental/package-info.java new file mode 100644 index 0000000..9fe39f4 --- /dev/null +++ b/jena/src/main/java/org/apache/commons/rdf/jena/experimental/package-info.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Experimental Commons RDF Jena implementations. + * <p> + * Classes in this package should be considered <strong>at + * risk</strong>; they might change or be removed in the next minor update of + * Commons RDF. + * <p> + * When a class has stabilized, it will move to the + * {@link org.apache.commons.rdf.jena} package. 
+ * <p> + * <ul> + * <li>{@link JenaRDFParser} - a Jena-backed + * implementation of + * {@link org.apache.commons.rdf.experimental.RDFParser}.</li> + * </ul> + */ +package org.apache.commons.rdf.jena.experimental; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jena/src/test/java/org/apache/commons/rdf/jena/TestRDFParserBuilder.java ---------------------------------------------------------------------- diff --git a/jena/src/test/java/org/apache/commons/rdf/jena/TestRDFParserBuilder.java b/jena/src/test/java/org/apache/commons/rdf/jena/TestRDFParserBuilder.java index ead3e3a..cd57a0e 100644 --- a/jena/src/test/java/org/apache/commons/rdf/jena/TestRDFParserBuilder.java +++ b/jena/src/test/java/org/apache/commons/rdf/jena/TestRDFParserBuilder.java @@ -28,8 +28,9 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.commons.rdf.api.Graph; -import org.apache.commons.rdf.api.RDFParser.ParseResult; import org.apache.commons.rdf.api.RDFSyntax; +import org.apache.commons.rdf.experimental.RDFParser.ParseResult; +import org.apache.commons.rdf.jena.experimental.JenaRDFParser; import org.junit.After; import org.junit.Before; import org.junit.Test; http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilder.java ---------------------------------------------------------------------- diff --git a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilder.java b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilder.java deleted file mode 100644 index 2219c74..0000000 --- a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilder.java +++ /dev/null @@ -1,157 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rdf.jsonldjava; - -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Files; -import java.util.function.Predicate; - -import org.apache.commons.rdf.api.Dataset; -import org.apache.commons.rdf.api.Graph; -import org.apache.commons.rdf.api.IRI; -import org.apache.commons.rdf.api.RDFSyntax; -import org.apache.commons.rdf.simple.AbstractRDFParser; - -import com.github.jsonldjava.core.JsonLdError; -import com.github.jsonldjava.core.JsonLdOptions; -import com.github.jsonldjava.core.JsonLdProcessor; -import com.github.jsonldjava.core.RDFDataset; -import com.github.jsonldjava.utils.JsonUtils; - -public class JsonLdParserBuilder extends AbstractRDFParser<JsonLdParserBuilder> { - - @Override - protected JsonLdRDFTermFactory createRDFTermFactory() { - return new JsonLdRDFTermFactory(); - } - - @Override - public JsonLdParserBuilder contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException { - if (rdfSyntax != null && rdfSyntax != RDFSyntax.JSONLD) { - throw new IllegalArgumentException("Unsupported contentType: " + rdfSyntax); - } - return super.contentType(rdfSyntax); - } - - @Override - public 
JsonLdParserBuilder contentType(String contentType) throws IllegalArgumentException { - JsonLdParserBuilder c = (JsonLdParserBuilder) super.contentType(contentType); - if (c.getContentType().filter(Predicate.isEqual(RDFSyntax.JSONLD).negate()).isPresent()) { - throw new IllegalArgumentException("Unsupported contentType: " + contentType); - } - return c; - } - - private static URL asURL(IRI iri) throws IllegalStateException { - try { - return new URI(iri.getIRIString()).toURL(); - } catch (MalformedURLException | URISyntaxException e) { - throw new IllegalStateException("Invalid URL: " + iri.getIRIString()); - } - } - - @Override - protected void checkSource() throws IOException { - super.checkSource(); - // Might throw IllegalStateException if invalid - getSourceIri().map(JsonLdParserBuilder::asURL); - } - - @Override - protected void parseSynchronusly() throws IOException { - Object json = readSource(); - JsonLdOptions options = new JsonLdOptions(); - getBase().map(IRI::getIRIString).ifPresent(options::setBase); - // TODO: base from readSource() (after redirection and Content-Location header) - // should be forwarded - - // TODO: Modify JsonLdProcessor to accept the target RDFDataset - RDFDataset rdfDataset; - try { - rdfDataset = (RDFDataset) JsonLdProcessor.toRDF(json, options); - } catch (JsonLdError e) { - throw new IOException("Could not parse Json-LD", e); - } - if (getTargetGraph().isPresent()) { - Graph intoGraph = getTargetGraph().get(); - if (intoGraph instanceof JsonLdGraph && ! 
intoGraph.contains(null, null, null)) { - // Empty graph, we can just move over the map content directly: - JsonLdGraph jsonLdGraph = (JsonLdGraph) intoGraph; - jsonLdGraph.getRdfDataSet().putAll(rdfDataset); - return; - // otherwise we have to merge as normal - } - // TODO: Modify JsonLdProcessor to have an actual triple callback - Graph parsedGraph = getJsonLdRDFTermFactory().asGraph(rdfDataset); - // sequential() as we don't know if destination is thread safe :-/ - parsedGraph.stream().sequential().forEach(intoGraph::add); - } else if (getTargetDataset().isPresent()) { - Dataset intoDataset = getTargetDataset().get(); - if (intoDataset instanceof JsonLdDataset && - ! intoDataset.contains(null, null, null, null)) { - JsonLdDataset jsonLdDataset = (JsonLdDataset) intoDataset; - // Empty - we can just do a brave replace! - jsonLdDataset.getRdfDataSet().putAll(rdfDataset); - return; - // otherwise we have to merge.. but also avoid duplicate triples, - // map blank nodes etc, so we'll fall back to normal Dataset appending. 
- } - Dataset fromDataset = getJsonLdRDFTermFactory().asDataset(rdfDataset); - // .sequential() as we don't know if destination is thread-safe :-/ - fromDataset.stream().sequential().forEach(intoDataset::add); - } else { - Dataset fromDataset = getJsonLdRDFTermFactory().asDataset(rdfDataset); - // No need for .sequential() here - fromDataset.stream().forEach(getTarget()); - } - } - - private JsonLdRDFTermFactory getJsonLdRDFTermFactory() { - if (getRdfTermFactory().isPresent() && getRdfTermFactory().get() instanceof JsonLdRDFTermFactory) { - return (JsonLdRDFTermFactory) getRdfTermFactory().get(); - } - return createRDFTermFactory(); - } - - private Object readSource() throws IOException { - // Due to checked IOException we can't easily - // do this with .map and .orElseGet() - - if (getSourceInputStream().isPresent()) { - return JsonUtils.fromInputStream(getSourceInputStream().get()); - } - if (getSourceIri().isPresent()) { - // TODO: propagate @base from content - return JsonUtils.fromURL(asURL(getSourceIri().get()), - JsonUtils.getDefaultHttpClient()); - } - if (getSourceFile().isPresent()) { - try (InputStream inputStream = Files.newInputStream(getSourceFile().get())){ - return JsonUtils.fromInputStream(inputStream); - } - } - throw new IllegalStateException("No known source found"); - } - -} - http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/JsonLdParser.java ---------------------------------------------------------------------- diff --git a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/JsonLdParser.java b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/JsonLdParser.java new file mode 100644 index 0000000..102b2d4 --- /dev/null +++ b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/JsonLdParser.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rdf.jsonldjava.experimental; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.util.function.Predicate; + +import org.apache.commons.rdf.api.Dataset; +import org.apache.commons.rdf.api.Graph; +import org.apache.commons.rdf.api.IRI; +import org.apache.commons.rdf.api.RDFSyntax; +import org.apache.commons.rdf.jsonldjava.JsonLdDataset; +import org.apache.commons.rdf.jsonldjava.JsonLdGraph; +import org.apache.commons.rdf.jsonldjava.JsonLdRDFTermFactory; +import org.apache.commons.rdf.simple.experimental.AbstractRDFParser; + +import com.github.jsonldjava.core.JsonLdError; +import com.github.jsonldjava.core.JsonLdOptions; +import com.github.jsonldjava.core.JsonLdProcessor; +import com.github.jsonldjava.core.RDFDataset; +import com.github.jsonldjava.utils.JsonUtils; + +public class JsonLdParser extends AbstractRDFParser<JsonLdParser> { + + @Override + protected JsonLdRDFTermFactory createRDFTermFactory() { + return new JsonLdRDFTermFactory(); + } + + @Override + public JsonLdParser contentType(RDFSyntax rdfSyntax) throws 
IllegalArgumentException { + if (rdfSyntax != null && rdfSyntax != RDFSyntax.JSONLD) { + throw new IllegalArgumentException("Unsupported contentType: " + rdfSyntax); + } + return super.contentType(rdfSyntax); + } + + @Override + public JsonLdParser contentType(String contentType) throws IllegalArgumentException { + JsonLdParser c = (JsonLdParser) super.contentType(contentType); + if (c.getContentType().filter(Predicate.isEqual(RDFSyntax.JSONLD).negate()).isPresent()) { + throw new IllegalArgumentException("Unsupported contentType: " + contentType); + } + return c; + } + + private static URL asURL(IRI iri) throws IllegalStateException { + try { + return new URI(iri.getIRIString()).toURL(); + } catch (MalformedURLException | URISyntaxException e) { + throw new IllegalStateException("Invalid URL: " + iri.getIRIString()); + } + } + + @Override + protected void checkSource() throws IOException { + super.checkSource(); + // Might throw IllegalStateException if invalid + getSourceIri().map(JsonLdParser::asURL); + } + + @Override + protected void parseSynchronusly() throws IOException { + Object json = readSource(); + JsonLdOptions options = new JsonLdOptions(); + getBase().map(IRI::getIRIString).ifPresent(options::setBase); + // TODO: base from readSource() (after redirection and Content-Location header) + // should be forwarded + + // TODO: Modify JsonLdProcessor to accept the target RDFDataset + RDFDataset rdfDataset; + try { + rdfDataset = (RDFDataset) JsonLdProcessor.toRDF(json, options); + } catch (JsonLdError e) { + throw new IOException("Could not parse Json-LD", e); + } + if (getTargetGraph().isPresent()) { + Graph intoGraph = getTargetGraph().get(); + if (intoGraph instanceof JsonLdGraph && ! 
intoGraph.contains(null, null, null)) { + // Empty graph, we can just move over the map content directly: + JsonLdGraph jsonLdGraph = (JsonLdGraph) intoGraph; + jsonLdGraph.getRdfDataSet().putAll(rdfDataset); + return; + // otherwise we have to merge as normal + } + // TODO: Modify JsonLdProcessor to have an actual triple callback + Graph parsedGraph = getJsonLdRDFTermFactory().asGraph(rdfDataset); + // sequential() as we don't know if destination is thread safe :-/ + parsedGraph.stream().sequential().forEach(intoGraph::add); + } else if (getTargetDataset().isPresent()) { + Dataset intoDataset = getTargetDataset().get(); + if (intoDataset instanceof JsonLdDataset && + ! intoDataset.contains(null, null, null, null)) { + JsonLdDataset jsonLdDataset = (JsonLdDataset) intoDataset; + // Empty - we can just do a brave replace! + jsonLdDataset.getRdfDataSet().putAll(rdfDataset); + return; + // otherwise we have to merge.. but also avoid duplicate triples, + // map blank nodes etc, so we'll fall back to normal Dataset appending. 
+ } + Dataset fromDataset = getJsonLdRDFTermFactory().asDataset(rdfDataset); + // .sequential() as we don't know if destination is thread-safe :-/ + fromDataset.stream().sequential().forEach(intoDataset::add); + } else { + Dataset fromDataset = getJsonLdRDFTermFactory().asDataset(rdfDataset); + // No need for .sequential() here + fromDataset.stream().forEach(getTarget()); + } + } + + private JsonLdRDFTermFactory getJsonLdRDFTermFactory() { + if (getRdfTermFactory().isPresent() && getRdfTermFactory().get() instanceof JsonLdRDFTermFactory) { + return (JsonLdRDFTermFactory) getRdfTermFactory().get(); + } + return createRDFTermFactory(); + } + + private Object readSource() throws IOException { + // Due to checked IOException we can't easily + // do this with .map and .orElseGet() + + if (getSourceInputStream().isPresent()) { + return JsonUtils.fromInputStream(getSourceInputStream().get()); + } + if (getSourceIri().isPresent()) { + // TODO: propagate @base from content + return JsonUtils.fromURL(asURL(getSourceIri().get()), + JsonUtils.getDefaultHttpClient()); + } + if (getSourceFile().isPresent()) { + try (InputStream inputStream = Files.newInputStream(getSourceFile().get())){ + return JsonUtils.fromInputStream(inputStream); + } + } + throw new IllegalStateException("No known source found"); + } + +} + http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/package-info.java ---------------------------------------------------------------------- diff --git a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/package-info.java b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/package-info.java new file mode 100644 index 0000000..fbd595e --- /dev/null +++ b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/experimental/package-info.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Experimental Commons RDF JSONLD-Java implementations. + * <p> + * Classes in this package should be considered <strong>at + * risk</strong>; they might change or be removed in the next minor update of + * Commons RDF. + * <p> + * When a class has stabilized, it will move to the + * {@link org.apache.commons.rdf.jsonldjava} package.
+ * <p> + * <ul> + * <li>{@link JsonLdParser} - a JSONLD-Java-backed + * implementation of + * {@link org.apache.commons.rdf.experimental.RDFParser}.</li> + * </ul> + */ +package org.apache.commons.rdf.jsonldjava.experimental; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/jsonld-java/src/test/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilderTest.java ---------------------------------------------------------------------- diff --git a/jsonld-java/src/test/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilderTest.java b/jsonld-java/src/test/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilderTest.java index 37fa560..4d846ee 100644 --- a/jsonld-java/src/test/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilderTest.java +++ b/jsonld-java/src/test/java/org/apache/commons/rdf/jsonldjava/JsonLdParserBuilderTest.java @@ -32,6 +32,7 @@ import org.apache.commons.rdf.api.Graph; import org.apache.commons.rdf.api.IRI; import org.apache.commons.rdf.api.Literal; import org.apache.commons.rdf.api.RDFSyntax; +import org.apache.commons.rdf.jsonldjava.experimental.JsonLdParser; import org.apache.commons.rdf.simple.Types; import org.junit.Test; @@ -57,7 +58,7 @@ public class JsonLdParserBuilderTest { assertNotNull("Test resource not found: " + TEST_JSONLD, url); IRI iri = factory.createIRI(url.toString()); Graph g = factory.createGraph(); - new JsonLdParserBuilder() + new JsonLdParser() .contentType(RDFSyntax.JSONLD) .source(iri) .target(g) @@ -75,7 +76,7 @@ public class JsonLdParserBuilderTest { Files.copy(is, path, StandardCopyOption.REPLACE_EXISTING); } Graph g = factory.createGraph(); - new JsonLdParserBuilder() + new JsonLdParser() .contentType(RDFSyntax.JSONLD) .source(path) .target(g) @@ -89,7 +90,7 @@ public class JsonLdParserBuilderTest { Graph g = factory.createGraph(); try (InputStream is = getClass().getResourceAsStream(TEST_JSONLD)) { assertNotNull("Test resource not found: " + TEST_JSONLD,
is); - new JsonLdParserBuilder() + new JsonLdParser() .base("http://example.com/base/") .contentType(RDFSyntax.JSONLD).source(is) .target(g) http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/a189f91e/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParser.java ---------------------------------------------------------------------- diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParser.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParser.java deleted file mode 100644 index dd82044..0000000 --- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParser.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.rdf.rdf4j; - -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Optional; -import java.util.function.Consumer; - -import org.apache.commons.rdf.api.IRI; -import org.apache.commons.rdf.api.Quad; -import org.apache.commons.rdf.api.RDFParser; -import org.apache.commons.rdf.api.RDFSyntax; -import org.apache.commons.rdf.simple.AbstractRDFParser; -import org.eclipse.rdf4j.model.Model; -import org.eclipse.rdf4j.repository.util.RDFInserter; -import org.eclipse.rdf4j.repository.util.RDFLoader; -import org.eclipse.rdf4j.rio.ParserConfig; -import org.eclipse.rdf4j.rio.RDFFormat; -import org.eclipse.rdf4j.rio.RDFHandler; -import org.eclipse.rdf4j.rio.RDFHandlerException; -import org.eclipse.rdf4j.rio.Rio; -import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler; - -/** - * RDF4J-based parser. - * <p> - * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD}, - * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES}, - * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE} - * - additional syntaxes can be supported by including the corresponding - * <em>rdf4j-rio-*</em> module on the classpath. - * - */ -public class RDF4JParser extends AbstractRDFParser<RDF4JParser> implements RDFParser { - - private final class AddToQuadConsumer extends AbstractRDFHandler { - private final Consumer<Quad> quadTarget; - - private AddToQuadConsumer(Consumer<Quad> quadTarget) { - this.quadTarget = quadTarget; - } - - public void handleStatement(org.eclipse.rdf4j.model.Statement st) - throws org.eclipse.rdf4j.rio.RDFHandlerException { - // TODO: if getRdfTermFactory() is a non-rdf4j factory, should - // we use factory.createQuad() instead? - // Unsure what is the promise of setting getRdfTermFactory() -- - // does it go all the way down to creating BlankNode, IRI and - // Literal? 
- quadTarget.accept(rdf4jTermFactory.asQuad(st)); - // Performance note: - // Graph/Quad.add should pick up again our - // RDF4JGraphLike.asStatement() - // and avoid double conversion. - // Additionally the RDF4JQuad and RDF4JTriple implementations - // are lazily converting subj/obj/pred/graph.s - } - } - - private final static class AddToModel extends AbstractRDFHandler { - private final Model model; - - public AddToModel(Model model) { - this.model = model; - } - - public void handleStatement(org.eclipse.rdf4j.model.Statement st) - throws org.eclipse.rdf4j.rio.RDFHandlerException { - model.add(st); - } - - @Override - public void handleNamespace(String prefix, String uri) throws RDFHandlerException { - model.setNamespace(prefix, uri); - } - } - - private RDF4JTermFactory rdf4jTermFactory; - - @Override - protected RDF4JTermFactory createRDFTermFactory() { - return new RDF4JTermFactory(); - } - - @Override - protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException { - RDF4JParser c = prepareForParsing(); - // Ensure we have an RDF4JTermFactory for conversion. - // We'll make a new one if user has provided a non-RDF4J factory - c.rdf4jTermFactory = (RDF4JTermFactory) getRdfTermFactory().filter(RDF4JTermFactory.class::isInstance) - .orElseGet(c::createRDFTermFactory); - return c; - } - - @Override - protected void parseSynchronusly() throws IOException { - Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType); - String base = getBase().map(IRI::getIRIString).orElse(null); - - ParserConfig parserConfig = new ParserConfig(); - // TODO: Should we need to set anything? 
- RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory()); - RDFHandler rdfHandler = makeRDFHandler(); - if (getSourceFile().isPresent()) { - // NOTE: While we could have used - // loader.load(sourcePath.toFile() - // if the path fs provider == FileSystems.getDefault(), - // that RDFLoader method does not use absolute path - // as the base URI, so to be consistent - // we'll always do it with our own input stream - // - // That means we may have to guess format by extensions: - Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString) - .flatMap(Rio::getParserFormatForFileName); - // TODO: for the excited.. what about the extension after following symlinks? - - RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null)); - try (InputStream in = Files.newInputStream(getSourceFile().get())) { - loader.load(in, base, format, rdfHandler); - } - } else if (getSourceIri().isPresent()) { - try { - // TODO: Handle international IRIs properly - // (Unicode support for for hostname, path and query) - URL url = new URL(getSourceIri().get().getIRIString()); - // TODO: This probably does not support https:// -> http:// redirections - loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler()); - } catch (MalformedURLException ex) { - throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex); - } - } - // must be getSourceInputStream then, this is guaranteed by super.checkSource(); - loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler); - } - - protected RDFHandler makeRDFHandler() { - - // TODO: Can we join the below DF4JDataset and RDF4JGraph cases - // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>> - // or will that need tricky generics types? 
- - if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) { - // One of us, we can add them as Statements directly - RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get(); - if (dataset.asRepository().isPresent()) { - return new RDFInserter(dataset.asRepository().get().getConnection()); - } - if (dataset.asModel().isPresent()) { - Model model = dataset.asModel().get(); - return new AddToModel(model); - } - // Not backed by Repository or Model? - // Third-party RDF4JDataset subclass, so we'll fall through to the - // getTarget() handling further down - } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) { - RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get(); - - if (graph.asRepository().isPresent()) { - RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection()); - graph.getContextFilter().ifPresent(inserter::enforceContext); - return inserter; - } - if (graph.asModel().isPresent() && graph.getContextFilter().isPresent()) { - Model model = graph.asModel().get(); - return new AddToModel(model); - } - // else - fall through - } - - // Fall thorough: let target() consume our converted quads. - return new AddToQuadConsumer(getTarget()); - } - -}