http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/313fbf75/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParser.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParser.java b/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParser.java new file mode 100644 index 0000000..e58fced --- /dev/null +++ b/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParser.java @@ -0,0 +1,541 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.rdf.simple; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.function.Consumer; + +import org.apache.commons.rdf.api.Dataset; +import org.apache.commons.rdf.api.Graph; +import org.apache.commons.rdf.api.IRI; +import org.apache.commons.rdf.api.Quad; +import org.apache.commons.rdf.api.RDFParser; +import org.apache.commons.rdf.api.RDFSyntax; +import org.apache.commons.rdf.api.RDFTermFactory; + +/** + * Abstract RDFParser + * <p> + * This abstract class keeps the properties in protected fields like + * {@link #sourceFile} using {@link Optional}. Some basic checking like + * {@link #checkIsAbsolute(IRI)} is performed. + * <p> + * This class and its subclasses are {@link Cloneable}, immutable and + * (therefore) thread-safe - each call to option methods like + * {@link #contentType(String)} or {@link #source(IRI)} will return a cloned, + * mutated copy. + * <p> + * By default, parsing is done by the abstract method + * {@link #parseSynchronusly()} - which is executed in a cloned snapshot - hence + * multiple {@link #parse()} calls are thread-safe. The default {@link #parse()} + * uses a thread pool in {@link #threadGroup} - but implementations can override + * {@link #parse()} (e.g. because it has its own threading model or use + * asynchronous remote execution). 
+ */ +public abstract class AbstractRDFParser<T extends AbstractRDFParser<T>> + implements RDFParser, Cloneable { + + public static final ThreadGroup threadGroup = new ThreadGroup("Commons RDF parsers"); + private static final ExecutorService threadpool = Executors.newCachedThreadPool(r -> new Thread(threadGroup, r)); + + // Basically only used for creating IRIs + private static RDFTermFactory internalRdfTermFactory = new SimpleRDFTermFactory(); + + /** + * Get the set {@link RDFTermFactory}, if any. + */ + public Optional<RDFTermFactory> getRdfTermFactory() { + return rdfTermFactory; + } + + /** + * Get the set content-type {@link RDFSyntax}, if any. + * <p> + * If this is {@link Optional#isPresent()}, then + * {@link #getContentType()} contains the + * value of {@link RDFSyntax#mediaType}. + */ + public Optional<RDFSyntax> getContentTypeSyntax() { + return contentTypeSyntax; + } + + /** + * Get the set content-type String, if any. + * <p> + * If this is {@link Optional#isPresent()} and + * is recognized by {@link RDFSyntax#byMediaType(String)}, then + * the corresponding {@link RDFSyntax} is set on + * {@link #getContentTypeSyntax()}, otherwise that is + * {@link Optional#empty()}. + */ + public final Optional<String> getContentType() { + return contentType; + } + + /** + * Get the target to consume parsed Quads. + * <p> + * From the call to {@link #parseSynchronusly()}, this + * method returns a non-null consumer (see {@link #checkTarget()}). + * + */ + public Consumer<Quad> getTarget() { + return target; + } + + /** + * Get the target dataset as set by {@link #target(Dataset)}. + * <p> + * The return value is {@link Optional#isPresent()} if and only if + * {@link #target(Dataset)} has been set, meaning that the implementation + * may choose to append parsed quads to the {@link Dataset} directly instead + * of relying on the generated {@link #getTarget()} consumer. + * <p> + * If this value is present, then {@link #getTargetGraph()} MUST + * be {@link Optional#empty()}.
+ * + * @return The target Dataset, or {@link Optional#empty()} if another kind of target has been set. + */ + public Optional<Dataset> getTargetDataset() { + return targetDataset; + } + + /** + * Get the target graph as set by {@link #target(Graph)}. + * <p> + * The return value is {@link Optional#isPresent()} if and only if + * {@link #target(Graph)} has been set, meaning that the implementation + * may choose to append parsed triples to the {@link Graph} directly instead + * of relying on the generated {@link #getTarget()} consumer. + * <p> + * If this value is present, then {@link #getTargetDataset()} MUST + * be {@link Optional#empty()}. + * + * @return The target Graph, or {@link Optional#empty()} if another kind of target has been set. + */ + public Optional<Graph> getTargetGraph() { + return targetGraph; + } + + /** + * Get the set base {@link IRI}, if present. + * <p> + * + */ + public Optional<IRI> getBase() { + return base; + } + + /** + * Get the set source {@link InputStream}. + * <p> + * If this is {@link Optional#isPresent()}, then + * {@link #getSourceFile()} and {@link #getSourceIri()} + * are {@link Optional#empty()}. + */ + public Optional<InputStream> getSourceInputStream() { + return sourceInputStream; + } + + /** + * Get the set source {@link Path}. + * <p> + * If this is {@link Optional#isPresent()}, then + * {@link #getSourceInputStream()} and {@link #getSourceIri()} + * are {@link Optional#empty()}. + */ + public Optional<Path> getSourceFile() { + return sourceFile; + } + + /** + * Get the set source {@link IRI}. + * <p> + * If this is {@link Optional#isPresent()}, then + * {@link #getSourceInputStream()} and {@link #getSourceFile()} + * are {@link Optional#empty()}.
+ */ + public Optional<IRI> getSourceIri() { + return sourceIri; + } + + + private Optional<RDFTermFactory> rdfTermFactory = Optional.empty(); + private Optional<RDFSyntax> contentTypeSyntax = Optional.empty(); + private Optional<String> contentType = Optional.empty(); + private Optional<IRI> base = Optional.empty(); + private Optional<InputStream> sourceInputStream = Optional.empty(); + private Optional<Path> sourceFile = Optional.empty(); + private Optional<IRI> sourceIri = Optional.empty(); + private Consumer<Quad> target; + private Optional<Dataset> targetDataset; + private Optional<Graph> targetGraph; + + @SuppressWarnings("unchecked") + @Override + public T clone() { + try { + return (T) super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + @SuppressWarnings("unchecked") + protected T asT() { + return (T) this; + } + + @Override + public T rdfTermFactory(RDFTermFactory rdfTermFactory) { + AbstractRDFParser<T> c = clone(); + c.rdfTermFactory = Optional.ofNullable(rdfTermFactory); + return c.asT(); + } + + @Override + public T contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException { + AbstractRDFParser<T> c = clone(); + c.contentTypeSyntax = Optional.ofNullable(rdfSyntax); + c.contentType = c.contentTypeSyntax.map(syntax -> syntax.mediaType); + return c.asT(); + } + + @Override + public T contentType(String contentType) throws IllegalArgumentException { + AbstractRDFParser<T> c = clone(); + c.contentType = Optional.ofNullable(contentType); + c.contentTypeSyntax = c.contentType.flatMap(RDFSyntax::byMediaType); + return c.asT(); + } + + @Override + public T base(IRI base) { + AbstractRDFParser<T> c = clone(); + c.base = Optional.ofNullable(base); + c.base.ifPresent(i -> checkIsAbsolute(i)); + return c.asT(); + } + + @Override + public T base(String base) throws IllegalArgumentException { + return base(internalRdfTermFactory.createIRI(base)); + } + + @Override + public T source(InputStream inputStream) { + 
AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceInputStream = Optional.ofNullable(inputStream); + return c.asT(); + } + + @Override + public T source(Path file) { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceFile = Optional.ofNullable(file); + return c.asT(); + } + + @Override + public T source(IRI iri) { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceIri = Optional.ofNullable(iri); + c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); + return c.asT(); + } + + @Override + public T source(String iri) throws IllegalArgumentException { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceIri = Optional.ofNullable(iri).map(internalRdfTermFactory::createIRI); + c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); + return source(internalRdfTermFactory.createIRI(iri)); + } + + /** + * Check if an iri is absolute. + * <p> + * Used by {@link #source(String)} and {@link #base(String)} + * + * @param iri + */ + protected void checkIsAbsolute(IRI iri) { + if (!URI.create(iri.getIRIString()).isAbsolute()) { + throw new IllegalArgumentException("IRI is not absolute: " + iri); + } + } + + /** + * Check that one and only one source is present and valid. + * <p> + * Used by {@link #parse()}. + * <p> + * Subclasses might override this method, e.g. to support other + * source combinations, or to check if the sourceIri is + * resolvable. 
+ * + * @throws IOException If a source file can't be read + */ + protected void checkSource() throws IOException { + if (!sourceFile.isPresent() && !sourceInputStream.isPresent() && !sourceIri.isPresent()) { + throw new IllegalStateException("No source has been set"); + } + if (sourceIri.isPresent() && sourceInputStream.isPresent()) { + throw new IllegalStateException("Both sourceIri and sourceInputStream have been set"); + } + if (sourceIri.isPresent() && sourceFile.isPresent()) { + throw new IllegalStateException("Both sourceIri and sourceFile have been set"); + } + if (sourceInputStream.isPresent() && sourceFile.isPresent()) { + throw new IllegalStateException("Both sourceInputStream and sourceFile have been set"); + } + if (sourceFile.isPresent() && !sourceFile.filter(Files::isReadable).isPresent()) { + throw new IOException("Can't read file: " + sourceFile); + } + } + + /** + * Check if base is required. + * + * @throws IllegalStateException if base is required, but not set. + */ + protected void checkBaseRequired() { + if (!base.isPresent() && sourceInputStream.isPresent() + && !contentTypeSyntax.filter(t -> t == RDFSyntax.NQUADS || t == RDFSyntax.NTRIPLES).isPresent()) { + throw new IllegalStateException("base iri required for inputstream source"); + } + } + + /** + * Reset all source* fields to Optional.empty() + * <p> + * Subclasses should override this and call <code>super.resetSource()</code> + * if they need to reset any additional source* fields. + * + */ + protected void resetSource() { + sourceInputStream = Optional.empty(); + sourceIri = Optional.empty(); + sourceFile = Optional.empty(); + } + + + /** + * Reset all optional target* fields to <code>Optional.empty()</code> + * <p> + * Note that the consumer set for {@link #getTarget()} is + * NOT reset. + * <p> + * Subclasses should override this and call <code>super.resetTarget()</code> + * if they need to reset any additional target* fields.
+ * + */ + protected void resetTarget() { + targetDataset = Optional.empty(); + targetGraph = Optional.empty(); + } + + /** + * Parse {@link #sourceInputStream}, {@link #sourceFile} or + * {@link #sourceIri}. + * <p> + * One of the source fields MUST be present, as checked by {@link #checkSource()}. + * <p> + * {@link #checkBaseRequired()} is called to verify if {@link #getBase()} is required. + * + * @throws IOException If the source could not be read + * @throws RDFParseException If the source could not be parsed (e.g. a .ttl file was not valid Turtle) + */ + protected abstract void parseSynchronusly() throws IOException, RDFParseException; + + /** + * Prepare a clone of this RDFParser which have been checked and + * completed. + * <p> + * The returned clone will always have + * {@link #getTarget()} and {@link #getRdfTermFactory()} present. + * <p> + * If the {@link #getSourceFile()} is present, but the + * {@link #getBase()} is not present, the base will be set to the + * <code>file:///</code> IRI for the Path's real path (e.g. resolving any + * symbolic links). + * + * @return A completed and checked clone of this RDFParser + * @throws IOException If the source was not accessible (e.g. a file was not found) + * @throws IllegalStateException If the parser was not in a compatible setting (e.g. contentType was an invalid string) + */ + protected T prepareForParsing() throws IOException, IllegalStateException { + checkSource(); + checkBaseRequired(); + checkContentType(); + checkTarget(); + + // We'll make a clone of our current state which will be passed to + // parseSynchronously() + AbstractRDFParser<T> c = clone(); + + // Use a fresh SimpleRDFTermFactory for each parse + if (!c.rdfTermFactory.isPresent()) { + c.rdfTermFactory = Optional.of(createRDFTermFactory()); + } + // sourceFile, but no base? 
Let's follow any symlinks and use + // the file:/// URI + if (c.sourceFile.isPresent() && !c.base.isPresent()) { + URI baseUri = c.sourceFile.get().toRealPath().toUri(); + c.base = Optional.of(internalRdfTermFactory.createIRI(baseUri.toString())); + } + + return c.asT(); + } + + /** + * Subclasses can override this method to check the target is + * valid. + * <p> + * The default implementation throws an IllegalStateException if the + * target has not been set. + */ + protected void checkTarget() { + if (target == null) { + throw new IllegalStateException("target has not been set"); + } + if (targetGraph.isPresent() && targetDataset.isPresent()) { + // This should not happen as each target(..) method resets the optionals + throw new IllegalStateException("targetGraph and targetDataset can't both be set"); + } + } + + /** + * Subclasses can override this method to check compatibility with the + * contentType setting. + * + * @throws IllegalStateException + * if the {@link #getContentType()} or + * {@link #getContentTypeSyntax()} is not compatible or invalid + */ + protected void checkContentType() throws IllegalStateException { + } + + /** + * Guess RDFSyntax from a local file's extension. + * <p> + * This method can be used by subclasses if {@link #getContentType()} is not + * present and {@link #getSourceFile()} is set. + * + * @param path Path which extension should be checked + * @return The {@link RDFSyntax} which has a matching {@link RDFSyntax#fileExtension}, + * otherwise {@link Optional#empty()}. + */ + protected static Optional<RDFSyntax> guessRDFSyntax(Path path) { + return fileExtension(path).flatMap(RDFSyntax::byFileExtension); + } + + /** + * Return the file extension of a Path - if any. + * <p> + * The returned file extension includes the leading <code>.</code> + * <p> + * Note that this only returns the last extension, e.g. 
the + * file extension for <code>archive.tar.gz</code> would be <code>.gz</code> + * + * @param path Path whose filename might contain an extension + * @return File extension (including the leading <code>.</code>), + * or {@link Optional#empty()} if the path has no extension + */ + private static Optional<String> fileExtension(Path path) { + Path fileName = path.getFileName(); + if (fileName == null) { + return Optional.empty(); + } + String filenameStr = fileName.toString(); + int last = filenameStr.lastIndexOf("."); + if (last > -1) { + return Optional.of(filenameStr.substring(last)); + } + return Optional.empty(); + } + + + /** + * Create a new {@link RDFTermFactory} for a parse session. + * <p> + * This is called by {@link #parse()} to set + * {@link #rdfTermFactory(RDFTermFactory)} if it is + * {@link Optional#empty()}. + * <p> + * As parsed blank nodes might be made with + * {@link RDFTermFactory#createBlankNode(String)}, + * each call to this method SHOULD return + * a new RDFTermFactory instance.
+ * + * @return A new {@link RDFTermFactory} + */ + protected RDFTermFactory createRDFTermFactory() { + return new SimpleRDFTermFactory(); + } + + @Override + public Future<ParseResult> parse() throws IOException, IllegalStateException { + final AbstractRDFParser<T> c = prepareForParsing(); + return threadpool.submit(() -> { + c.parseSynchronusly(); + return null; + }); + } + + @Override + public T target(Consumer<Quad> consumer) { + AbstractRDFParser<T> c = clone(); + c.resetTarget(); + c.target = consumer; + return c.asT(); + } + + @Override + public T target(Dataset dataset) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(dataset); + c.resetTarget(); + c.targetDataset = Optional.of(dataset); + return c.asT(); + } + + @Override + public T target(Graph graph) { + @SuppressWarnings({ "rawtypes", "unchecked" }) // super calls our .clone() + AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(graph); + c.resetTarget(); + c.targetGraph = Optional.of(graph); + return c.asT(); + } + + + +}
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/313fbf75/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java b/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java deleted file mode 100644 index 9e97487..0000000 --- a/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java +++ /dev/null @@ -1,541 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.rdf.simple; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Optional; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.function.Consumer; - -import org.apache.commons.rdf.api.Dataset; -import org.apache.commons.rdf.api.Graph; -import org.apache.commons.rdf.api.IRI; -import org.apache.commons.rdf.api.Quad; -import org.apache.commons.rdf.api.RDFParserBuilder; -import org.apache.commons.rdf.api.RDFSyntax; -import org.apache.commons.rdf.api.RDFTermFactory; - -/** - * Abstract RDFParserBuilder - * <p> - * This abstract class keeps the builder properties in protected fields like - * {@link #sourceFile} using {@link Optional}. Some basic checking like - * {@link #checkIsAbsolute(IRI)} is performed. - * <p> - * This class and its subclasses are {@link Cloneable}, immutable and - * (therefore) thread-safe - each call to option methods like - * {@link #contentType(String)} or {@link #source(IRI)} will return a cloned, - * mutated copy. - * <p> - * By default, parsing is done by the abstract method - * {@link #parseSynchronusly()} - which is executed in a cloned snapshot - hence - * multiple {@link #parse()} calls are thread-safe. The default {@link #parse()} - * uses a thread pool in {@link #threadGroup} - but implementations can override - * {@link #parse()} (e.g. because it has its own threading model or use - * asynchronous remote execution). 
- */ -public abstract class AbstractRDFParserBuilder<T extends AbstractRDFParserBuilder<T>> - implements RDFParserBuilder, Cloneable { - - public static final ThreadGroup threadGroup = new ThreadGroup("Commons RDF parsers"); - private static final ExecutorService threadpool = Executors.newCachedThreadPool(r -> new Thread(threadGroup, r)); - - // Basically only used for creating IRIs - private static RDFTermFactory internalRdfTermFactory = new SimpleRDFTermFactory(); - - /** - * Get the set {@link RDFTermFactory}, if any. - */ - public Optional<RDFTermFactory> getRdfTermFactory() { - return rdfTermFactory; - } - - /** - * Get the set content-type {@link RDFSyntax}, if any. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getContentType()} contains the - * value of {@link RDFSyntax#mediaType}. - */ - public Optional<RDFSyntax> getContentTypeSyntax() { - return contentTypeSyntax; - } - - /** - * Get the set content-type String, if any. - * <p> - * If this is {@link Optional#isPresent()} and - * is recognized by {@link RDFSyntax#byMediaType(String)}, then - * the corresponding {@link RDFSyntax} is set on - * {@link #getContentType()}, otherwise that is - * {@link Optional#empty()}. - */ - public final Optional<String> getContentType() { - return contentType; - } - - /** - * Get the target to consume parsed Quads. - * <p> - * From the call to {@link #parseSynchronusly()}, this - * method is always {@link Optional#isPresent()}. - * - */ - public Consumer<Quad> getTarget() { - return target; - } - - /** - * Get the target dataset as set by {@link #target(Dataset)}. - * <p> - * The return value is {@link Optional#isPresent()} if and only if - * {@link #target(Dataset)} has been set, meaning that the implementation - * may choose to append parsed quads to the {@link Dataset} directly instead - * of relying on the generated {@link #getTarget()} consumer. 
- * <p> - * If this value is present, then {@link #getTargetGraph()} MUST - * be {@link Optional#empty()}. - * - * @return The target Dataset, or {@link Optional#empty()} if another kind of target has been set. - */ - public Optional<Dataset> getTargetDataset() { - return targetDataset; - } - - /** - * Get the target graph as set by {@link #target(Graph)}. - * <p> - * The return value is {@link Optional#isPresent()} if and only if - * {@link #target(Graph)} has been set, meaning that the implementation - * may choose to append parsed triples to the {@link Graph} directly instead - * of relying on the generated {@link #getTarget()} consumer. - * <p> - * If this value is present, then {@link #getTargetDataset()} MUST - * be {@link Optional#empty()}. - * - * @return The target Graph, or {@link Optional#empty()} if another kind of target has been set. - */ - public Optional<Graph> getTargetGraph() { - return targetGraph; - } - - /** - * Get the set base {@link IRI}, if present. - * <p> - * - */ - public Optional<IRI> getBase() { - return base; - } - - /** - * Get the set source {@link InputStream}. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getSourceFile()} and {@link #getSourceIri()} - * are {@link Optional#empty()}. - */ - public Optional<InputStream> getSourceInputStream() { - return sourceInputStream; - } - - /** - * Get the set source {@link Path}. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getSourceInputStream()} and {@link #getSourceIri()} - * are {@link Optional#empty()}. - */ - public Optional<Path> getSourceFile() { - return sourceFile; - } - - /** - * Get the set source {@link Path}. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getSourceInputStream()} and {@link #getSourceInputStream()()} - * are {@link Optional#empty()}. 
- */ - public Optional<IRI> getSourceIri() { - return sourceIri; - } - - - private Optional<RDFTermFactory> rdfTermFactory = Optional.empty(); - private Optional<RDFSyntax> contentTypeSyntax = Optional.empty(); - private Optional<String> contentType = Optional.empty(); - private Optional<IRI> base = Optional.empty(); - private Optional<InputStream> sourceInputStream = Optional.empty(); - private Optional<Path> sourceFile = Optional.empty(); - private Optional<IRI> sourceIri = Optional.empty(); - private Consumer<Quad> target; - private Optional<Dataset> targetDataset; - private Optional<Graph> targetGraph; - - @SuppressWarnings("unchecked") - @Override - public T clone() { - try { - return (T) super.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - } - - @SuppressWarnings("unchecked") - protected T asT() { - return (T) this; - } - - @Override - public T rdfTermFactory(RDFTermFactory rdfTermFactory) { - AbstractRDFParserBuilder<T> c = clone(); - c.rdfTermFactory = Optional.ofNullable(rdfTermFactory); - return c.asT(); - } - - @Override - public T contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException { - AbstractRDFParserBuilder<T> c = clone(); - c.contentTypeSyntax = Optional.ofNullable(rdfSyntax); - c.contentType = c.contentTypeSyntax.map(syntax -> syntax.mediaType); - return c.asT(); - } - - @Override - public T contentType(String contentType) throws IllegalArgumentException { - AbstractRDFParserBuilder<T> c = clone(); - c.contentType = Optional.ofNullable(contentType); - c.contentTypeSyntax = c.contentType.flatMap(RDFSyntax::byMediaType); - return c.asT(); - } - - @Override - public T base(IRI base) { - AbstractRDFParserBuilder<T> c = clone(); - c.base = Optional.ofNullable(base); - c.base.ifPresent(i -> checkIsAbsolute(i)); - return c.asT(); - } - - @Override - public T base(String base) throws IllegalArgumentException { - return base(internalRdfTermFactory.createIRI(base)); - } - - @Override - public T 
source(InputStream inputStream) { - AbstractRDFParserBuilder<T> c = clone(); - c.resetSource(); - c.sourceInputStream = Optional.ofNullable(inputStream); - return c.asT(); - } - - @Override - public T source(Path file) { - AbstractRDFParserBuilder<T> c = clone(); - c.resetSource(); - c.sourceFile = Optional.ofNullable(file); - return c.asT(); - } - - @Override - public T source(IRI iri) { - AbstractRDFParserBuilder<T> c = clone(); - c.resetSource(); - c.sourceIri = Optional.ofNullable(iri); - c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); - return c.asT(); - } - - @Override - public T source(String iri) throws IllegalArgumentException { - AbstractRDFParserBuilder<T> c = clone(); - c.resetSource(); - c.sourceIri = Optional.ofNullable(iri).map(internalRdfTermFactory::createIRI); - c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); - return source(internalRdfTermFactory.createIRI(iri)); - } - - /** - * Check if an iri is absolute. - * <p> - * Used by {@link #source(String)} and {@link #base(String)} - * - * @param iri - */ - protected void checkIsAbsolute(IRI iri) { - if (!URI.create(iri.getIRIString()).isAbsolute()) { - throw new IllegalArgumentException("IRI is not absolute: " + iri); - } - } - - /** - * Check that one and only one source is present and valid. - * <p> - * Used by {@link #parse()}. - * <p> - * Subclasses might override this method, e.g. to support other - * source combinations, or to check if the sourceIri is - * resolvable. 
- * - * @throws IOException If a source file can't be read - */ - protected void checkSource() throws IOException { - if (!sourceFile.isPresent() && !sourceInputStream.isPresent() && !sourceIri.isPresent()) { - throw new IllegalStateException("No source has been set"); - } - if (sourceIri.isPresent() && sourceInputStream.isPresent()) { - throw new IllegalStateException("Both sourceIri and sourceInputStream have been set"); - } - if (sourceIri.isPresent() && sourceFile.isPresent()) { - throw new IllegalStateException("Both sourceIri and sourceFile have been set"); - } - if (sourceInputStream.isPresent() && sourceFile.isPresent()) { - throw new IllegalStateException("Both sourceInputStream and sourceFile have been set"); - } - if (sourceFile.isPresent() && !sourceFile.filter(Files::isReadable).isPresent()) { - throw new IOException("Can't read file: " + sourceFile); - } - } - - /** - * Check if base is required. - * - * @throws IllegalStateException if base is required, but not set. - */ - protected void checkBaseRequired() { - if (!base.isPresent() && sourceInputStream.isPresent() - && !contentTypeSyntax.filter(t -> t == RDFSyntax.NQUADS || t == RDFSyntax.NTRIPLES).isPresent()) { - throw new IllegalStateException("base iri required for inputstream source"); - } - } - - /** - * Reset all source* fields to Optional.empty() - * <p> - * Subclasses should override this and call <code>super.resetSource()</code> - * if they need to reset any additional source* fields. - * - */ - protected void resetSource() { - sourceInputStream = Optional.empty(); - sourceIri = Optional.empty(); - sourceFile = Optional.empty(); - } - - - /** - * Reset all optional target* fields to Optional.empty()</code> - * <p> - * Note that the consumer set for {@link #getTarget()} is - * NOT reset. - * <p> - * Subclasses should override this and call <code>super.resetTarget()</code> - * if they need to reset any additional target* fields. 
- * - */ - protected void resetTarget() { - targetDataset = Optional.empty(); - targetGraph = Optional.empty(); - } - - /** - * Parse {@link #sourceInputStream}, {@link #sourceFile} or - * {@link #sourceIri}. - * <p> - * One of the source fields MUST be present, as checked by {@link #checkSource()}. - * <p> - * {@link #checkBaseRequired()} is called to verify if {@link #getBase()} is required. - * - * @throws IOException If the source could not be read - * @throws RDFParseException If the source could not be parsed (e.g. a .ttl file was not valid Turtle) - */ - protected abstract void parseSynchronusly() throws IOException, RDFParseException; - - /** - * Prepare a clone of this RDFParserBuilder which have been checked and - * completed. - * <p> - * The returned clone will always have - * {@link #getTarget()} and {@link #getRdfTermFactory()} present. - * <p> - * If the {@link #getSourceFile()} is present, but the - * {@link #getBase()} is not present, the base will be set to the - * <code>file:///</code> IRI for the Path's real path (e.g. resolving any - * symbolic links). - * - * @return A completed and checked clone of this RDFParserBuilder - * @throws IOException If the source was not accessible (e.g. a file was not found) - * @throws IllegalStateException If the parser was not in a compatible setting (e.g. contentType was an invalid string) - */ - protected T prepareForParsing() throws IOException, IllegalStateException { - checkSource(); - checkBaseRequired(); - checkContentType(); - checkTarget(); - - // We'll make a clone of our current state which will be passed to - // parseSynchronously() - AbstractRDFParserBuilder<T> c = clone(); - - // Use a fresh SimpleRDFTermFactory for each parse - if (!c.rdfTermFactory.isPresent()) { - c.rdfTermFactory = Optional.of(createRDFTermFactory()); - } - // sourceFile, but no base? 
Let's follow any symlinks and use - // the file:/// URI - if (c.sourceFile.isPresent() && !c.base.isPresent()) { - URI baseUri = c.sourceFile.get().toRealPath().toUri(); - c.base = Optional.of(internalRdfTermFactory.createIRI(baseUri.toString())); - } - - return c.asT(); - } - - /** - * Subclasses can override this method to check the target is - * valid. - * <p> - * The default implementation throws an IllegalStateException if the - * target has not been set. - */ - protected void checkTarget() { - if (target == null) { - throw new IllegalStateException("target has not been set"); - } - if (targetGraph.isPresent() && targetDataset.isPresent()) { - // This should not happen as each target(..) method resets the optionals - throw new IllegalStateException("targetGraph and targetDataset can't both be set"); - } - } - - /** - * Subclasses can override this method to check compatibility with the - * contentType setting. - * - * @throws IllegalStateException - * if the {@link #getContentType()} or - * {@link #getContentTypeSyntax()} is not compatible or invalid - */ - protected void checkContentType() throws IllegalStateException { - } - - /** - * Guess RDFSyntax from a local file's extension. - * <p> - * This method can be used by subclasses if {@link #getContentType()} is not - * present and {@link #getSourceFile()} is set. - * - * @param path Path which extension should be checked - * @return The {@link RDFSyntax} which has a matching {@link RDFSyntax#fileExtension}, - * otherwise {@link Optional#empty()}. - */ - protected static Optional<RDFSyntax> guessRDFSyntax(Path path) { - return fileExtension(path).flatMap(RDFSyntax::byFileExtension); - } - - /** - * Return the file extension of a Path - if any. - * <p> - * The returned file extension includes the leading <code>.</code> - * <p> - * Note that this only returns the last extension, e.g. 
the - * file extension for <code>archive.tar.gz</code> would be <code>.gz</code> - * - * @param path Path which filename might contain an extension - * @return File extension (including the leading <code>.</code>, - * or {@link Optional#empty()} if the path has no extension - */ - private static Optional<String> fileExtension(Path path) { - Path fileName = path.getFileName(); - if (fileName == null) { - return Optional.empty(); - } - String filenameStr = fileName.toString(); - int last = filenameStr.lastIndexOf("."); - if (last > -1) { - return Optional.of(filenameStr.substring(last)); - } - return Optional.empty(); - } - - - /** - * Create a new {@link RDFTermFactory} for a parse session. - * <p> - * This is called by {@link #parse()} to set - * {@link #rdfTermFactory(RDFTermFactory)} if it is - * {@link Optional#empty()}. - * <p> - * As parsed blank nodes might be made with - * {@link RDFTermFactory#createBlankNode(String)}, - * each call to this method SHOULD return - * a new RDFTermFactory instance. 
- * - * @return A new {@link RDFTermFactory} - */ - protected RDFTermFactory createRDFTermFactory() { - return new SimpleRDFTermFactory(); - } - - @Override - public Future<ParseResult> parse() throws IOException, IllegalStateException { - final AbstractRDFParserBuilder<T> c = prepareForParsing(); - return threadpool.submit(() -> { - c.parseSynchronusly(); - return null; - }); - } - - @Override - public T target(Consumer<Quad> consumer) { - AbstractRDFParserBuilder<T> c = clone(); - c.resetTarget(); - c.target = consumer; - return c.asT(); - } - - @Override - public T target(Dataset dataset) { - @SuppressWarnings({ "rawtypes", "unchecked" }) - AbstractRDFParserBuilder<T> c = (AbstractRDFParserBuilder) RDFParserBuilder.super.target(dataset); - c.resetTarget(); - c.targetDataset = Optional.of(dataset); - return c.asT(); - } - - @Override - public T target(Graph graph) { - @SuppressWarnings({ "rawtypes", "unchecked" }) // super calls our .clone() - AbstractRDFParserBuilder<T> c = (AbstractRDFParserBuilder) RDFParserBuilder.super.target(graph); - c.resetTarget(); - c.targetGraph = Optional.of(graph); - return c.asT(); - } - - - -} http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/313fbf75/simple/src/main/java/org/apache/commons/rdf/simple/RDFParseException.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/RDFParseException.java b/simple/src/main/java/org/apache/commons/rdf/simple/RDFParseException.java index ed16bb2..d1cdd57 100644 --- a/simple/src/main/java/org/apache/commons/rdf/simple/RDFParseException.java +++ b/simple/src/main/java/org/apache/commons/rdf/simple/RDFParseException.java @@ -18,33 +18,33 @@ package org.apache.commons.rdf.simple; -import org.apache.commons.rdf.api.RDFParserBuilder; +import org.apache.commons.rdf.api.RDFParser; public class RDFParseException extends Exception { private static final long serialVersionUID = 5427752643780702976L; - 
private RDFParserBuilder builder; + private RDFParser builder; - public RDFParseException(RDFParserBuilder builder) { + public RDFParseException(RDFParser builder) { super(); this.builder = builder; } - public RDFParseException(RDFParserBuilder builder, String message, Throwable cause) { + public RDFParseException(RDFParser builder, String message, Throwable cause) { super(message, cause); this.builder = builder; } - public RDFParseException(RDFParserBuilder builder, String message) { + public RDFParseException(RDFParser builder, String message) { super(message); this.builder = builder; } - public RDFParseException(RDFParserBuilder builder, Throwable cause) { + public RDFParseException(RDFParser builder, Throwable cause) { super(cause); this.builder = builder; } - public RDFParserBuilder getRDFParserBuilder() { + public RDFParser getRDFParserBuilder() { return builder; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/313fbf75/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java index 439bacb..acb75b7 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java @@ -32,7 +32,7 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.rdf.api.Graph; import org.apache.commons.rdf.api.IRI; import org.apache.commons.rdf.api.Literal; -import org.apache.commons.rdf.api.RDFParserBuilder; +import org.apache.commons.rdf.api.RDFParser; import org.apache.commons.rdf.api.RDFSyntax; import org.apache.commons.rdf.api.RDFTerm; import org.apache.commons.rdf.api.RDFTermFactory; @@ -71,9 +71,9 @@ public class 
AbstractRDFParserBuilderTest { @Test public void guessRDFSyntax() throws Exception { - assertEquals(RDFSyntax.NTRIPLES, AbstractRDFParserBuilder.guessRDFSyntax(testNt).get()); - assertEquals(RDFSyntax.TURTLE, AbstractRDFParserBuilder.guessRDFSyntax(testTtl).get()); - assertFalse(AbstractRDFParserBuilder.guessRDFSyntax(testXml).isPresent()); + assertEquals(RDFSyntax.NTRIPLES, AbstractRDFParser.guessRDFSyntax(testNt).get()); + assertEquals(RDFSyntax.TURTLE, AbstractRDFParser.guessRDFSyntax(testTtl).get()); + assertFalse(AbstractRDFParser.guessRDFSyntax(testXml).isPresent()); } private void checkGraph(Graph g) throws Exception { @@ -107,7 +107,7 @@ public class AbstractRDFParserBuilderTest { @Test public void parseFile() throws Exception { Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(testNt).target(g); + RDFParser parser = dummyParser.source(testNt).target(g); parser.parse().get(5, TimeUnit.SECONDS); checkGraph(g); // FIXME: this could potentially break if the equivalent of /tmp includes @@ -132,7 +132,7 @@ public class AbstractRDFParserBuilderTest { public void parseBaseAndContentTypeNoSource() throws Exception { // Can set the other options, even without source() IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); - RDFParserBuilder parser = dummyParser.base(base).contentType(RDFSyntax.RDFXML); + RDFParser parser = dummyParser.base(base).contentType(RDFSyntax.RDFXML); thrown.expect(IllegalStateException.class); thrown.expectMessage("No source has been set"); // but .parse() should fail @@ -143,7 +143,7 @@ public class AbstractRDFParserBuilderTest { public void parseFileMissing() throws Exception { Files.delete(testNt); // This should not fail yet - RDFParserBuilder parser = dummyParser.source(testNt); + RDFParser parser = dummyParser.source(testNt); // but here: thrown.expect(IOException.class); parser.parse(); @@ -153,7 +153,7 @@ public class AbstractRDFParserBuilderTest { @Test public void 
parseFileContentType() throws Exception { Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser + RDFParser parser = dummyParser .source(testNt) .contentType(RDFSyntax.NTRIPLES) .target(g); @@ -180,7 +180,7 @@ public class AbstractRDFParserBuilderTest { public void parseInputStreamFailsIfBaseMissing() throws Exception { InputStream inputStream = new ByteArrayInputStream(new byte[0]); // Should not fail at this point - RDFParserBuilder parser = dummyParser.source(inputStream); + RDFParser parser = dummyParser.source(inputStream); // but here: thrown.expect(IllegalStateException.class); thrown.expectMessage("base iri required for inputstream source"); @@ -192,7 +192,7 @@ public class AbstractRDFParserBuilderTest { InputStream inputStream = new ByteArrayInputStream(new byte[0]); IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(inputStream).base(base).target(g); + RDFParser parser = dummyParser.source(inputStream).base(base).target(g); parser.parse().get(5, TimeUnit.SECONDS); checkGraph(g); assertEquals("<http://www.example.org/test.rdf>", firstPredicate(g, "base")); @@ -207,7 +207,7 @@ public class AbstractRDFParserBuilderTest { public void parseInputStreamWithNQuads() throws Exception { InputStream inputStream = new ByteArrayInputStream(new byte[0]); Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(inputStream).contentType(RDFSyntax.NQUADS).target(g); + RDFParser parser = dummyParser.source(inputStream).contentType(RDFSyntax.NQUADS).target(g); parser.parse().get(5, TimeUnit.SECONDS); checkGraph(g); assertNull(firstPredicate(g, "base")); @@ -222,7 +222,7 @@ public class AbstractRDFParserBuilderTest { public void parseIRI() throws Exception { IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); Graph g = factory.createGraph(); - RDFParserBuilder parser = 
dummyParser.source(iri).target(g); + RDFParser parser = dummyParser.source(iri).target(g); parser.parse().get(5, TimeUnit.SECONDS); checkGraph(g); assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); @@ -240,7 +240,7 @@ public class AbstractRDFParserBuilderTest { public void parseIRIBaseContentType() throws Exception { IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(iri).base(iri).contentType(RDFSyntax.TURTLE).target(g); + RDFParser parser = dummyParser.source(iri).base(iri).contentType(RDFSyntax.TURTLE).target(g); parser.parse().get(5, TimeUnit.SECONDS); checkGraph(g); assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/313fbf75/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java b/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java index baf9768..fe0b36e 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java @@ -23,18 +23,18 @@ import java.util.function.Consumer; import org.apache.commons.rdf.api.IRI; import org.apache.commons.rdf.api.Quad; -import org.apache.commons.rdf.api.RDFParserBuilder; +import org.apache.commons.rdf.api.RDFParser; import org.apache.commons.rdf.api.RDFTermFactory; /** - * For test purposes - a {@link RDFParserBuilder} that inserts information + * For test purposes - a {@link RDFParser} that inserts information * about what it has been asked to parse instead of actually parsing anything. 
* <p> * This always insert at least the triple equivalent to: * <pre> * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/greeting> "Hello world" . * </pre> - * Additional triples match the corresponding getter in AbstractRDFParserBuilder, + * Additional triples match the corresponding getter in AbstractRDFParser, * e.g.: * <pre> * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/base> <http://www.example.org/> . @@ -43,7 +43,7 @@ import org.apache.commons.rdf.api.RDFTermFactory; * * */ -public class DummyRDFParserBuilder extends AbstractRDFParserBuilder<DummyRDFParserBuilder> { +public class DummyRDFParserBuilder extends AbstractRDFParser<DummyRDFParserBuilder> { @Override protected void parseSynchronusly() throws IOException, IllegalStateException, RDFParseException { @@ -57,7 +57,7 @@ public class DummyRDFParserBuilder extends AbstractRDFParserBuilder<DummyRDFPars t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/greeting"), factory.createLiteral("Hello world"))); - // Now we'll expose the finalized AbstractRDFParserBuilder settings + // Now we'll expose the finalized AbstractRDFParser settings // so they can be inspected by the junit test if (getSourceIri().isPresent()) {