http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParser.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParser.java b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParser.java index e478c39..584bff3 100644 --- a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParser.java +++ b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParser.java @@ -56,503 +56,514 @@ import org.apache.commons.rdf.simple.SimpleRDF; * {@link #parse()} (e.g. because it has its own threading model or use * asynchronous remote execution). */ -public abstract class AbstractRDFParser<T extends AbstractRDFParser<T>> - implements RDFParser, Cloneable { - - public static final ThreadGroup threadGroup = new ThreadGroup("Commons RDF parsers"); - private static final ExecutorService threadpool = Executors.newCachedThreadPool(r -> new Thread(threadGroup, r)); - - // Basically only used for creating IRIs - private static RDF internalRdfTermFactory = new SimpleRDF(); - - /** - * Get the set {@link RDF}, if any. - * - * @return The {@link RDF} to use, or {@link Optional#empty()} if - * it has not been set - */ - public Optional<RDF> getRdfTermFactory() { - return rdfTermFactory; - } - - /** - * Get the set content-type {@link RDFSyntax}, if any. - * <p> - * If this is {@link Optional#isPresent()}, then {@link #getContentType()} - * contains the value of {@link RDFSyntax#mediaType}. - * - * @return The {@link RDFSyntax} of the content type, or - * {@link Optional#empty()} if it has not been set - */ - public Optional<RDFSyntax> getContentTypeSyntax() { - return contentTypeSyntax; - } - - /** - * Get the set content-type String, if any. - * <p> - * If this is {@link Optional#isPresent()} and is recognized by - * {@link RDFSyntax#byMediaType(String)}, then the corresponding - * {@link RDFSyntax} is set on {@link #getContentType()}, otherwise that is - * {@link Optional#empty()}. - * - * @return The Content-Type IANA media type, e.g. <code>text/turtle</code>, - * or {@link Optional#empty()} if it has not been set - */ - public final Optional<String> getContentType() { - return contentType; - } - - /** - * Get the target to consume parsed Quads. - * <p> - * From the call to {@link #parseSynchronusly()}, this will be a - * non-<code>null</code> value (as a target is a required setting). - * - * @return The target consumer of {@link Quad}s, or <code>null</code> if it - * has not yet been set. - * - */ - public Consumer<Quad> getTarget() { - return target; - } - - /** - * Get the target dataset as set by {@link #target(Dataset)}. - * <p> - * The return value is {@link Optional#isPresent()} if and only if - * {@link #target(Dataset)} has been set, meaning that the implementation - * may choose to append parsed quads to the {@link Dataset} directly instead - * of relying on the generated {@link #getTarget()} consumer. - * <p> - * If this value is present, then {@link #getTargetGraph()} MUST - * be {@link Optional#empty()}. - * - * @return The target Dataset, or {@link Optional#empty()} if another kind of target has been set. - */ - public Optional<Dataset> getTargetDataset() { - return targetDataset; - } - - /** - * Get the target graph as set by {@link #target(Graph)}. - * <p> - * The return value is {@link Optional#isPresent()} if and only if - * {@link #target(Graph)} has been set, meaning that the implementation - * may choose to append parsed triples to the {@link Graph} directly instead - * of relying on the generated {@link #getTarget()} consumer. - * <p> - * If this value is present, then {@link #getTargetDataset()} MUST - * be {@link Optional#empty()}. - * - * @return The target Graph, or {@link Optional#empty()} if another kind of target has been set. - */ - public Optional<Graph> getTargetGraph() { - return targetGraph; - } - - /** - * Get the set base {@link IRI}, if present. - * - * @return The base {@link IRI}, or {@link Optional#empty()} if it has not been set - */ - public Optional<IRI> getBase() { - return base; - } - - /** - * Get the set source {@link InputStream}. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getSourceFile()} and {@link #getSourceIri()} - * are {@link Optional#empty()}. - * - * @return The source {@link InputStream}, or {@link Optional#empty()} if it has not been set - */ - public Optional<InputStream> getSourceInputStream() { - return sourceInputStream; - } - - /** - * Get the set source {@link Path}. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getSourceInputStream()} and {@link #getSourceIri()} - * are {@link Optional#empty()}. - * - * @return The source {@link Path}, or {@link Optional#empty()} if it has not been set - */ - public Optional<Path> getSourceFile() { - return sourceFile; - } - - /** - * Get the set source {@link Path}. - * <p> - * If this is {@link Optional#isPresent()}, then - * {@link #getSourceInputStream()} and {@link #getSourceInputStream()} - * are {@link Optional#empty()}. - * - * @return The source {@link IRI}, or {@link Optional#empty()} if it has not been set - */ - public Optional<IRI> getSourceIri() { - return sourceIri; - } - - - private Optional<RDF> rdfTermFactory = Optional.empty(); - private Optional<RDFSyntax> contentTypeSyntax = Optional.empty(); - private Optional<String> contentType = Optional.empty(); - private Optional<IRI> base = Optional.empty(); - private Optional<InputStream> sourceInputStream = Optional.empty(); - private Optional<Path> sourceFile = Optional.empty(); - private Optional<IRI> sourceIri = Optional.empty(); - private Consumer<Quad> target; - private Optional<Dataset> targetDataset; - private Optional<Graph> targetGraph; - - @SuppressWarnings("unchecked") - @Override - public T clone() { - try { - return (T) super.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - } - - @SuppressWarnings("unchecked") - protected T asT() { - return (T) this; - } - - @Override - public T rdfTermFactory(RDF rdfTermFactory) { - AbstractRDFParser<T> c = clone(); - c.rdfTermFactory = Optional.ofNullable(rdfTermFactory); - return c.asT(); - } - - @Override - public T contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException { - AbstractRDFParser<T> c = clone(); - c.contentTypeSyntax = Optional.ofNullable(rdfSyntax); - c.contentType = c.contentTypeSyntax.map(syntax -> syntax.mediaType); - return c.asT(); - } - - @Override - public T contentType(String contentType) throws IllegalArgumentException { - AbstractRDFParser<T> c = clone(); - c.contentType = Optional.ofNullable(contentType); - c.contentTypeSyntax = c.contentType.flatMap(RDFSyntax::byMediaType); - return c.asT(); - } - - @Override - public T base(IRI base) { - AbstractRDFParser<T> c = clone(); - c.base = Optional.ofNullable(base); - c.base.ifPresent(i -> checkIsAbsolute(i)); - return c.asT(); - } - - @Override - public T base(String base) throws IllegalArgumentException { - return base(internalRdfTermFactory.createIRI(base)); - } - - @Override - public T source(InputStream inputStream) { - AbstractRDFParser<T> c = clone(); - c.resetSource(); - c.sourceInputStream = Optional.ofNullable(inputStream); - return c.asT(); - } - - @Override - public T source(Path file) { - AbstractRDFParser<T> c = clone(); - c.resetSource(); - c.sourceFile = Optional.ofNullable(file); - return c.asT(); - } - - @Override - public T source(IRI iri) { - AbstractRDFParser<T> c = clone(); - c.resetSource(); - c.sourceIri = Optional.ofNullable(iri); - c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); - return c.asT(); - } - - @Override - public T source(String iri) throws IllegalArgumentException { - AbstractRDFParser<T> c = clone(); - c.resetSource(); - c.sourceIri = Optional.ofNullable(iri).map(internalRdfTermFactory::createIRI); - c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); - return source(internalRdfTermFactory.createIRI(iri)); - } - - /** - * Check if an iri is absolute. - * <p> - * Used by {@link #source(String)} and {@link #base(String)}. - * - * @param iri IRI to check - * @throws IllegalArgumentException If the IRI is not absolute - */ - protected void checkIsAbsolute(IRI iri) throws IllegalArgumentException { - if (!URI.create(iri.getIRIString()).isAbsolute()) { - throw new IllegalArgumentException("IRI is not absolute: " + iri); - } - } - - /** - * Check that one and only one source is present and valid. - * <p> - * Used by {@link #parse()}. - * <p> - * Subclasses might override this method, e.g. to support other - * source combinations, or to check if the sourceIri is - * resolvable. - * - * @throws IOException If a source file can't be read - */ - protected void checkSource() throws IOException { - if (!sourceFile.isPresent() && !sourceInputStream.isPresent() && !sourceIri.isPresent()) { - throw new IllegalStateException("No source has been set"); - } - if (sourceIri.isPresent() && sourceInputStream.isPresent()) { - throw new IllegalStateException("Both sourceIri and sourceInputStream have been set"); - } - if (sourceIri.isPresent() && sourceFile.isPresent()) { - throw new IllegalStateException("Both sourceIri and sourceFile have been set"); - } - if (sourceInputStream.isPresent() && sourceFile.isPresent()) { - throw new IllegalStateException("Both sourceInputStream and sourceFile have been set"); - } - if (sourceFile.isPresent() && !sourceFile.filter(Files::isReadable).isPresent()) { - throw new IOException("Can't read file: " + sourceFile); - } - } - - /** - * Check if base is required. - * - * @throws IllegalStateException if base is required, but not set. - */ - protected void checkBaseRequired() throws IllegalStateException { - if (!base.isPresent() && sourceInputStream.isPresent() - && !contentTypeSyntax.filter(t -> t == RDFSyntax.NQUADS || t == RDFSyntax.NTRIPLES).isPresent()) { - throw new IllegalStateException("base iri required for inputstream source"); - } - } - - /** - * Reset all source* fields to Optional.empty() - * <p> - * Subclasses should override this and call <code>super.resetSource()</code> - * if they need to reset any additional source* fields. - * - */ - protected void resetSource() { - sourceInputStream = Optional.empty(); - sourceIri = Optional.empty(); - sourceFile = Optional.empty(); - } - - /** - * Reset all optional target* fields to {@link Optional#empty()}. - * <p> - * Note that the consumer set for {@link #getTarget()} is - * <strong>note</strong> reset. - * <p> - * Subclasses should override this and call <code>super.resetTarget()</code> - * if they need to reset any additional target* fields. - * - */ - protected void resetTarget() { - targetDataset = Optional.empty(); - targetGraph = Optional.empty(); - } - - /** - * Parse {@link #sourceInputStream}, {@link #sourceFile} or - * {@link #sourceIri}. - * <p> - * One of the source fields MUST be present, as checked by {@link #checkSource()}. - * <p> - * {@link #checkBaseRequired()} is called to verify if {@link #getBase()} is required. - * - * @throws IOException If the source could not be read - * @throws RDFParseException If the source could not be parsed (e.g. a .ttl file was not valid Turtle) - */ - protected abstract void parseSynchronusly() throws IOException, RDFParseException; - - /** - * Prepare a clone of this RDFParser which have been checked and - * completed. - * <p> - * The returned clone will always have - * {@link #getTarget()} and {@link #getRdfTermFactory()} present. - * <p> - * If the {@link #getSourceFile()} is present, but the - * {@link #getBase()} is not present, the base will be set to the - * <code>file:///</code> IRI for the Path's real path (e.g. resolving any - * symbolic links). - * - * @return A completed and checked clone of this RDFParser - * @throws IOException If the source was not accessible (e.g. a file was not found) - * @throws IllegalStateException If the parser was not in a compatible setting (e.g. contentType was an invalid string) - */ - protected T prepareForParsing() throws IOException, IllegalStateException { - checkSource(); - checkBaseRequired(); - checkContentType(); - checkTarget(); - - // We'll make a clone of our current state which will be passed to - // parseSynchronously() - AbstractRDFParser<T> c = clone(); - - // Use a fresh SimpleRDF for each parse - if (!c.rdfTermFactory.isPresent()) { - c.rdfTermFactory = Optional.of(createRDFTermFactory()); - } - // sourceFile, but no base? Let's follow any symlinks and use - // the file:/// URI - if (c.sourceFile.isPresent() && !c.base.isPresent()) { - URI baseUri = c.sourceFile.get().toRealPath().toUri(); - c.base = Optional.of(internalRdfTermFactory.createIRI(baseUri.toString())); - } - - return c.asT(); - } - - /** - * Subclasses can override this method to check the target is - * valid. - * <p> - * The default implementation throws an IllegalStateException if the - * target has not been set. - */ - protected void checkTarget() { - if (target == null) { - throw new IllegalStateException("target has not been set"); - } - if (targetGraph.isPresent() && targetDataset.isPresent()) { - // This should not happen as each target(..) method resets the optionals - throw new IllegalStateException("targetGraph and targetDataset can't both be set"); - } - } - - /** - * Subclasses can override this method to check compatibility with the - * contentType setting. - * - * @throws IllegalStateException - * if the {@link #getContentType()} or - * {@link #getContentTypeSyntax()} is not compatible or invalid - */ - protected void checkContentType() throws IllegalStateException { - } - - /** - * Guess RDFSyntax from a local file's extension. - * <p> - * This method can be used by subclasses if {@link #getContentType()} is not - * present and {@link #getSourceFile()} is set. - * - * @param path Path which extension should be checked - * @return The {@link RDFSyntax} which has a matching {@link RDFSyntax#fileExtension}, - * otherwise {@link Optional#empty()}. - */ - protected static Optional<RDFSyntax> guessRDFSyntax(Path path) { - return fileExtension(path).flatMap(RDFSyntax::byFileExtension); - } - - /** - * Return the file extension of a Path - if any. - * <p> - * The returned file extension includes the leading <code>.</code> - * <p> - * Note that this only returns the last extension, e.g. the - * file extension for <code>archive.tar.gz</code> would be <code>.gz</code> - * - * @param path Path which filename might contain an extension - * @return File extension (including the leading <code>.</code>, - * or {@link Optional#empty()} if the path has no extension - */ - private static Optional<String> fileExtension(Path path) { - Path fileName = path.getFileName(); - if (fileName == null) { - return Optional.empty(); - } - String filenameStr = fileName.toString(); - int last = filenameStr.lastIndexOf("."); - if (last > -1) { - return Optional.of(filenameStr.substring(last)); - } - return Optional.empty(); - } - - - /** - * Create a new {@link RDF} for a parse session. - * <p> - * This is called by {@link #parse()} to set - * {@link #rdfTermFactory(RDF)} if it is - * {@link Optional#empty()}. - * <p> - * As parsed blank nodes might be made with - * {@link RDF#createBlankNode(String)}, - * each call to this method SHOULD return - * a new RDF instance. - * - * @return A new {@link RDF} - */ - protected RDF createRDFTermFactory() { - return new SimpleRDF(); - } - - @Override - public Future<ParseResult> parse() throws IOException, IllegalStateException { - final AbstractRDFParser<T> c = prepareForParsing(); - return threadpool.submit(() -> { - c.parseSynchronusly(); - return null; - }); - } - - @Override - public T target(Consumer<Quad> consumer) { - AbstractRDFParser<T> c = clone(); - c.resetTarget(); - c.target = consumer; - return c.asT(); - } - - @Override - public T target(Dataset dataset) { - @SuppressWarnings({ "rawtypes", "unchecked" }) - AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(dataset); - c.resetTarget(); - c.targetDataset = Optional.of(dataset); - return c.asT(); - } - - @Override - public T target(Graph graph) { - @SuppressWarnings({ "rawtypes", "unchecked" }) // super calls our .clone() - AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(graph); - c.resetTarget(); - c.targetGraph = Optional.of(graph); - return c.asT(); - } - - +public abstract class AbstractRDFParser<T extends AbstractRDFParser<T>> implements RDFParser, Cloneable { + + public static final ThreadGroup threadGroup = new ThreadGroup("Commons RDF parsers"); + private static final ExecutorService threadpool = Executors.newCachedThreadPool(r -> new Thread(threadGroup, r)); + + // Basically only used for creating IRIs + private static RDF internalRdfTermFactory = new SimpleRDF(); + + /** + * Get the set {@link RDF}, if any. + * + * @return The {@link RDF} to use, or {@link Optional#empty()} if it has not + * been set + */ + public Optional<RDF> getRdfTermFactory() { + return rdfTermFactory; + } + + /** + * Get the set content-type {@link RDFSyntax}, if any. + * <p> + * If this is {@link Optional#isPresent()}, then {@link #getContentType()} + * contains the value of {@link RDFSyntax#mediaType}. + * + * @return The {@link RDFSyntax} of the content type, or + * {@link Optional#empty()} if it has not been set + */ + public Optional<RDFSyntax> getContentTypeSyntax() { + return contentTypeSyntax; + } + + /** + * Get the set content-type String, if any. + * <p> + * If this is {@link Optional#isPresent()} and is recognized by + * {@link RDFSyntax#byMediaType(String)}, then the corresponding + * {@link RDFSyntax} is set on {@link #getContentType()}, otherwise that is + * {@link Optional#empty()}. + * + * @return The Content-Type IANA media type, e.g. <code>text/turtle</code>, + * or {@link Optional#empty()} if it has not been set + */ + public final Optional<String> getContentType() { + return contentType; + } + + /** + * Get the target to consume parsed Quads. + * <p> + * From the call to {@link #parseSynchronusly()}, this will be a + * non-<code>null</code> value (as a target is a required setting). + * + * @return The target consumer of {@link Quad}s, or <code>null</code> if it + * has not yet been set. + * + */ + public Consumer<Quad> getTarget() { + return target; + } + + /** + * Get the target dataset as set by {@link #target(Dataset)}. + * <p> + * The return value is {@link Optional#isPresent()} if and only if + * {@link #target(Dataset)} has been set, meaning that the implementation + * may choose to append parsed quads to the {@link Dataset} directly instead + * of relying on the generated {@link #getTarget()} consumer. + * <p> + * If this value is present, then {@link #getTargetGraph()} MUST be + * {@link Optional#empty()}. + * + * @return The target Dataset, or {@link Optional#empty()} if another kind + * of target has been set. + */ + public Optional<Dataset> getTargetDataset() { + return targetDataset; + } + + /** + * Get the target graph as set by {@link #target(Graph)}. + * <p> + * The return value is {@link Optional#isPresent()} if and only if + * {@link #target(Graph)} has been set, meaning that the implementation may + * choose to append parsed triples to the {@link Graph} directly instead of + * relying on the generated {@link #getTarget()} consumer. + * <p> + * If this value is present, then {@link #getTargetDataset()} MUST be + * {@link Optional#empty()}. + * + * @return The target Graph, or {@link Optional#empty()} if another kind of + * target has been set. + */ + public Optional<Graph> getTargetGraph() { + return targetGraph; + } + + /** + * Get the set base {@link IRI}, if present. + * + * @return The base {@link IRI}, or {@link Optional#empty()} if it has not + * been set + */ + public Optional<IRI> getBase() { + return base; + } + + /** + * Get the set source {@link InputStream}. + * <p> + * If this is {@link Optional#isPresent()}, then {@link #getSourceFile()} + * and {@link #getSourceIri()} are {@link Optional#empty()}. + * + * @return The source {@link InputStream}, or {@link Optional#empty()} if it + * has not been set + */ + public Optional<InputStream> getSourceInputStream() { + return sourceInputStream; + } + + /** + * Get the set source {@link Path}. + * <p> + * If this is {@link Optional#isPresent()}, then + * {@link #getSourceInputStream()} and {@link #getSourceIri()} are + * {@link Optional#empty()}. + * + * @return The source {@link Path}, or {@link Optional#empty()} if it has + * not been set + */ + public Optional<Path> getSourceFile() { + return sourceFile; + } + + /** + * Get the set source {@link Path}. + * <p> + * If this is {@link Optional#isPresent()}, then + * {@link #getSourceInputStream()} and {@link #getSourceInputStream()} are + * {@link Optional#empty()}. + * + * @return The source {@link IRI}, or {@link Optional#empty()} if it has not + * been set + */ + public Optional<IRI> getSourceIri() { + return sourceIri; + } + + private Optional<RDF> rdfTermFactory = Optional.empty(); + private Optional<RDFSyntax> contentTypeSyntax = Optional.empty(); + private Optional<String> contentType = Optional.empty(); + private Optional<IRI> base = Optional.empty(); + private Optional<InputStream> sourceInputStream = Optional.empty(); + private Optional<Path> sourceFile = Optional.empty(); + private Optional<IRI> sourceIri = Optional.empty(); + private Consumer<Quad> target; + private Optional<Dataset> targetDataset; + private Optional<Graph> targetGraph; + + @SuppressWarnings("unchecked") + @Override + public T clone() { + try { + return (T) super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + @SuppressWarnings("unchecked") + protected T asT() { + return (T) this; + } + + @Override + public T rdfTermFactory(RDF rdfTermFactory) { + AbstractRDFParser<T> c = clone(); + c.rdfTermFactory = Optional.ofNullable(rdfTermFactory); + return c.asT(); + } + + @Override + public T contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException { + AbstractRDFParser<T> c = clone(); + c.contentTypeSyntax = Optional.ofNullable(rdfSyntax); + c.contentType = c.contentTypeSyntax.map(syntax -> syntax.mediaType); + return c.asT(); + } + + @Override + public T contentType(String contentType) throws IllegalArgumentException { + AbstractRDFParser<T> c = clone(); + c.contentType = Optional.ofNullable(contentType); + c.contentTypeSyntax = c.contentType.flatMap(RDFSyntax::byMediaType); + return c.asT(); + } + + @Override + public T base(IRI base) { + AbstractRDFParser<T> c = clone(); + c.base = Optional.ofNullable(base); + c.base.ifPresent(i -> checkIsAbsolute(i)); + return c.asT(); + } + + @Override + public T base(String base) throws IllegalArgumentException { + return base(internalRdfTermFactory.createIRI(base)); + } + + @Override + public T source(InputStream inputStream) { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceInputStream = Optional.ofNullable(inputStream); + return c.asT(); + } + + @Override + public T source(Path file) { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceFile = Optional.ofNullable(file); + return c.asT(); + } + + @Override + public T source(IRI iri) { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceIri = Optional.ofNullable(iri); + c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); + return c.asT(); + } + + @Override + public T source(String iri) throws IllegalArgumentException { + AbstractRDFParser<T> c = clone(); + c.resetSource(); + c.sourceIri = Optional.ofNullable(iri).map(internalRdfTermFactory::createIRI); + c.sourceIri.ifPresent(i -> checkIsAbsolute(i)); + return source(internalRdfTermFactory.createIRI(iri)); + } + + /** + * Check if an iri is absolute. + * <p> + * Used by {@link #source(String)} and {@link #base(String)}. + * + * @param iri + * IRI to check + * @throws IllegalArgumentException + * If the IRI is not absolute + */ + protected void checkIsAbsolute(IRI iri) throws IllegalArgumentException { + if (!URI.create(iri.getIRIString()).isAbsolute()) { + throw new IllegalArgumentException("IRI is not absolute: " + iri); + } + } + + /** + * Check that one and only one source is present and valid. + * <p> + * Used by {@link #parse()}. + * <p> + * Subclasses might override this method, e.g. to support other source + * combinations, or to check if the sourceIri is resolvable. + * + * @throws IOException + * If a source file can't be read + */ + protected void checkSource() throws IOException { + if (!sourceFile.isPresent() && !sourceInputStream.isPresent() && !sourceIri.isPresent()) { + throw new IllegalStateException("No source has been set"); + } + if (sourceIri.isPresent() && sourceInputStream.isPresent()) { + throw new IllegalStateException("Both sourceIri and sourceInputStream have been set"); + } + if (sourceIri.isPresent() && sourceFile.isPresent()) { + throw new IllegalStateException("Both sourceIri and sourceFile have been set"); + } + if (sourceInputStream.isPresent() && sourceFile.isPresent()) { + throw new IllegalStateException("Both sourceInputStream and sourceFile have been set"); + } + if (sourceFile.isPresent() && !sourceFile.filter(Files::isReadable).isPresent()) { + throw new IOException("Can't read file: " + sourceFile); + } + } + + /** + * Check if base is required. + * + * @throws IllegalStateException + * if base is required, but not set. + */ + protected void checkBaseRequired() throws IllegalStateException { + if (!base.isPresent() && sourceInputStream.isPresent() + && !contentTypeSyntax.filter(t -> t == RDFSyntax.NQUADS || t == RDFSyntax.NTRIPLES).isPresent()) { + throw new IllegalStateException("base iri required for inputstream source"); + } + } + + /** + * Reset all source* fields to Optional.empty() + * <p> + * Subclasses should override this and call <code>super.resetSource()</code> + * if they need to reset any additional source* fields. + * + */ + protected void resetSource() { + sourceInputStream = Optional.empty(); + sourceIri = Optional.empty(); + sourceFile = Optional.empty(); + } + + /** + * Reset all optional target* fields to {@link Optional#empty()}. + * <p> + * Note that the consumer set for {@link #getTarget()} is + * <strong>note</strong> reset. + * <p> + * Subclasses should override this and call <code>super.resetTarget()</code> + * if they need to reset any additional target* fields. + * + */ + protected void resetTarget() { + targetDataset = Optional.empty(); + targetGraph = Optional.empty(); + } + + /** + * Parse {@link #sourceInputStream}, {@link #sourceFile} or + * {@link #sourceIri}. + * <p> + * One of the source fields MUST be present, as checked by + * {@link #checkSource()}. + * <p> + * {@link #checkBaseRequired()} is called to verify if {@link #getBase()} is + * required. + * + * @throws IOException + * If the source could not be read + * @throws RDFParseException + * If the source could not be parsed (e.g. a .ttl file was not + * valid Turtle) + */ + protected abstract void parseSynchronusly() throws IOException, RDFParseException; + + /** + * Prepare a clone of this RDFParser which have been checked and completed. + * <p> + * The returned clone will always have {@link #getTarget()} and + * {@link #getRdfTermFactory()} present. + * <p> + * If the {@link #getSourceFile()} is present, but the {@link #getBase()} is + * not present, the base will be set to the <code>file:///</code> IRI for + * the Path's real path (e.g. resolving any symbolic links). + * + * @return A completed and checked clone of this RDFParser + * @throws IOException + * If the source was not accessible (e.g. a file was not found) + * @throws IllegalStateException + * If the parser was not in a compatible setting (e.g. + * contentType was an invalid string) + */ + protected T prepareForParsing() throws IOException, IllegalStateException { + checkSource(); + checkBaseRequired(); + checkContentType(); + checkTarget(); + + // We'll make a clone of our current state which will be passed to + // parseSynchronously() + AbstractRDFParser<T> c = clone(); + + // Use a fresh SimpleRDF for each parse + if (!c.rdfTermFactory.isPresent()) { + c.rdfTermFactory = Optional.of(createRDFTermFactory()); + } + // sourceFile, but no base? Let's follow any symlinks and use + // the file:/// URI + if (c.sourceFile.isPresent() && !c.base.isPresent()) { + URI baseUri = c.sourceFile.get().toRealPath().toUri(); + c.base = Optional.of(internalRdfTermFactory.createIRI(baseUri.toString())); + } + + return c.asT(); + } + + /** + * Subclasses can override this method to check the target is valid. + * <p> + * The default implementation throws an IllegalStateException if the target + * has not been set. + */ + protected void checkTarget() { + if (target == null) { + throw new IllegalStateException("target has not been set"); + } + if (targetGraph.isPresent() && targetDataset.isPresent()) { + // This should not happen as each target(..) method resets the + // optionals + throw new IllegalStateException("targetGraph and targetDataset can't both be set"); + } + } + + /** + * Subclasses can override this method to check compatibility with the + * contentType setting. + * + * @throws IllegalStateException + * if the {@link #getContentType()} or + * {@link #getContentTypeSyntax()} is not compatible or invalid + */ + protected void checkContentType() throws IllegalStateException { + } + + /** + * Guess RDFSyntax from a local file's extension. + * <p> + * This method can be used by subclasses if {@link #getContentType()} is not + * present and {@link #getSourceFile()} is set. + * + * @param path + * Path which extension should be checked + * @return The {@link RDFSyntax} which has a matching + * {@link RDFSyntax#fileExtension}, otherwise + * {@link Optional#empty()}. + */ + protected static Optional<RDFSyntax> guessRDFSyntax(Path path) { + return fileExtension(path).flatMap(RDFSyntax::byFileExtension); + } + + /** + * Return the file extension of a Path - if any. + * <p> + * The returned file extension includes the leading <code>.</code> + * <p> + * Note that this only returns the last extension, e.g. the file extension + * for <code>archive.tar.gz</code> would be <code>.gz</code> + * + * @param path + * Path which filename might contain an extension + * @return File extension (including the leading <code>.</code>, or + * {@link Optional#empty()} if the path has no extension + */ + private static Optional<String> fileExtension(Path path) { + Path fileName = path.getFileName(); + if (fileName == null) { + return Optional.empty(); + } + String filenameStr = fileName.toString(); + int last = filenameStr.lastIndexOf("."); + if (last > -1) { + return Optional.of(filenameStr.substring(last)); + } + return Optional.empty(); + } + + /** + * Create a new {@link RDF} for a parse session. + * <p> + * This is called by {@link #parse()} to set {@link #rdfTermFactory(RDF)} if + * it is {@link Optional#empty()}. + * <p> + * As parsed blank nodes might be made with + * {@link RDF#createBlankNode(String)}, each call to this method SHOULD + * return a new RDF instance. + * + * @return A new {@link RDF} + */ + protected RDF createRDFTermFactory() { + return new SimpleRDF(); + } + + @Override + public Future<ParseResult> parse() throws IOException, IllegalStateException { + final AbstractRDFParser<T> c = prepareForParsing(); + return threadpool.submit(() -> { + c.parseSynchronusly(); + return null; + }); + } + + @Override + public T target(Consumer<Quad> consumer) { + AbstractRDFParser<T> c = clone(); + c.resetTarget(); + c.target = consumer; + return c.asT(); + } + + @Override + public T target(Dataset dataset) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(dataset); + c.resetTarget(); + c.targetDataset = Optional.of(dataset); + return c.asT(); + } + + @Override + public T target(Graph graph) { + @SuppressWarnings({ "rawtypes", "unchecked" }) // super calls our + // .clone() + AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(graph); + c.resetTarget(); + c.targetGraph = Optional.of(graph); + return c.asT(); + } }
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java index cb3ad82..aedab78 100644 --- a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java +++ b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java @@ -21,30 +21,30 @@ package org.apache.commons.rdf.simple.experimental; import org.apache.commons.rdf.experimental.RDFParser; public class RDFParseException extends Exception { - private static final long serialVersionUID = 5427752643780702976L; - private RDFParser builder; - - public RDFParseException(RDFParser builder) { - super(); - this.builder = builder; - } - - public RDFParseException(RDFParser builder, String message, Throwable cause) { - super(message, cause); - this.builder = builder; - } - - public RDFParseException(RDFParser builder, String message) { - super(message); - this.builder = builder; - } - - public RDFParseException(RDFParser builder, Throwable cause) { - super(cause); - this.builder = builder; - } - - public RDFParser getRDFParserBuilder() { - return builder; - } + private static final long serialVersionUID = 5427752643780702976L; + private RDFParser builder; + + public RDFParseException(RDFParser builder) { + super(); + this.builder = builder; + } + + public RDFParseException(RDFParser builder, String message, Throwable cause) { + super(message, cause); + this.builder = builder; + } + + public RDFParseException(RDFParser builder, String message) { + super(message); + this.builder = builder; + } + + public RDFParseException(RDFParser builder, Throwable cause) { + super(cause); + this.builder = builder; + } + + public RDFParser getRDFParserBuilder() { + return builder; + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/main/java/org/apache/commons/rdf/simple/package-info.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/package-info.java b/simple/src/main/java/org/apache/commons/rdf/simple/package-info.java index f536594..782c7d5 100644 --- a/simple/src/main/java/org/apache/commons/rdf/simple/package-info.java +++ b/simple/src/main/java/org/apache/commons/rdf/simple/package-info.java @@ -19,17 +19,15 @@ * A simple in-memory implementation of the Commons RDF API. * <p> * This package contains a simple (if not naive) implementation of - * {@link org.apache.commons.rdf.api} - * using in-memory POJO objects. + * {@link org.apache.commons.rdf.api} using in-memory POJO objects. * <p> - * Note that although this module fully implements the commons-rdf API, - * it should <strong>not</strong> be considered a reference implementation. - * It is <strong>not thread-safe</strong> nor scalable, but may be useful for - * testing and simple usage (e.g. prototyping). + * Note that although this module fully implements the commons-rdf API, it + * should <strong>not</strong> be considered a reference implementation. It is + * <strong>not thread-safe</strong> nor scalable, but may be useful for testing + * and simple usage (e.g. prototyping). * <p> * To use this implementation, create an instance of - * {@link org.apache.commons.rdf.simple.SimpleRDF} - * and use methods like + * {@link org.apache.commons.rdf.simple.SimpleRDF} and use methods like * {@link org.apache.commons.rdf.simple.SimpleRDF#createGraph} and * {@link org.apache.commons.rdf.simple.SimpleRDF#createIRI(String)}. * <p> @@ -38,5 +36,3 @@ * */ package org.apache.commons.rdf.simple; - - http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java b/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java index af91978..afb2129 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java @@ -28,71 +28,66 @@ import org.apache.commons.rdf.experimental.RDFParser; import org.apache.commons.rdf.simple.experimental.AbstractRDFParser; import org.apache.commons.rdf.simple.experimental.RDFParseException; -/** - * For test purposes - a {@link RDFParser} that inserts information - * about what it has been asked to parse instead of actually parsing anything. +/** + * For test purposes - a {@link RDFParser} that inserts information about what + * it has been asked to parse instead of actually parsing anything. * <p> * This always insert at least the triple equivalent to: + * * <pre> * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/greeting> "Hello world" . * </pre> - * Additional triples match the corresponding getter in AbstractRDFParser, - * e.g.: + * + * Additional triples match the corresponding getter in AbstractRDFParser, e.g.: + * * <pre> * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/base> <http://www.example.org/> . - * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/sourceFile> "/tmp/file.ttl" . - * </pre> + * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/sourceFile> "/tmp/file.ttl" . + * </pre> * * */ public class DummyRDFParserBuilder extends AbstractRDFParser<DummyRDFParserBuilder> { - - @Override - protected void parseSynchronusly() throws IOException, IllegalStateException, RDFParseException { - // From parseSynchronusly both of these are always present - RDF factory = getRdfTermFactory().get(); - Consumer<Quad> t = getTarget(); - - // well - each parsing is unique. This should hopefully - // catch any accidental double parsing - IRI parsing = factory.createIRI("urn:uuid:" + UUID.randomUUID()); - t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/greeting"), - factory.createLiteral("Hello world"))); - - // Now we'll expose the finalized AbstractRDFParser settings - // so they can be inspected by the junit test - if (getSourceIri().isPresent()) { - t.accept(factory.createQuad(null, parsing, - factory.createIRI("http://example.com/source"), - getSourceIri().get())); - } - if (getSourceFile().isPresent()) { - t.accept(factory.createQuad(null, parsing, - factory.createIRI("http://example.com/source"), - factory.createIRI(getSourceFile().get().toUri().toString()))); - } - if (getSourceInputStream().isPresent()) { - t.accept(factory.createQuad(null, parsing, - factory.createIRI("http://example.com/source"), - factory.createBlankNode())); - } + @Override + protected void parseSynchronusly() throws IOException, IllegalStateException, RDFParseException { + // From parseSynchronusly both of these are always present + RDF factory = getRdfTermFactory().get(); + Consumer<Quad> t = getTarget(); + + // well - each parsing is unique. This should hopefully + // catch any accidental double parsing + IRI parsing = factory.createIRI("urn:uuid:" + UUID.randomUUID()); + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/greeting"), + factory.createLiteral("Hello world"))); + + // Now we'll expose the finalized AbstractRDFParser settings + // so they can be inspected by the junit test + + if (getSourceIri().isPresent()) { + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/source"), + getSourceIri().get())); + } + if (getSourceFile().isPresent()) { + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/source"), + factory.createIRI(getSourceFile().get().toUri().toString()))); + } + if (getSourceInputStream().isPresent()) { + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/source"), + factory.createBlankNode())); + } - if (getBase().isPresent()) { - t.accept(factory.createQuad(null, parsing, - factory.createIRI("http://example.com/base"), - getBase().get())); - } - if (getContentType().isPresent()) { - t.accept(factory.createQuad(null, parsing, - factory.createIRI("http://example.com/contentType"), - factory.createLiteral(getContentType().get()))); - } - if (getContentTypeSyntax().isPresent()) { - t.accept(factory.createQuad(null, parsing, - factory.createIRI("http://example.com/contentTypeSyntax"), - factory.createLiteral(getContentTypeSyntax().get().name()))); - } - } + if (getBase().isPresent()) { + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/base"), getBase().get())); + } + if (getContentType().isPresent()) { + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/contentType"), + factory.createLiteral(getContentType().get()))); + } + if (getContentTypeSyntax().isPresent()) { + t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/contentTypeSyntax"), + factory.createLiteral(getContentTypeSyntax().get().name()))); + } + } } http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleGraphTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleGraphTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/SimpleGraphTest.java index f207424..bbbad55 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleGraphTest.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/SimpleGraphTest.java @@ -37,13 +37,10 @@ public class SimpleGraphTest extends AbstractGraphTest { @Test public void graphToString() { Assume.assumeNotNull(aliceName, companyName); - //System.out.println(graph); - assertTrue(graph - .toString() - .contains( - "<http://example.com/alice> <http://xmlns.com/foaf/0.1/name> \"Alice\" .")); - assertTrue(graph.toString().contains( - " <http://xmlns.com/foaf/0.1/name> \"A company\" .")); + // System.out.println(graph); + assertTrue( + graph.toString().contains("<http://example.com/alice> <http://xmlns.com/foaf/0.1/name> \"Alice\" .")); + assertTrue(graph.toString().contains(" <http://xmlns.com/foaf/0.1/name> \"A company\" .")); } http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleNoRelativeIRIRDTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleNoRelativeIRIRDTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/SimpleNoRelativeIRIRDTest.java index abec6c0..a1a59ed 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleNoRelativeIRIRDTest.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/SimpleNoRelativeIRIRDTest.java @@ -26,11 +26,10 @@ import java.net.URI; /** * Test simple IRI without relative IRI support. * <p> - * Ensures that {@link AbstractRDFTest#testCreateIRIRelative()} is - * correctly skipped (without causing an error). + * Ensures that {@link AbstractRDFTest#testCreateIRIRelative()} is correctly + * skipped (without causing an error). */ -public class SimpleNoRelativeIRIRDTest extends - AbstractRDFTest { +public class SimpleNoRelativeIRIRDTest extends AbstractRDFTest { @Override public RDF createFactory() { return new SimpleRDF() { http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleServiceLoaderTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleServiceLoaderTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/SimpleServiceLoaderTest.java index 8ec0649..4e37199 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/SimpleServiceLoaderTest.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/SimpleServiceLoaderTest.java @@ -31,10 +31,10 @@ public class SimpleServiceLoaderTest { public void testServiceLoaderLookup() { ServiceLoader<RDF> loader = ServiceLoader.load(RDF.class); for (RDF impl : loader) { - if (impl instanceof SimpleRDF) { - return; // yay - } + if (impl instanceof SimpleRDF) { + return; // yay + } } - fail("SimpleRDF not found in ServiceLoader"); + fail("SimpleRDF not found in ServiceLoader"); } } http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/TestWritingGraph.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/TestWritingGraph.java b/simple/src/test/java/org/apache/commons/rdf/simple/TestWritingGraph.java index ca4925f..3badaf7 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/TestWritingGraph.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/TestWritingGraph.java @@ -68,20 +68,13 @@ public class TestWritingGraph { Collections.shuffle(types); for (int i = 0; i < TRIPLES; i++) { if (i % 11 == 0) { - graph.add(subject, predicate, - factory.createBlankNode("Example " + i)); + graph.add(subject, predicate, factory.createBlankNode("Example " + i)); } else if (i % 5 == 0) { - graph.add(subject, predicate, - factory.createLiteral("Example " + i, "en")); + graph.add(subject, predicate, factory.createLiteral("Example " + i, "en")); } else if (i % 3 == 0) { - graph.add( - subject, - predicate, - factory.createLiteral("Example " + i, - types.get(i % types.size()))); + graph.add(subject, predicate, factory.createLiteral("Example " + i, types.get(i % types.size()))); } else { - graph.add(subject, predicate, - factory.createLiteral("Example " + i)); + graph.add(subject, predicate, factory.createLiteral("Example " + i)); } } } @@ -101,16 +94,14 @@ public class TestWritingGraph { public void countQuery() { IRI subject = factory.createIRI("subj"); IRI predicate = factory.createIRI("pred"); - long count = graph.stream(subject, predicate, null).unordered() - .parallel().count(); - //System.out.println("Counted - " + count); + long count = graph.stream(subject, predicate, null).unordered().parallel().count(); + // System.out.println("Counted - " + count); assertEquals(count, TRIPLES); } public static String tripleAsString(Triple t) { - return t.getSubject().ntriplesString() + " " - + t.getPredicate().ntriplesString() + " " + - t.getObject().ntriplesString() + " ."; + return t.getSubject().ntriplesString() + " " + t.getPredicate().ntriplesString() + " " + + t.getObject().ntriplesString() + " ."; } @Test @@ -137,8 +128,7 @@ public class TestWritingGraph { IRI subject = factory.createIRI("subj"); IRI predicate = factory.createIRI("pred"); - Stream<CharSequence> stream = graph - .stream(subject, predicate, null).map(TestWritingGraph::tripleAsString); + Stream<CharSequence> stream = graph.stream(subject, predicate, null).map(TestWritingGraph::tripleAsString); Files.write(graphFile, stream::iterator, StandardCharsets.UTF_8); } @@ -154,8 +144,7 @@ public class TestWritingGraph { IRI subject = factory.createIRI("nonexistent"); IRI predicate = factory.createIRI("pred"); - Stream<CharSequence> stream = graph - .stream(subject, predicate, null).map(Object::toString); + Stream<CharSequence> stream = graph.stream(subject, predicate, null).map(Object::toString); Files.write(graphFile, stream::iterator, StandardCharsets.UTF_8); } http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/TypesTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/TypesTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/TypesTest.java index 056091e..f199d9e 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/TypesTest.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/TypesTest.java @@ -29,13 +29,12 @@ import org.junit.Test; public class TypesTest { /** - * Test method for {@link org.apache.commons.rdf.simple.Types#getIRIString()} - * . + * Test method for + * {@link org.apache.commons.rdf.simple.Types#getIRIString()} . */ @Test public final void testGetIRIString() { - assertEquals("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", - Types.RDF_LANGSTRING.getIRIString()); + assertEquals("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", Types.RDF_LANGSTRING.getIRIString()); } /** @@ -44,8 +43,7 @@ public class TypesTest { */ @Test public final void testNtriplesString() { - assertEquals("<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>", - Types.RDF_LANGSTRING.ntriplesString()); + assertEquals("<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>", Types.RDF_LANGSTRING.ntriplesString()); } /** @@ -55,17 +53,10 @@ public class TypesTest { */ @Test public final void testGet() { - assertTrue(Types.get( - new IRIImpl("http://www.w3.org/2001/XMLSchema#boolean")) - .isPresent()); - assertEquals( - "http://www.w3.org/2001/XMLSchema#boolean", - Types.get( - new IRIImpl("http://www.w3.org/2001/XMLSchema#boolean")) - .get().getIRIString()); - assertFalse(Types.get( - new IRIImpl("http://www.w3.org/2001/XMLSchema#nonExistent")) - .isPresent()); + assertTrue(Types.get(new IRIImpl("http://www.w3.org/2001/XMLSchema#boolean")).isPresent()); + assertEquals("http://www.w3.org/2001/XMLSchema#boolean", + Types.get(new IRIImpl("http://www.w3.org/2001/XMLSchema#boolean")).get().getIRIString()); + assertFalse(Types.get(new IRIImpl("http://www.w3.org/2001/XMLSchema#nonExistent")).isPresent()); } } http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/413dd09a/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java index fed76b3..12e58bd 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java @@ -48,209 +48,203 @@ import org.junit.rules.ExpectedException; public class AbstractRDFParserTest { - private RDF factory = new SimpleRDF(); - - private DummyRDFParserBuilder dummyParser = new DummyRDFParserBuilder(); - private Path testNt; - private Path testTtl; - private Path testXml; + private RDF factory = new SimpleRDF(); + + private DummyRDFParserBuilder dummyParser = new DummyRDFParserBuilder(); + private Path testNt; + private Path testTtl; + private Path testXml; + + @Before + public void createTempFile() throws IOException { + testNt = Files.createTempFile("test", ".nt"); + testTtl = Files.createTempFile("test", ".ttl"); + testXml = Files.createTempFile("test", ".xml"); + + // No need to populate the files as the dummy parser + // doesn't actually read anything + } + + @After + public void deleteTempFiles() throws IOException { + Files.deleteIfExists(testNt); + Files.deleteIfExists(testTtl); + Files.deleteIfExists(testXml); + } + + @Test + public void guessRDFSyntax() throws Exception { + assertEquals(RDFSyntax.NTRIPLES, AbstractRDFParser.guessRDFSyntax(testNt).get()); + assertEquals(RDFSyntax.TURTLE, AbstractRDFParser.guessRDFSyntax(testTtl).get()); + assertFalse(AbstractRDFParser.guessRDFSyntax(testXml).isPresent()); + } + + private void checkGraph(Graph g) throws Exception { + assertTrue(g.size() > 0); + IRI greeting = factory.createIRI("http://example.com/greeting"); + // Should only have parsed once! + assertEquals(1, g.stream(null, greeting, null).count()); + Triple triple = g.stream(null, greeting, null).findAny().get(); + assertTrue(triple.getSubject() instanceof IRI); + IRI parsing = (IRI) triple.getSubject(); + assertTrue(parsing.getIRIString().startsWith("urn:uuid:")); + + assertEquals("http://example.com/greeting", triple.getPredicate().getIRIString()); + + assertTrue(triple.getObject() instanceof Literal); + Literal literal = (Literal) triple.getObject(); + assertEquals("Hello world", literal.getLexicalForm()); + assertFalse(literal.getLanguageTag().isPresent()); + assertEquals(Types.XSD_STRING, literal.getDatatype()); + + // Check uniqueness of properties that are always present + assertEquals(1, g.stream(null, factory.createIRI("http://example.com/source"), null).count()); + + // Check optional properties that are unique + assertTrue(2 > g.stream(null, factory.createIRI("http://example.com/base"), null).count()); + assertTrue(2 > g.stream(null, factory.createIRI("http://example.com/contentType"), null).count()); + assertTrue(2 > g.stream(null, factory.createIRI("http://example.com/contentTypeSyntax"), null).count()); + } + + @Test + public void parseFile() throws Exception { + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(testNt).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + // FIXME: this could potentially break if the equivalent of /tmp + // includes + // international characters + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); + // Should be set to the file path + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); + + // Should NOT have guessed the content type + assertNull(firstPredicate(g, "contentType")); + assertNull(firstPredicate(g, "contentTypeSyntax")); + } + + @Test + public void parseNoSource() throws Exception { + thrown.expect(IllegalStateException.class); + dummyParser.parse(); + } + + @Test + public void parseBaseAndContentTypeNoSource() throws Exception { + // Can set the other options, even without source() + IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); + RDFParser parser = dummyParser.base(base).contentType(RDFSyntax.RDFXML); + thrown.expect(IllegalStateException.class); + thrown.expectMessage("No source has been set"); + // but .parse() should fail + parser.parse(); + } + + @Test + public void parseFileMissing() throws Exception { + Files.delete(testNt); + // This should not fail yet + RDFParser parser = dummyParser.source(testNt); + // but here: + thrown.expect(IOException.class); + parser.parse(); + } + + @Test + public void parseFileContentType() throws Exception { + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(testNt).contentType(RDFSyntax.NTRIPLES).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + // FIXME: this could potentially break if the equivalent of /tmp + // includes + // international characters + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); + assertEquals("\"NTRIPLES\"", firstPredicate(g, "contentTypeSyntax")); + assertEquals("\"application/n-triples\"", firstPredicate(g, "contentType")); + } + + private String firstPredicate(Graph g, String pred) { + return g.stream(null, factory.createIRI("http://example.com/" + pred), null).map(Triple::getObject) + .map(RDFTerm::ntriplesString).findAny().orElse(null); + } + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void parseInputStreamFailsIfBaseMissing() throws Exception { + InputStream inputStream = new ByteArrayInputStream(new byte[0]); + // Should not fail at this point + RDFParser parser = dummyParser.source(inputStream); + // but here: + thrown.expect(IllegalStateException.class); + thrown.expectMessage("base iri required for inputstream source"); + parser.parse(); + } + + @Test + public void parseInputStreamWithBase() throws Exception { + InputStream inputStream = new ByteArrayInputStream(new byte[0]); + IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(inputStream).base(base).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertEquals("<http://www.example.org/test.rdf>", firstPredicate(g, "base")); + // in our particular debug output, + // bnode source indicates InputStream + assertTrue(firstPredicate(g, "source").startsWith("_:")); + assertNull(firstPredicate(g, "contentType")); + assertNull(firstPredicate(g, "contentTypeSyntax")); + } + + @Test + public void parseInputStreamWithNQuads() throws Exception { + InputStream inputStream = new ByteArrayInputStream(new byte[0]); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(inputStream).contentType(RDFSyntax.NQUADS).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertNull(firstPredicate(g, "base")); + // in our particular debug output, + // bnode source indicates InputStream + assertTrue(firstPredicate(g, "source").startsWith("_:")); + assertEquals("\"application/n-quads\"", firstPredicate(g, "contentType")); + assertEquals("\"NQUADS\"", firstPredicate(g, "contentTypeSyntax")); + } + + @Test + public void parseIRI() throws Exception { + IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(iri).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); + // No base - assuming the above IRI is always + // the base would break server-supplied base from + // any HTTP Location redirects and Content-Location header + assertNull(firstPredicate(g, "base")); + // ".ttl" in IRI string does not imply any content type + assertNull(firstPredicate(g, "contentType")); + assertNull(firstPredicate(g, "contentTypeSyntax")); + + } + + @Test + public void parseIRIBaseContentType() throws Exception { + IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(iri).base(iri).contentType(RDFSyntax.TURTLE).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); + assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "base")); + assertEquals("\"TURTLE\"", firstPredicate(g, "contentTypeSyntax")); + assertEquals("\"text/turtle\"", firstPredicate(g, "contentType")); + } - @Before - public void createTempFile() throws IOException { - testNt = Files.createTempFile("test", ".nt"); - testTtl = Files.createTempFile("test", ".ttl"); - testXml = Files.createTempFile("test", ".xml"); - - // No need to populate the files as the dummy parser - // doesn't actually read anything - } - - @After - public void deleteTempFiles() throws IOException { - Files.deleteIfExists(testNt); - Files.deleteIfExists(testTtl); - Files.deleteIfExists(testXml); - } - - @Test - public void guessRDFSyntax() throws Exception { - assertEquals(RDFSyntax.NTRIPLES, AbstractRDFParser.guessRDFSyntax(testNt).get()); - assertEquals(RDFSyntax.TURTLE, AbstractRDFParser.guessRDFSyntax(testTtl).get()); - assertFalse(AbstractRDFParser.guessRDFSyntax(testXml).isPresent()); - } - - private void checkGraph(Graph g) throws Exception { - assertTrue(g.size() > 0); - IRI greeting = factory.createIRI("http://example.com/greeting"); - // Should only have parsed once! - assertEquals(1, g.stream(null, greeting, null).count()); - Triple triple = g.stream(null, greeting, null).findAny().get(); - assertTrue(triple.getSubject() instanceof IRI); - IRI parsing = (IRI) triple.getSubject(); - assertTrue(parsing.getIRIString().startsWith("urn:uuid:")); - - assertEquals("http://example.com/greeting", triple.getPredicate().getIRIString()); - - assertTrue(triple.getObject() instanceof Literal); - Literal literal = (Literal) triple.getObject(); - assertEquals("Hello world", literal.getLexicalForm()); - assertFalse(literal.getLanguageTag().isPresent()); - assertEquals(Types.XSD_STRING, literal.getDatatype()); - - // Check uniqueness of properties that are always present - assertEquals(1, - g.stream(null, factory.createIRI("http://example.com/source"), null).count()); - - // Check optional properties that are unique - assertTrue(2 > g.stream(null, factory.createIRI("http://example.com/base"), null).count()); - assertTrue(2 > g.stream(null, factory.createIRI("http://example.com/contentType"), null).count()); - assertTrue(2 > g.stream(null, factory.createIRI("http://example.com/contentTypeSyntax"), null).count()); - } - - @Test - public void parseFile() throws Exception { - Graph g = factory.createGraph(); - RDFParser parser = dummyParser.source(testNt).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - // FIXME: this could potentially break if the equivalent of /tmp includes - // international characters - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); - // Should be set to the file path - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); - - // Should NOT have guessed the content type - assertNull(firstPredicate(g, "contentType")); - assertNull(firstPredicate(g, "contentTypeSyntax")); - } - - - @Test - public void parseNoSource() throws Exception { - thrown.expect(IllegalStateException.class); - dummyParser.parse(); - } - - @Test - public void parseBaseAndContentTypeNoSource() throws Exception { - // Can set the other options, even without source() - IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); - RDFParser parser = dummyParser.base(base).contentType(RDFSyntax.RDFXML); - thrown.expect(IllegalStateException.class); - thrown.expectMessage("No source has been set"); - // but .parse() should fail - parser.parse(); - } - - @Test - public void parseFileMissing() throws Exception { - Files.delete(testNt); - // This should not fail yet - RDFParser parser = dummyParser.source(testNt); - // but here: - thrown.expect(IOException.class); - parser.parse(); - } - - - @Test - public void parseFileContentType() throws Exception { - Graph g = factory.createGraph(); - RDFParser parser = dummyParser - .source(testNt) - .contentType(RDFSyntax.NTRIPLES) - .target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - // FIXME: this could potentially break if the equivalent of /tmp includes - // international characters - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); - assertEquals("\"NTRIPLES\"", firstPredicate(g, "contentTypeSyntax")); - assertEquals("\"application/n-triples\"", firstPredicate(g, "contentType")); - } - - private String firstPredicate(Graph g, String pred) { - return g.stream(null, factory.createIRI("http://example.com/" + pred), null) - .map(Triple::getObject).map(RDFTerm::ntriplesString).findAny().orElse(null); - } - - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void parseInputStreamFailsIfBaseMissing() throws Exception { - InputStream inputStream = new ByteArrayInputStream(new byte[0]); - // Should not fail at this point - RDFParser parser = dummyParser.source(inputStream); - // but here: - thrown.expect(IllegalStateException.class); - thrown.expectMessage("base iri required for inputstream source"); - parser.parse(); - } - - @Test - public void parseInputStreamWithBase() throws Exception { - InputStream inputStream = new ByteArrayInputStream(new byte[0]); - IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); - Graph g = factory.createGraph(); - RDFParser parser = dummyParser.source(inputStream).base(base).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertEquals("<http://www.example.org/test.rdf>", firstPredicate(g, "base")); - // in our particular debug output, - // bnode source indicates InputStream - assertTrue(firstPredicate(g, "source").startsWith("_:")); - assertNull(firstPredicate(g, "contentType")); - assertNull(firstPredicate(g, "contentTypeSyntax")); - } - - @Test - public void parseInputStreamWithNQuads() throws Exception { - InputStream inputStream = new ByteArrayInputStream(new byte[0]); - Graph g = factory.createGraph(); - RDFParser parser = dummyParser.source(inputStream).contentType(RDFSyntax.NQUADS).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertNull(firstPredicate(g, "base")); - // in our particular debug output, - // bnode source indicates InputStream - assertTrue(firstPredicate(g, "source").startsWith("_:")); - assertEquals("\"application/n-quads\"", firstPredicate(g, "contentType")); - assertEquals("\"NQUADS\"", firstPredicate(g, "contentTypeSyntax")); - } - - @Test - public void parseIRI() throws Exception { - IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); - Graph g = factory.createGraph(); - RDFParser parser = dummyParser.source(iri).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); - // No base - assuming the above IRI is always - // the base would break server-supplied base from - // any HTTP Location redirects and Content-Location header - assertNull(firstPredicate(g, "base")); - // ".ttl" in IRI string does not imply any content type - assertNull(firstPredicate(g, "contentType")); - assertNull(firstPredicate(g, "contentTypeSyntax")); - - } - - @Test - public void parseIRIBaseContentType() throws Exception { - IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); - Graph g = factory.createGraph(); - RDFParser parser = dummyParser.source(iri).base(iri).contentType(RDFSyntax.TURTLE).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); - assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "base")); - assertEquals("\"TURTLE\"", firstPredicate(g, "contentTypeSyntax")); - assertEquals("\"text/turtle\"", firstPredicate(g, "contentType")); - } - - }