// Origin: simple/src/main/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParser.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.rdf.simple.experimental;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Consumer;

import org.apache.commons.rdf.api.Dataset;
import org.apache.commons.rdf.api.Graph;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.apache.commons.rdf.api.RDFSyntax;
import org.apache.commons.rdf.api.RDFTermFactory;
import org.apache.commons.rdf.experimental.RDFParser;
import org.apache.commons.rdf.simple.SimpleRDFTermFactory;

/**
 * Abstract RDFParser
 * <p>
 * This abstract class keeps the parser settings in private {@link Optional}
 * fields which are exposed through getters like {@link #getSourceFile()}.
 * Some basic checking like {@link #checkIsAbsolute(IRI)} is performed.
 * <p>
 * This class and its subclasses are {@link Cloneable}, immutable and
 * (therefore) thread-safe - each call to option methods like
 * {@link #contentType(String)} or {@link #source(IRI)} will return a cloned,
 * mutated copy.
 * <p>
 * By default, parsing is done by the abstract method
 * {@link #parseSynchronusly()} - which is executed in a cloned snapshot - hence
 * multiple {@link #parse()} calls are thread-safe. The default {@link #parse()}
 * uses a thread pool whose threads belong to {@link #threadGroup} - but
 * implementations can override {@link #parse()} (e.g. because it has its own
 * threading model or use asynchronous remote execution).
 *
 * @param <T> The concrete parser type, returned by the option methods
 */
public abstract class AbstractRDFParser<T extends AbstractRDFParser<T>>
        implements RDFParser, Cloneable {

    /** Thread group of the threads created for the default {@link #parse()}. */
    public static final ThreadGroup threadGroup = new ThreadGroup("Commons RDF parsers");

    private static final ExecutorService threadpool =
            Executors.newCachedThreadPool(r -> new Thread(threadGroup, r));

    // Basically only used for creating IRIs
    private static final RDFTermFactory internalRdfTermFactory = new SimpleRDFTermFactory();

    private Optional<RDFTermFactory> rdfTermFactory = Optional.empty();
    private Optional<RDFSyntax> contentTypeSyntax = Optional.empty();
    private Optional<String> contentType = Optional.empty();
    private Optional<IRI> base = Optional.empty();
    private Optional<InputStream> sourceInputStream = Optional.empty();
    private Optional<Path> sourceFile = Optional.empty();
    private Optional<IRI> sourceIri = Optional.empty();
    // null until one of the target(..) methods has been called
    private Consumer<Quad> target;
    // Start out empty (rather than null) so the getters never hand out a
    // null Optional, even before any target(..) call.
    private Optional<Dataset> targetDataset = Optional.empty();
    private Optional<Graph> targetGraph = Optional.empty();

    /**
     * Get the set {@link RDFTermFactory}, if any.
     *
     * @return The {@link RDFTermFactory}, or {@link Optional#empty()} if none has been set
     */
    public Optional<RDFTermFactory> getRdfTermFactory() {
        return rdfTermFactory;
    }

    /**
     * Get the set content-type {@link RDFSyntax}, if any.
     * <p>
     * If this is {@link Optional#isPresent()} because it was set with
     * {@link #contentType(RDFSyntax)}, then {@link #getContentType()}
     * contains the value of {@link RDFSyntax#mediaType}.
     *
     * @return The {@link RDFSyntax}, or {@link Optional#empty()} if not set or not recognized
     */
    public Optional<RDFSyntax> getContentTypeSyntax() {
        return contentTypeSyntax;
    }

    /**
     * Get the set content-type String, if any.
     * <p>
     * If this is {@link Optional#isPresent()} and
     * is recognized by {@link RDFSyntax#byMediaType(String)}, then
     * the corresponding {@link RDFSyntax} is set on
     * {@link #getContentTypeSyntax()}, otherwise that is
     * {@link Optional#empty()}.
     *
     * @return The content type, or {@link Optional#empty()} if not set
     */
    public final Optional<String> getContentType() {
        return contentType;
    }

    /**
     * Get the target to consume parsed Quads.
     * <p>
     * This is <code>null</code> until one of the <code>target(..)</code>
     * methods has been called. The default {@link #checkTarget()}, run by
     * {@link #prepareForParsing()}, rejects an unset target, so from the
     * call to {@link #parseSynchronusly()} it is expected to be non-null.
     *
     * @return The target consumer, or <code>null</code> if not yet set
     */
    public Consumer<Quad> getTarget() {
        return target;
    }

    /**
     * Get the target dataset as set by {@link #target(Dataset)}.
     * <p>
     * The return value is {@link Optional#isPresent()} if and only if
     * {@link #target(Dataset)} has been set, meaning that the implementation
     * may choose to append parsed quads to the {@link Dataset} directly instead
     * of relying on the generated {@link #getTarget()} consumer.
     * <p>
     * If this value is present, then {@link #getTargetGraph()} MUST
     * be {@link Optional#empty()}.
     *
     * @return The target Dataset, or {@link Optional#empty()} if another kind of target has been set.
     */
    public Optional<Dataset> getTargetDataset() {
        return targetDataset;
    }

    /**
     * Get the target graph as set by {@link #target(Graph)}.
     * <p>
     * The return value is {@link Optional#isPresent()} if and only if
     * {@link #target(Graph)} has been set, meaning that the implementation
     * may choose to append parsed triples to the {@link Graph} directly instead
     * of relying on the generated {@link #getTarget()} consumer.
     * <p>
     * If this value is present, then {@link #getTargetDataset()} MUST
     * be {@link Optional#empty()}.
     *
     * @return The target Graph, or {@link Optional#empty()} if another kind of target has been set.
     */
    public Optional<Graph> getTargetGraph() {
        return targetGraph;
    }

    /**
     * Get the set base {@link IRI}, if present.
     *
     * @return The base IRI, or {@link Optional#empty()} if not set
     */
    public Optional<IRI> getBase() {
        return base;
    }

    /**
     * Get the set source {@link InputStream}.
     * <p>
     * If this is {@link Optional#isPresent()}, then
     * {@link #getSourceFile()} and {@link #getSourceIri()}
     * are {@link Optional#empty()}.
     *
     * @return The source stream, or {@link Optional#empty()} if another kind of source is set
     */
    public Optional<InputStream> getSourceInputStream() {
        return sourceInputStream;
    }

    /**
     * Get the set source {@link Path}.
     * <p>
     * If this is {@link Optional#isPresent()}, then
     * {@link #getSourceInputStream()} and {@link #getSourceIri()}
     * are {@link Optional#empty()}.
     *
     * @return The source file, or {@link Optional#empty()} if another kind of source is set
     */
    public Optional<Path> getSourceFile() {
        return sourceFile;
    }

    /**
     * Get the set source {@link IRI}.
     * <p>
     * If this is {@link Optional#isPresent()}, then
     * {@link #getSourceInputStream()} and {@link #getSourceFile()}
     * are {@link Optional#empty()}.
     *
     * @return The source IRI, or {@link Optional#empty()} if another kind of source is set
     */
    public Optional<IRI> getSourceIri() {
        return sourceIri;
    }

    /**
     * Make a shallow copy of this parser, including all its settings.
     *
     * @return The cloned parser
     */
    @SuppressWarnings("unchecked")
    @Override
    public T clone() {
        try {
            return (T) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * View this parser as its concrete type <code>T</code>.
     *
     * @return This instance, cast to <code>T</code>
     */
    @SuppressWarnings("unchecked")
    protected T asT() {
        return (T) this;
    }

    @Override
    public T rdfTermFactory(RDFTermFactory rdfTermFactory) {
        AbstractRDFParser<T> c = clone();
        c.rdfTermFactory = Optional.ofNullable(rdfTermFactory);
        return c.asT();
    }

    @Override
    public T contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException {
        AbstractRDFParser<T> c = clone();
        c.contentTypeSyntax = Optional.ofNullable(rdfSyntax);
        // Keep the string form in sync with the syntax
        c.contentType = c.contentTypeSyntax.map(syntax -> syntax.mediaType);
        return c.asT();
    }

    @Override
    public T contentType(String contentType) throws IllegalArgumentException {
        AbstractRDFParser<T> c = clone();
        c.contentType = Optional.ofNullable(contentType);
        // Unrecognized media types leave contentTypeSyntax empty
        c.contentTypeSyntax = c.contentType.flatMap(RDFSyntax::byMediaType);
        return c.asT();
    }

    @Override
    public T base(IRI base) {
        AbstractRDFParser<T> c = clone();
        c.base = Optional.ofNullable(base);
        c.base.ifPresent(i -> checkIsAbsolute(i));
        return c.asT();
    }

    @Override
    public T base(String base) throws IllegalArgumentException {
        return base(internalRdfTermFactory.createIRI(base));
    }

    @Override
    public T source(InputStream inputStream) {
        AbstractRDFParser<T> c = clone();
        c.resetSource();
        c.sourceInputStream = Optional.ofNullable(inputStream);
        return c.asT();
    }

    @Override
    public T source(Path file) {
        AbstractRDFParser<T> c = clone();
        c.resetSource();
        c.sourceFile = Optional.ofNullable(file);
        return c.asT();
    }

    @Override
    public T source(IRI iri) {
        AbstractRDFParser<T> c = clone();
        c.resetSource();
        c.sourceIri = Optional.ofNullable(iri);
        c.sourceIri.ifPresent(i -> checkIsAbsolute(i));
        return c.asT();
    }

    @Override
    public T source(String iri) throws IllegalArgumentException {
        // Delegate to source(IRI), which clones, resets the other sources and
        // checks that the IRI is absolute. A null string is passed on as a
        // null IRI (clearing the source) instead of being handed to the term
        // factory.
        if (iri == null) {
            return source((IRI) null);
        }
        return source(internalRdfTermFactory.createIRI(iri));
    }

    /**
     * Check if an iri is absolute.
     * <p>
     * Used by {@link #source(IRI)} and {@link #base(IRI)}, and thereby
     * also by their String variants.
     *
     * @param iri IRI to check
     * @throws IllegalArgumentException If the IRI is not absolute
     */
    protected void checkIsAbsolute(IRI iri) {
        if (!URI.create(iri.getIRIString()).isAbsolute()) {
            throw new IllegalArgumentException("IRI is not absolute: " + iri);
        }
    }

    /**
     * Check that one and only one source is present and valid.
     * <p>
     * Used by {@link #parse()}.
     * <p>
     * Subclasses might override this method, e.g. to support other
     * source combinations, or to check if the sourceIri is
     * resolvable.
     *
     * @throws IOException If a source file can't be read
     * @throws IllegalStateException If no source, or more than one source, has been set
     */
    protected void checkSource() throws IOException {
        if (!sourceFile.isPresent() && !sourceInputStream.isPresent() && !sourceIri.isPresent()) {
            throw new IllegalStateException("No source has been set");
        }
        if (sourceIri.isPresent() && sourceInputStream.isPresent()) {
            throw new IllegalStateException("Both sourceIri and sourceInputStream have been set");
        }
        if (sourceIri.isPresent() && sourceFile.isPresent()) {
            throw new IllegalStateException("Both sourceIri and sourceFile have been set");
        }
        if (sourceInputStream.isPresent() && sourceFile.isPresent()) {
            throw new IllegalStateException("Both sourceInputStream and sourceFile have been set");
        }
        if (sourceFile.isPresent() && !Files.isReadable(sourceFile.get())) {
            // Report the path itself, not the Optional wrapping it
            throw new IOException("Can't read file: " + sourceFile.get());
        }
    }

    /**
     * Check if base is required.
     * <p>
     * A base is required when parsing from an {@link InputStream}, unless
     * the content type syntax is {@link RDFSyntax#NQUADS} or
     * {@link RDFSyntax#NTRIPLES}.
     *
     * @throws IllegalStateException if base is required, but not set.
     */
    protected void checkBaseRequired() {
        if (!base.isPresent() && sourceInputStream.isPresent()
                && !contentTypeSyntax.filter(t -> t == RDFSyntax.NQUADS || t == RDFSyntax.NTRIPLES).isPresent()) {
            throw new IllegalStateException("base iri required for inputstream source");
        }
    }

    /**
     * Reset all source* fields to Optional.empty()
     * <p>
     * Subclasses should override this and call <code>super.resetSource()</code>
     * if they need to reset any additional source* fields.
     */
    protected void resetSource() {
        sourceInputStream = Optional.empty();
        sourceIri = Optional.empty();
        sourceFile = Optional.empty();
    }

    /**
     * Reset all optional target* fields to <code>Optional.empty()</code>
     * <p>
     * Note that the consumer set for {@link #getTarget()} is
     * NOT reset.
     * <p>
     * Subclasses should override this and call <code>super.resetTarget()</code>
     * if they need to reset any additional target* fields.
     */
    protected void resetTarget() {
        targetDataset = Optional.empty();
        targetGraph = Optional.empty();
    }

    /**
     * Parse {@link #getSourceInputStream()}, {@link #getSourceFile()} or
     * {@link #getSourceIri()}.
     * <p>
     * One of the source fields MUST be present, as checked by {@link #checkSource()}.
     * <p>
     * {@link #checkBaseRequired()} is called to verify if {@link #getBase()} is required.
     *
     * @throws IOException If the source could not be read
     * @throws RDFParseException If the source could not be parsed (e.g. a .ttl file was not valid Turtle)
     */
    protected abstract void parseSynchronusly() throws IOException, RDFParseException;

    /**
     * Prepare a clone of this RDFParser which have been checked and
     * completed.
     * <p>
     * The returned clone will always have {@link #getRdfTermFactory()}
     * present, and - unless {@link #checkTarget()} has been overridden to
     * allow otherwise - a non-null {@link #getTarget()}.
     * <p>
     * If the {@link #getSourceFile()} is present, but the
     * {@link #getBase()} is not present, the base will be set to the
     * <code>file:///</code> IRI for the Path's real path (e.g. resolving any
     * symbolic links).
     *
     * @return A completed and checked clone of this RDFParser
     * @throws IOException If the source was not accessible (e.g. a file was not found)
     * @throws IllegalStateException If the parser was not in a compatible setting (e.g. contentType was an invalid string)
     */
    protected T prepareForParsing() throws IOException, IllegalStateException {
        checkSource();
        checkBaseRequired();
        checkContentType();
        checkTarget();

        // We'll make a clone of our current state which will be passed to
        // parseSynchronusly()
        AbstractRDFParser<T> c = clone();

        // Unless the caller provided one, use a new factory for each parse
        if (!c.rdfTermFactory.isPresent()) {
            c.rdfTermFactory = Optional.of(createRDFTermFactory());
        }
        // sourceFile, but no base? Let's follow any symlinks and use
        // the file:/// URI
        if (c.sourceFile.isPresent() && !c.base.isPresent()) {
            URI baseUri = c.sourceFile.get().toRealPath().toUri();
            c.base = Optional.of(internalRdfTermFactory.createIRI(baseUri.toString()));
        }

        return c.asT();
    }

    /**
     * Subclasses can override this method to check the target is
     * valid.
     * <p>
     * The default implementation throws an IllegalStateException if the
     * target has not been set, or if both a target graph and a target
     * dataset are present.
     *
     * @throws IllegalStateException if the target is missing or inconsistent
     */
    protected void checkTarget() {
        if (target == null) {
            throw new IllegalStateException("target has not been set");
        }
        if (targetGraph.isPresent() && targetDataset.isPresent()) {
            // This should not happen as each target(..) method resets the optionals
            throw new IllegalStateException("targetGraph and targetDataset can't both be set");
        }
    }

    /**
     * Subclasses can override this method to check compatibility with the
     * contentType setting.
     * <p>
     * The default implementation accepts any setting.
     *
     * @throws IllegalStateException
     *             if the {@link #getContentType()} or
     *             {@link #getContentTypeSyntax()} is not compatible or invalid
     */
    protected void checkContentType() throws IllegalStateException {
    }

    /**
     * Guess RDFSyntax from a local file's extension.
     * <p>
     * This method can be used by subclasses if {@link #getContentType()} is not
     * present and {@link #getSourceFile()} is set.
     *
     * @param path Path which extension should be checked
     * @return The {@link RDFSyntax} which has a matching {@link RDFSyntax#fileExtension},
     *         otherwise {@link Optional#empty()}.
     */
    protected static Optional<RDFSyntax> guessRDFSyntax(Path path) {
        return fileExtension(path).flatMap(RDFSyntax::byFileExtension);
    }

    /**
     * Return the file extension of a Path - if any.
     * <p>
     * The returned file extension includes the leading <code>.</code>
     * <p>
     * Note that this only returns the last extension, e.g. the
     * file extension for <code>archive.tar.gz</code> would be <code>.gz</code>
     *
     * @param path Path which filename might contain an extension
     * @return File extension (including the leading <code>.</code>),
     *         or {@link Optional#empty()} if the path has no extension
     */
    private static Optional<String> fileExtension(Path path) {
        Path fileName = path.getFileName();
        if (fileName == null) {
            // e.g. a root path has no file name element
            return Optional.empty();
        }
        String filenameStr = fileName.toString();
        int last = filenameStr.lastIndexOf(".");
        if (last > -1) {
            return Optional.of(filenameStr.substring(last));
        }
        return Optional.empty();
    }

    /**
     * Create a new {@link RDFTermFactory} for a parse session.
     * <p>
     * This is called by {@link #prepareForParsing()} (and thereby by the
     * default {@link #parse()}) to fill in {@link #getRdfTermFactory()}
     * if it is {@link Optional#empty()}.
     * <p>
     * As parsed blank nodes might be made with
     * {@link RDFTermFactory#createBlankNode(String)},
     * each call to this method SHOULD return
     * a new RDFTermFactory instance.
     *
     * @return A new {@link RDFTermFactory}
     */
    protected RDFTermFactory createRDFTermFactory() {
        return new SimpleRDFTermFactory();
    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation validates and snapshots the settings with
     * {@link #prepareForParsing()} on the calling thread, then runs
     * {@link #parseSynchronusly()} of the snapshot in the shared thread pool.
     * The returned future currently completes with <code>null</code> rather
     * than a ParseResult instance; a failed parse is reported through the
     * future.
     */
    @Override
    public Future<ParseResult> parse() throws IOException, IllegalStateException {
        final AbstractRDFParser<T> c = prepareForParsing();
        return threadpool.submit(() -> {
            c.parseSynchronusly();
            return null;
        });
    }

    @Override
    public T target(Consumer<Quad> consumer) {
        AbstractRDFParser<T> c = clone();
        c.resetTarget();
        c.target = consumer;
        return c.asT();
    }

    @Override
    public T target(Dataset dataset) {
        @SuppressWarnings({ "rawtypes", "unchecked" }) // super calls our .clone()
        AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(dataset);
        c.resetTarget();
        c.targetDataset = Optional.of(dataset);
        return c.asT();
    }

    @Override
    public T target(Graph graph) {
        @SuppressWarnings({ "rawtypes", "unchecked" }) // super calls our .clone()
        AbstractRDFParser<T> c = (AbstractRDFParser) RDFParser.super.target(graph);
        c.resetTarget();
        c.targetGraph = Optional.of(graph);
        return c.asT();
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/10d27cde/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java new file mode 100644 index 0000000..cb3ad82 --- /dev/null +++ b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/RDFParseException.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.rdf.simple.experimental; + +import org.apache.commons.rdf.experimental.RDFParser; + +public class RDFParseException extends Exception { + private static final long serialVersionUID = 5427752643780702976L; + private RDFParser builder; + + public RDFParseException(RDFParser builder) { + super(); + this.builder = builder; + } + + public RDFParseException(RDFParser builder, String message, Throwable cause) { + super(message, cause); + this.builder = builder; + } + + public RDFParseException(RDFParser builder, String message) { + super(message); + this.builder = builder; + } + + public RDFParseException(RDFParser builder, Throwable cause) { + super(cause); + this.builder = builder; + } + + public RDFParser getRDFParserBuilder() { + return builder; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/10d27cde/simple/src/main/java/org/apache/commons/rdf/simple/experimental/package-info.java ---------------------------------------------------------------------- diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/experimental/package-info.java b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/package-info.java new file mode 100644 index 0000000..5196f42 --- /dev/null +++ b/simple/src/main/java/org/apache/commons/rdf/simple/experimental/package-info.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Experimental Commons RDF Simple implementations. + * <p> + * Classes in this package should be considered <strong>at + * risk</strong>; they might change or be removed in the next minor update of + * Commons RDF. + * <p> + * When a class has stabilized, it will move to the + * {@link org.apache.commons.rdf.simple} package. + * <p> + * <ul> + * <li>{@link AbstractRDFParser} - an abstract helper class + * for implementations of + * {@link org.apache.commons.rdf.experimental.RDFParser}.</li> + * </ul> + */ +package org.apache.commons.rdf.simple.experimental; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/10d27cde/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java deleted file mode 100644 index 439bacb..0000000 --- a/simple/src/test/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilderTest.java +++ /dev/null @@ -1,253 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rdf.simple; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.rdf.api.Graph; -import org.apache.commons.rdf.api.IRI; -import org.apache.commons.rdf.api.Literal; -import org.apache.commons.rdf.api.RDFParserBuilder; -import org.apache.commons.rdf.api.RDFSyntax; -import org.apache.commons.rdf.api.RDFTerm; -import org.apache.commons.rdf.api.RDFTermFactory; -import org.apache.commons.rdf.api.Triple; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -public class AbstractRDFParserBuilderTest { - - private RDFTermFactory factory = new SimpleRDFTermFactory(); - - private DummyRDFParserBuilder dummyParser = new DummyRDFParserBuilder(); - private Path testNt; - private Path testTtl; - private Path testXml; - - @Before - public void createTempFile() throws IOException { - testNt = Files.createTempFile("test", ".nt"); - testTtl = Files.createTempFile("test", ".ttl"); - testXml = Files.createTempFile("test", ".xml"); - - // No 
need to populate the files as the dummy parser - // doesn't actually read anything - } - - @After - public void deleteTempFiles() throws IOException { - Files.deleteIfExists(testNt); - Files.deleteIfExists(testTtl); - Files.deleteIfExists(testXml); - } - - @Test - public void guessRDFSyntax() throws Exception { - assertEquals(RDFSyntax.NTRIPLES, AbstractRDFParserBuilder.guessRDFSyntax(testNt).get()); - assertEquals(RDFSyntax.TURTLE, AbstractRDFParserBuilder.guessRDFSyntax(testTtl).get()); - assertFalse(AbstractRDFParserBuilder.guessRDFSyntax(testXml).isPresent()); - } - - private void checkGraph(Graph g) throws Exception { - assertTrue(g.size() > 0); - IRI greeting = factory.createIRI("http://example.com/greeting"); - // Should only have parsed once! - assertEquals(1, g.getTriples(null, greeting, null).count()); - Triple triple = g.getTriples(null, greeting, null).findAny().get(); - assertTrue(triple.getSubject() instanceof IRI); - IRI parsing = (IRI) triple.getSubject(); - assertTrue(parsing.getIRIString().startsWith("urn:uuid:")); - - assertEquals("http://example.com/greeting", triple.getPredicate().getIRIString()); - - assertTrue(triple.getObject() instanceof Literal); - Literal literal = (Literal) triple.getObject(); - assertEquals("Hello world", literal.getLexicalForm()); - assertFalse(literal.getLanguageTag().isPresent()); - assertEquals(Types.XSD_STRING, literal.getDatatype()); - - // Check uniqueness of properties that are always present - assertEquals(1, - g.getTriples(null, factory.createIRI("http://example.com/source"), null).count()); - - // Check optional properties that are unique - assertTrue(2 > g.getTriples(null, factory.createIRI("http://example.com/base"), null).count()); - assertTrue(2 > g.getTriples(null, factory.createIRI("http://example.com/contentType"), null).count()); - assertTrue(2 > g.getTriples(null, factory.createIRI("http://example.com/contentTypeSyntax"), null).count()); - } - - @Test - public void parseFile() throws Exception { - 
Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(testNt).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - // FIXME: this could potentially break if the equivalent of /tmp includes - // international characters - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); - // Should be set to the file path - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); - - // Should NOT have guessed the content type - assertNull(firstPredicate(g, "contentType")); - assertNull(firstPredicate(g, "contentTypeSyntax")); - } - - - @Test - public void parseNoSource() throws Exception { - thrown.expect(IllegalStateException.class); - dummyParser.parse(); - } - - @Test - public void parseBaseAndContentTypeNoSource() throws Exception { - // Can set the other options, even without source() - IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); - RDFParserBuilder parser = dummyParser.base(base).contentType(RDFSyntax.RDFXML); - thrown.expect(IllegalStateException.class); - thrown.expectMessage("No source has been set"); - // but .parse() should fail - parser.parse(); - } - - @Test - public void parseFileMissing() throws Exception { - Files.delete(testNt); - // This should not fail yet - RDFParserBuilder parser = dummyParser.source(testNt); - // but here: - thrown.expect(IOException.class); - parser.parse(); - } - - - @Test - public void parseFileContentType() throws Exception { - Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser - .source(testNt) - .contentType(RDFSyntax.NTRIPLES) - .target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - // FIXME: this could potentially break if the equivalent of /tmp includes - // international characters - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); - assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); - 
assertEquals("\"NTRIPLES\"", firstPredicate(g, "contentTypeSyntax")); - assertEquals("\"application/n-triples\"", firstPredicate(g, "contentType")); - } - - private String firstPredicate(Graph g, String pred) { - return g.getTriples(null, factory.createIRI("http://example.com/" + pred), null) - .map(Triple::getObject).map(RDFTerm::ntriplesString).findAny().orElse(null); - } - - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void parseInputStreamFailsIfBaseMissing() throws Exception { - InputStream inputStream = new ByteArrayInputStream(new byte[0]); - // Should not fail at this point - RDFParserBuilder parser = dummyParser.source(inputStream); - // but here: - thrown.expect(IllegalStateException.class); - thrown.expectMessage("base iri required for inputstream source"); - parser.parse(); - } - - @Test - public void parseInputStreamWithBase() throws Exception { - InputStream inputStream = new ByteArrayInputStream(new byte[0]); - IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); - Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(inputStream).base(base).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertEquals("<http://www.example.org/test.rdf>", firstPredicate(g, "base")); - // in our particular debug output, - // bnode source indicates InputStream - assertTrue(firstPredicate(g, "source").startsWith("_:")); - assertNull(firstPredicate(g, "contentType")); - assertNull(firstPredicate(g, "contentTypeSyntax")); - } - - @Test - public void parseInputStreamWithNQuads() throws Exception { - InputStream inputStream = new ByteArrayInputStream(new byte[0]); - Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(inputStream).contentType(RDFSyntax.NQUADS).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertNull(firstPredicate(g, "base")); - // in our particular debug output, - // bnode 
source indicates InputStream - assertTrue(firstPredicate(g, "source").startsWith("_:")); - assertEquals("\"application/n-quads\"", firstPredicate(g, "contentType")); - assertEquals("\"NQUADS\"", firstPredicate(g, "contentTypeSyntax")); - } - - @Test - public void parseIRI() throws Exception { - IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); - Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(iri).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); - // No base - assuming the above IRI is always - // the base would break server-supplied base from - // any HTTP Location redirects and Content-Location header - assertNull(firstPredicate(g, "base")); - // ".ttl" in IRI string does not imply any content type - assertNull(firstPredicate(g, "contentType")); - assertNull(firstPredicate(g, "contentTypeSyntax")); - - } - - @Test - public void parseIRIBaseContentType() throws Exception { - IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); - Graph g = factory.createGraph(); - RDFParserBuilder parser = dummyParser.source(iri).base(iri).contentType(RDFSyntax.TURTLE).target(g); - parser.parse().get(5, TimeUnit.SECONDS); - checkGraph(g); - assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); - assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "base")); - assertEquals("\"TURTLE\"", firstPredicate(g, "contentTypeSyntax")); - assertEquals("\"text/turtle\"", firstPredicate(g, "contentType")); - } - - -} http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/10d27cde/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java 
b/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java index baf9768..683ef7b 100644 --- a/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java +++ b/simple/src/test/java/org/apache/commons/rdf/simple/DummyRDFParserBuilder.java @@ -23,18 +23,20 @@ import java.util.function.Consumer; import org.apache.commons.rdf.api.IRI; import org.apache.commons.rdf.api.Quad; -import org.apache.commons.rdf.api.RDFParserBuilder; import org.apache.commons.rdf.api.RDFTermFactory; +import org.apache.commons.rdf.experimental.RDFParser; +import org.apache.commons.rdf.simple.experimental.AbstractRDFParser; +import org.apache.commons.rdf.simple.experimental.RDFParseException; /** - * For test purposes - a {@link RDFParserBuilder} that inserts information + * For test purposes - a {@link RDFParser} that inserts information * about what it has been asked to parse instead of actually parsing anything. * <p> * This always insert at least the triple equivalent to: * <pre> * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/greeting> "Hello world" . * </pre> - * Additional triples match the corresponding getter in AbstractRDFParserBuilder, + * Additional triples match the corresponding getter in AbstractRDFParser, * e.g.: * <pre> * <urn:uuid:b7ac3fcc-4d86-4d28-8358-a1cd094974a6> <http://example.com/base> <http://www.example.org/> . 
@@ -43,7 +45,7 @@ import org.apache.commons.rdf.api.RDFTermFactory; * * */ -public class DummyRDFParserBuilder extends AbstractRDFParserBuilder<DummyRDFParserBuilder> { +public class DummyRDFParserBuilder extends AbstractRDFParser<DummyRDFParserBuilder> { @Override protected void parseSynchronusly() throws IOException, IllegalStateException, RDFParseException { @@ -57,7 +59,7 @@ public class DummyRDFParserBuilder extends AbstractRDFParserBuilder<DummyRDFPars t.accept(factory.createQuad(null, parsing, factory.createIRI("http://example.com/greeting"), factory.createLiteral("Hello world"))); - // Now we'll expose the finalized AbstractRDFParserBuilder settings + // Now we'll expose the finalized AbstractRDFParser settings // so they can be inspected by the junit test if (getSourceIri().isPresent()) { http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/10d27cde/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java ---------------------------------------------------------------------- diff --git a/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java b/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java new file mode 100644 index 0000000..f263029 --- /dev/null +++ b/simple/src/test/java/org/apache/commons/rdf/simple/experimental/AbstractRDFParserTest.java @@ -0,0 +1,256 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rdf.simple.experimental; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.rdf.api.Graph; +import org.apache.commons.rdf.api.IRI; +import org.apache.commons.rdf.api.Literal; +import org.apache.commons.rdf.api.RDFSyntax; +import org.apache.commons.rdf.api.RDFTerm; +import org.apache.commons.rdf.api.RDFTermFactory; +import org.apache.commons.rdf.api.Triple; +import org.apache.commons.rdf.experimental.RDFParser; +import org.apache.commons.rdf.simple.DummyRDFParserBuilder; +import org.apache.commons.rdf.simple.SimpleRDFTermFactory; +import org.apache.commons.rdf.simple.Types; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class AbstractRDFParserTest { + + private RDFTermFactory factory = new SimpleRDFTermFactory(); + + private DummyRDFParserBuilder dummyParser = new DummyRDFParserBuilder(); + private Path testNt; + private Path testTtl; + private Path testXml; + + @Before + public void createTempFile() throws IOException { + testNt = Files.createTempFile("test", ".nt"); + testTtl = Files.createTempFile("test", ".ttl"); + testXml = Files.createTempFile("test", ".xml"); + + 
// No need to populate the files as the dummy parser + // doesn't actually read anything + } + + @After + public void deleteTempFiles() throws IOException { + Files.deleteIfExists(testNt); + Files.deleteIfExists(testTtl); + Files.deleteIfExists(testXml); + } + + @Test + public void guessRDFSyntax() throws Exception { + assertEquals(RDFSyntax.NTRIPLES, AbstractRDFParser.guessRDFSyntax(testNt).get()); + assertEquals(RDFSyntax.TURTLE, AbstractRDFParser.guessRDFSyntax(testTtl).get()); + assertFalse(AbstractRDFParser.guessRDFSyntax(testXml).isPresent()); + } + + private void checkGraph(Graph g) throws Exception { + assertTrue(g.size() > 0); + IRI greeting = factory.createIRI("http://example.com/greeting"); + // Should only have parsed once! + assertEquals(1, g.getTriples(null, greeting, null).count()); + Triple triple = g.getTriples(null, greeting, null).findAny().get(); + assertTrue(triple.getSubject() instanceof IRI); + IRI parsing = (IRI) triple.getSubject(); + assertTrue(parsing.getIRIString().startsWith("urn:uuid:")); + + assertEquals("http://example.com/greeting", triple.getPredicate().getIRIString()); + + assertTrue(triple.getObject() instanceof Literal); + Literal literal = (Literal) triple.getObject(); + assertEquals("Hello world", literal.getLexicalForm()); + assertFalse(literal.getLanguageTag().isPresent()); + assertEquals(Types.XSD_STRING, literal.getDatatype()); + + // Check uniqueness of properties that are always present + assertEquals(1, + g.getTriples(null, factory.createIRI("http://example.com/source"), null).count()); + + // Check optional properties that are unique + assertTrue(2 > g.getTriples(null, factory.createIRI("http://example.com/base"), null).count()); + assertTrue(2 > g.getTriples(null, factory.createIRI("http://example.com/contentType"), null).count()); + assertTrue(2 > g.getTriples(null, factory.createIRI("http://example.com/contentTypeSyntax"), null).count()); + } + + @Test + public void parseFile() throws Exception { + Graph g = 
factory.createGraph(); + RDFParser parser = dummyParser.source(testNt).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + // FIXME: this could potentially break if the equivalent of /tmp includes + // international characters + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); + // Should be set to the file path + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); + + // Should NOT have guessed the content type + assertNull(firstPredicate(g, "contentType")); + assertNull(firstPredicate(g, "contentTypeSyntax")); + } + + + @Test + public void parseNoSource() throws Exception { + thrown.expect(IllegalStateException.class); + dummyParser.parse(); + } + + @Test + public void parseBaseAndContentTypeNoSource() throws Exception { + // Can set the other options, even without source() + IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); + RDFParser parser = dummyParser.base(base).contentType(RDFSyntax.RDFXML); + thrown.expect(IllegalStateException.class); + thrown.expectMessage("No source has been set"); + // but .parse() should fail + parser.parse(); + } + + @Test + public void parseFileMissing() throws Exception { + Files.delete(testNt); + // This should not fail yet + RDFParser parser = dummyParser.source(testNt); + // but here: + thrown.expect(IOException.class); + parser.parse(); + } + + + @Test + public void parseFileContentType() throws Exception { + Graph g = factory.createGraph(); + RDFParser parser = dummyParser + .source(testNt) + .contentType(RDFSyntax.NTRIPLES) + .target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + // FIXME: this could potentially break if the equivalent of /tmp includes + // international characters + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "source")); + assertEquals("<" + testNt.toUri().toString() + ">", firstPredicate(g, "base")); + assertEquals("\"NTRIPLES\"", 
firstPredicate(g, "contentTypeSyntax")); + assertEquals("\"application/n-triples\"", firstPredicate(g, "contentType")); + } + + private String firstPredicate(Graph g, String pred) { + return g.getTriples(null, factory.createIRI("http://example.com/" + pred), null) + .map(Triple::getObject).map(RDFTerm::ntriplesString).findAny().orElse(null); + } + + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void parseInputStreamFailsIfBaseMissing() throws Exception { + InputStream inputStream = new ByteArrayInputStream(new byte[0]); + // Should not fail at this point + RDFParser parser = dummyParser.source(inputStream); + // but here: + thrown.expect(IllegalStateException.class); + thrown.expectMessage("base iri required for inputstream source"); + parser.parse(); + } + + @Test + public void parseInputStreamWithBase() throws Exception { + InputStream inputStream = new ByteArrayInputStream(new byte[0]); + IRI base = dummyParser.createRDFTermFactory().createIRI("http://www.example.org/test.rdf"); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(inputStream).base(base).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertEquals("<http://www.example.org/test.rdf>", firstPredicate(g, "base")); + // in our particular debug output, + // bnode source indicates InputStream + assertTrue(firstPredicate(g, "source").startsWith("_:")); + assertNull(firstPredicate(g, "contentType")); + assertNull(firstPredicate(g, "contentTypeSyntax")); + } + + @Test + public void parseInputStreamWithNQuads() throws Exception { + InputStream inputStream = new ByteArrayInputStream(new byte[0]); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(inputStream).contentType(RDFSyntax.NQUADS).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertNull(firstPredicate(g, "base")); + // in our particular debug output, + // bnode source indicates InputStream + 
assertTrue(firstPredicate(g, "source").startsWith("_:")); + assertEquals("\"application/n-quads\"", firstPredicate(g, "contentType")); + assertEquals("\"NQUADS\"", firstPredicate(g, "contentTypeSyntax")); + } + + @Test + public void parseIRI() throws Exception { + IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(iri).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); + // No base - assuming the above IRI is always + // the base would break server-supplied base from + // any HTTP Location redirects and Content-Location header + assertNull(firstPredicate(g, "base")); + // ".ttl" in IRI string does not imply any content type + assertNull(firstPredicate(g, "contentType")); + assertNull(firstPredicate(g, "contentTypeSyntax")); + + } + + @Test + public void parseIRIBaseContentType() throws Exception { + IRI iri = dummyParser.createRDFTermFactory().createIRI("http://www.example.net/test.ttl"); + Graph g = factory.createGraph(); + RDFParser parser = dummyParser.source(iri).base(iri).contentType(RDFSyntax.TURTLE).target(g); + parser.parse().get(5, TimeUnit.SECONDS); + checkGraph(g); + assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "source")); + assertEquals("<http://www.example.net/test.ttl>", firstPredicate(g, "base")); + assertEquals("\"TURTLE\"", firstPredicate(g, "contentTypeSyntax")); + assertEquals("\"text/turtle\"", firstPredicate(g, "contentType")); + } + + +}