[ https://issues.apache.org/jira/browse/JENA-1306?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15899083#comment-15899083 ]
ASF GitHub Bot commented on JENA-1306: -------------------------------------- Github user rvesse commented on a diff in the pull request: https://github.com/apache/jena/pull/222#discussion_r104627436 --- Diff: jena-arq/src/main/java/org/apache/jena/riot/RDFParserBuilder.java --- @@ -0,0 +1,431 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.riot; + +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.http.Header; +import org.apache.http.client.HttpClient; +import org.apache.http.impl.client.cache.CachingHttpClientBuilder; +import org.apache.http.message.BasicHeader; +import org.apache.jena.atlas.lib.IRILib; +import org.apache.jena.graph.BlankNodeId; +import org.apache.jena.graph.Graph; +import org.apache.jena.riot.lang.LabelToNode; +import org.apache.jena.riot.system.*; +import org.apache.jena.riot.web.HttpNames; +import org.apache.jena.sparql.core.DatasetGraph; +import org.apache.jena.sparql.util.Context; + +/** + * An {@link RDFParser} is a process that will generate triples; + * {@link RDFParserBuilder} provides the means to setup the parser. + * <p> + * An {@link RDFParser} has a predefined source; the target for output is given when the "parse" method is called. + * It can be used multiple times in which case the same source is reread. The destination can vary. + * The application is responsible for concurrency of the destination of the parse operation. + * + * The process is + * <pre> + * StreamRDF destination = ... + * RDFParser parser = RDFParser.create() + * .source("filename.ttl") + * .build(); + * parser.parse(destination); + * </pre> + * or using a short cut: + * <pre> + * RDFParser parser = RDFParser.create() + * .source("filename.ttl") + * .parse(destination); + * </pre> + */ +public class RDFParserBuilder { + // Source + private String uri = null; + private Path path = null; + private InputStream inputStream; + // StringReader - charset problems with any other kind. 
+ private Reader javaReader = null; + + // HTTP + private Map<String, String> httpHeaders = new HashMap<>(); + private HttpClient httpClient = null; + + // Syntax + private Lang hintLang = null; + private Lang forceLang = null; + + private String baseUri = null; + + // ---- Unused but left in case required in the future. + private boolean strict = SysRIOT.isStrictMode(); + private boolean resolveURIs = true; + private IRIResolver resolver = null; + // ---- + + // Construction for the StreamRDF + private FactoryRDF factory = null; + private LabelToNode labelToNode = null; + + // Bad news. + private ErrorHandler errorHandler = null; + + // Parsing process + private Context context = null; + + public static RDFParserBuilder create() { return new RDFParserBuilder() ; } + private RDFParserBuilder() {} + + /** + * Set the source to {@link Path}. + * This clears any other source setting. + * @param path + * @return this + */ + public RDFParserBuilder source(Path path) { + clearSource(); + this.path = path; + return this; + } + + /** + * Set the source to a URI; this includes OS file names. + * File URL shoudl be of the form {@code file:///...}. + * This clears any other source setting. + * @param uri + * @return this + */ + public RDFParserBuilder source(String uri) { + clearSource(); + this.uri = uri; + return this; + } + + /** + * Set the source to {@link InputStream}. + * This clears any other source setting. + * The {@link InputStream} will be closed when the + * parser is called and the parser can not be reused. + * @param input + * @return this + */ + public RDFParserBuilder source(InputStream input) { + clearSource(); + this.inputStream = input; + return this; + } + + /** + * Set the source to {@link StringReader}. + * This clears any other source setting. + * The {@link StringReader} will be closed when the + * parser is called and the parser can not be reused. 
+ * @param reader + * @return this + */ + public RDFParserBuilder source(StringReader reader) { + clearSource(); + this.javaReader = reader; + return this; + } + + /** + * Set the source to {@link StringReader}. + * This clears any other source setting. + * The {@link StringReader} will be closed when the + * parser is called and the parser can not be reused. + * @param reader + * @return this + * @deprecated Use an InputStream or a StringReader. + */ + @Deprecated + public RDFParserBuilder source(Reader reader) { + clearSource(); + this.javaReader = reader; + return this; + } + + private void clearSource() { + this.uri = null; + this.inputStream = null; + this.path = null; + this.javaReader = null; + } + + /** + * Set the hint {@link Lang}. This is the RDF syntax used when there is no way to + * deduce the syntax (e.g. read from a InputStream, no recognized file extension, no + * recognized HTTP Content-Type provided). + * + * @param lang + * @return this + */ + public RDFParserBuilder lang(Lang lang) { this.hintLang = lang ; return this; } + + /** + * Force the choice RDF syntax to be {@code lang}, and ignore any indications such as file extension + * or HTTP Content-Type. + * @see Lang + * @param lang + * @return this + */ + public RDFParserBuilder forceLang(Lang lang) { this.forceLang = lang ; return this; } + + /** + * Set the HTTP "Accept" header. + * The default if not set is {@link WebContent#defaultRDFAcceptHeader}. + * @param acceptHeader + * @return this + */ + public RDFParserBuilder httpAccept(String acceptHeader) { + httpHeader(HttpNames.hAccept, acceptHeader); + return this; + } + + /** + * Set an HTTP header. Any previous setting is + * <p> + * Consider setting up an {@link HttpClient} if more complicated setting to an HTTP + * request is required. + */ + public RDFParserBuilder httpHeader(String header, String value) { + httpHeaders.put(header, value); + return this; + } + + /** Set the HttpClient to use. 
+ * This will override any HTTP header settings. + */ + public RDFParserBuilder httpClient(HttpClient httpClient) { + this.httpClient = httpClient; + return this; + } + + public RDFParserBuilder base(String base) { this.baseUri = base ; return this; } + + /** + * Set the {@link ErrorHandler} to use. + * This replaces any previous setting. + * The default is use slf4j logger "RIOT". + * @param handler + * @return this + */ + public RDFParserBuilder errorHandler(ErrorHandler handler) { + this.errorHandler = handler; + return this; + } + + /** + * Set the {@link FactoryRDF} to use. {@link FactoryRDF} control how parser output is + * turned into {@code Node} and how {@code Triple}s and {@code Quad}s are built. This + * replaces any previous setting. + * <br/> + * The default is use {@link RiotLib#factoryRDF()} which is provides {@code Node} + * reuse. + * <br/> + * The {@code FactoryRDF} also determines how blank node labels in RDF syntax are + * mapped to {@link BlankNodeId}. Use + * <pre> + * new Factory(myLabelToNode) + * </pre> + * to create an {@code FactoryRDF} and set the {@code LabelToNode} step. + * @see #labelToNode + * @param factory + * @return this + */ + public RDFParserBuilder factory(FactoryRDF factory) { + this.factory = factory; + return this; + } + + /** + * Use the given {@link LabelToNode}, the policy for converting blank node labels in + * RDF syntax to Jena's {@code Node} objects (usually a blank node). + * <br/> + * Only applies when the {@link FactoryRDF} is not set in the + * {@code RDFParserBuilder}, otherwise the {@link FactoryRDF} controls the + * label-to-node process. + * <br/> + * {@link SyntaxLabels#createLabelToNode} is the default policy. + * <br> + * {@link LabelToNode#createUseLabelAsGiven()} uses the label in teh RDF syntax directly. --- End diff -- Typo > Provide detailed setup for RIOT parsing with a parser builder. 
> -------------------------------------------------------------- > > Key: JENA-1306 > URL: https://issues.apache.org/jira/browse/JENA-1306 > Project: Apache Jena > Issue Type: New Feature > Components: RIOT > Affects Versions: Jena 3.2.0 > Reporter: Andy Seaborne > Assignee: Andy Seaborne > > Provide a parser builder for detailed setup of RDFParser. > This is a new low level interface to the parsing process. It replaces and > extends the machinery hidden inside {{RDFDataMgr}} ({{process}} and > {{getReader}}) and {{RDFParserRegistry.ReaderRIOTLang}}. > It aligns with the changes to {{HttpOp}} to have a specific optional > {{HttpClient}} (JENA-576 and related work) and so allows applications to > control the HTTP setup without resorting to direct use of {{HttpOp}}. > More detailed control can be exposed, including language specific and > specialized needs, for example [PR#211 "preserve id of blanknodes in > JSON-LD"|https://github.com/apache/jena/pull/211]. > {{RDFDataMgr}} functions involving a Context can be deprecated. -- This message was sent by Atlassian JIRA (v6.3.15#6346)