[ https://issues.apache.org/jira/browse/JENA-624?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14998690#comment-14998690 ]
ASF GitHub Bot commented on JENA-624: ------------------------------------- Github user ajs6f commented on a diff in the pull request: https://github.com/apache/jena/pull/94#discussion_r44414632 --- Diff: jena-arq/src/main/java/org/apache/jena/sparql/core/mem/DatasetGraphInMemory.java --- @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.sparql.core.mem; + +import static java.lang.ThreadLocal.withInitial; +import static org.apache.jena.graph.Node.ANY; +import static org.apache.jena.query.ReadWrite.READ; +import static org.apache.jena.query.ReadWrite.WRITE; +import static org.apache.jena.sparql.core.Quad.isUnionGraph; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Consumer; +import java.util.function.Supplier; + +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.graph.Triple; +import org.apache.jena.query.ReadWrite; +import org.apache.jena.shared.Lock; +import org.apache.jena.shared.LockMRPlusSW; +import org.apache.jena.sparql.JenaTransactionException; +import org.apache.jena.sparql.core.DatasetGraph; +import org.apache.jena.sparql.core.DatasetGraphTriplesQuads; +import org.apache.jena.sparql.core.DatasetPrefixStorage; +import org.apache.jena.sparql.core.Quad; +import org.apache.jena.sparql.core.Transactional; + +/** + * A {@link DatasetGraph} backed by an {@link QuadTable}. By default, this is a {@link HexTable} designed for high-speed + * in-memory operation. + * + */ +public class DatasetGraphInMemory extends DatasetGraphTriplesQuads implements Transactional { + + private final DatasetPrefixStorage prefixes = new DatasetPrefixStorageInMemory(); + + private final Lock writeLock = new LockMRPlusSW(); + + private Lock writeLock() { + return writeLock; + } + + private final ReentrantReadWriteLock commitLock = new ReentrantReadWriteLock(true); + + /** + * Commits must be atomic, and because a thread that is committing alters the various indexes one after another, we + * lock out {@link #begin(ReadWrite)} while {@link #commit()} is executing. 
+ */ + private ReentrantReadWriteLock commitLock() { + return commitLock; + } + + private final ThreadLocal<Boolean> isInTransaction = withInitial(() -> false); + + @Override + public boolean isInTransaction() { + return isInTransaction.get(); + } + + protected void isInTransaction(final boolean b) { + isInTransaction.set(b); + } + + private final ThreadLocal<ReadWrite> transactionType = withInitial(() -> null); + + /** + * @return the type of transaction in progress + */ + public ReadWrite transactionType() { + return transactionType.get(); + } + + protected void transactionType(final ReadWrite readWrite) { + transactionType.set(readWrite); + } + + private final QuadTable quadsIndex; + + private QuadTable quadsIndex() { + return quadsIndex; + } + + private final TripleTable defaultGraph; + + private TripleTable defaultGraph() { + return defaultGraph; + } + + @Override + public Lock getLock() { + return writeLock(); + } + + /** + * Default constructor. + */ + public DatasetGraphInMemory() { + this(new HexTable(), new TriTable()); + } + + /** + * @param i a table in which to store quads + * @param t a table in which to store triples + */ + public DatasetGraphInMemory(final QuadTable i, final TripleTable t) { + this.quadsIndex = i; + this.defaultGraph = t; + } + + @Override + public void begin(final ReadWrite readWrite) { + if (isInTransaction()) throw new JenaTransactionException("Transactions cannot be nested!"); + transactionType(readWrite); + isInTransaction(true); + getLock().enterCriticalSection(readWrite.equals(READ)); // get the dataset write lock, if needed. 
+ commitLock().readLock().lock(); // if a commit is proceeding, wait so that we see a coherent index state + try { + quadsIndex().begin(readWrite); + } finally { + commitLock().readLock().unlock(); + } + } + + @Override + public void commit() { + if (!isInTransaction()) throw new JenaTransactionException("Tried to commit outside a transaction!"); + commitLock().writeLock().lock(); + try { + quadsIndex().commit(); + defaultGraph().commit(); + } finally { + commitLock().writeLock().unlock(); + } + end(); + } + + @Override + public void abort() { + if (!isInTransaction()) throw new JenaTransactionException("Tried to abort outside a transaction!"); + end(); + } + + @Override + public void close() { + if (isInTransaction()) abort(); + + } + + @Override + public void end() { + quadsIndex().end(); + defaultGraph().end(); + isInTransaction(false); + transactionType(null); + getLock().leaveCriticalSection(); + } + + private <T> Iterator<T> access(final Supplier<Iterator<T>> source) { + if (!isInTransaction()) { + begin(READ); + try { + return source.get(); + } finally { + end(); + } + } + return source.get(); + } + + @Override + public Iterator<Node> listGraphNodes() { + return access(() -> quadsIndex().listGraphNodes().iterator()); + } + + private Iterator<Quad> quadsFinder(final Node g, final Node s, final Node p, final Node o) { + if (isUnionGraph(g)) { // union graph is the merge of named graphs + final Set<Triple> seen = new HashSet<>(); + return quadsIndex().find(ANY, s, p, o).filter(q -> !q.isDefaultGraph() && seen.add(q.asTriple())) + .iterator(); + } --- End diff -- Okay, that makes a lot of sense to me. It's not dissimilar to the way I'm currently handling `listGraphNodes`, as we discuss elsewhere in this PR. I entirely agree about the preferable signature. I'll get another commit in on this point. 
> Develop a new in-memory RDF Dataset implementation > -------------------------------------------------- > > Key: JENA-624 > URL: https://issues.apache.org/jira/browse/JENA-624 > Project: Apache Jena > Issue Type: Improvement > Reporter: Andy Seaborne > Assignee: A. Soroka > Labels: gsoc, gsoc2015, java, linked_data, rdf > > The current (Jan 2014) Jena in-memory dataset uses a general-purpose > container that works for any storage technology for graphs together with > in-memory graphs. > This project would develop a new implementation designed specifically for RDF > datasets (triples and quads) and efficient SPARQL execution, for example, > using multi-core parallel operations and/or multi-version concurrent > data structures to maximise true parallel operation. > This is a system project suitable for someone interested in database > implementation, data structure design and implementation, operating systems or > distributed systems. > Note that TDB can operate in-memory using a simulated disk with > copy-in/copy-out semantics for disk-level operations. It is for faithfully > testing TDB infrastructure and is not designed for performance, general in-memory > use or use at scale. While lessons may be learnt from that system, TDB > in-memory is not the answer here. -- This message was sent by Atlassian JIRA (v6.3.4#6332)