Github user ajs6f commented on a diff in the pull request:

    https://github.com/apache/jena/pull/94#discussion_r44410004
  
    --- Diff: 
jena-arq/src/main/java/org/apache/jena/sparql/core/mem/DatasetGraphInMemory.java
 ---
    @@ -0,0 +1,310 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.jena.sparql.core.mem;
    +
    +import static java.lang.ThreadLocal.withInitial;
    +import static org.apache.jena.graph.Node.ANY;
    +import static org.apache.jena.query.ReadWrite.READ;
    +import static org.apache.jena.query.ReadWrite.WRITE;
    +import static org.apache.jena.sparql.core.Quad.isUnionGraph;
    +
    +import java.util.HashSet;
    +import java.util.Iterator;
    +import java.util.Set;
    +import java.util.concurrent.locks.ReentrantReadWriteLock;
    +import java.util.function.Consumer;
    +import java.util.function.Supplier;
    +
    +import org.apache.jena.graph.Graph;
    +import org.apache.jena.graph.Node;
    +import org.apache.jena.graph.Triple;
    +import org.apache.jena.query.ReadWrite;
    +import org.apache.jena.shared.Lock;
    +import org.apache.jena.shared.LockMRPlusSW;
    +import org.apache.jena.sparql.JenaTransactionException;
    +import org.apache.jena.sparql.core.DatasetGraph;
    +import org.apache.jena.sparql.core.DatasetGraphTriplesQuads;
    +import org.apache.jena.sparql.core.DatasetPrefixStorage;
    +import org.apache.jena.sparql.core.Quad;
    +import org.apache.jena.sparql.core.Transactional;
    +
    +/**
    + * A {@link DatasetGraph} backed by a {@link QuadTable}. By default, this 
is a {@link HexTable} designed for high-speed
    + * in-memory operation.
    + *
    + */
    +public class DatasetGraphInMemory extends DatasetGraphTriplesQuads 
implements Transactional {
    +
    +   private final DatasetPrefixStorage prefixes = new 
DatasetPrefixStorageInMemory();
    +
    +   private final Lock writeLock = new LockMRPlusSW();
    +
    +   private Lock writeLock() {
    +           return writeLock;
    +   }
    +
    +   private final ReentrantReadWriteLock commitLock = new 
ReentrantReadWriteLock(true);
    +
    +   /**
    +    * Commits must be atomic, and because a thread that is committing 
alters the various indexes one after another, we
    +    * lock out {@link #begin(ReadWrite)} while {@link #commit()} is 
executing.
    +    */
    +   private ReentrantReadWriteLock commitLock() {
    +           return commitLock;
    +   }
    +
    +   private final ThreadLocal<Boolean> isInTransaction = withInitial(() -> 
false);
    +
    +   @Override
    +   public boolean isInTransaction() {
    +           return isInTransaction.get();
    +   }
    +
    +   protected void isInTransaction(final boolean b) {
    +           isInTransaction.set(b);
    +   }
    +
    +   private final ThreadLocal<ReadWrite> transactionType = withInitial(() 
-> null);
    +
    +   /**
    +    * @return the type of transaction in progress
    +    */
    +   public ReadWrite transactionType() {
    +           return transactionType.get();
    +   }
    +
    +   protected void transactionType(final ReadWrite readWrite) {
    +           transactionType.set(readWrite);
    +   }
    +
    +   private final QuadTable quadsIndex;
    +
    +   private QuadTable quadsIndex() {
    +           return quadsIndex;
    +   }
    +
    +   private final TripleTable defaultGraph;
    +
    +   private TripleTable defaultGraph() {
    +           return defaultGraph;
    +   }
    +
    +   @Override
    +   public Lock getLock() {
    +           return writeLock();
    +   }
    +
    +   /**
    +    * Default constructor.
    +    */
    +   public DatasetGraphInMemory() {
    +           this(new HexTable(), new TriTable());
    +   }
    +
    +   /**
    +    * @param i a table in which to store quads
    +    * @param t a table in which to store triples
    +    */
    +   public DatasetGraphInMemory(final QuadTable i, final TripleTable t) {
    +           this.quadsIndex = i;
    +           this.defaultGraph = t;
    +   }
    +
    +   @Override
    +   public void begin(final ReadWrite readWrite) {
    +           if (isInTransaction()) throw new 
JenaTransactionException("Transactions cannot be nested!");
    +           transactionType(readWrite);
    +           isInTransaction(true);
    +           getLock().enterCriticalSection(readWrite.equals(READ)); // get 
the dataset write lock, if needed.
    +           commitLock().readLock().lock(); // if a commit is proceeding, 
wait so that we see a coherent index state
    +           try {
    +                   quadsIndex().begin(readWrite);
    +           } finally {
    +                   commitLock().readLock().unlock();
    +           }
    +   }
    +
    +   @Override
    +   public void commit() {
    +           if (!isInTransaction()) throw new 
JenaTransactionException("Tried to commit outside a transaction!");
    +           commitLock().writeLock().lock();
    +           try {
    +                   quadsIndex().commit();
    +                   defaultGraph().commit();
    +           } finally {
    +                   commitLock().writeLock().unlock();
    +           }
    +           end();
    +   }
    +
    +   @Override
    +   public void abort() {
    +           if (!isInTransaction()) throw new 
JenaTransactionException("Tried to abort outside a transaction!");
    +           end();
    +   }
    +
    +   @Override
    +   public void close() {
    +           if (isInTransaction()) abort();
    +
    +   }
    +
    +   @Override
    +   public void end() {
    +           quadsIndex().end();
    +           defaultGraph().end();
    +           isInTransaction(false);
    +           transactionType(null);
    +           getLock().leaveCriticalSection();
    +   }
    +
    +   private <T> Iterator<T> access(final Supplier<Iterator<T>> source) {
    +           if (!isInTransaction()) {
    +                   begin(READ);
    +                   try {
    +                           return source.get();
    +                   } finally {
    +                           end();
    +                   }
    +           }
    +           return source.get();
    +   }
    +
    +   @Override
    +   public Iterator<Node> listGraphNodes() {
    +           return access(() -> quadsIndex().listGraphNodes().iterator());
    +   }
    +
    +   private Iterator<Quad> quadsFinder(final Node g, final Node s, final 
Node p, final Node o) {
    +           if (isUnionGraph(g)) { // union graph is the merge of named 
graphs
    +                   final Set<Triple> seen = new HashSet<>();
    +                   return quadsIndex().find(ANY, s, p, o).filter(q -> 
!q.isDefaultGraph() && seen.add(q.asTriple()))
    +                                   .iterator();
    +           }
    --- End diff --
    
    1. I'm not sure what you mean by "stream this". It _is_ streaming, unless 
you mean avoiding the buildup of state in the `Set` by relying instead on your 
suggestion about adjacency? In that case, see below.
    2. This is the `DatasetGraph`. It doesn't know anything about SPOG or any 
other particular kind of index. It only knows about `QuadTable`s and 
`TripleTable`s, which may happen to be (and, in the default case, are) 
implemented with maps, but could just as well be implemented with some other 
technique that wouldn't produce adjacency as an artifact. (E.g. @Claudenw's 
idea about Bloom filters, or your suggestion about "mixed structures".)
    



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working,
please contact infrastructure at infrastruct...@apache.org or file a JIRA
ticket with INFRA.
---

Reply via email to