Github user dick-twocows commented on a diff in the pull request: https://github.com/apache/jena/pull/233#discussion_r110255601 --- Diff: jena-arq/src/main/java/org/apache/jena/sparql/core/mosaic/DatasetGraphMosaic.java --- @@ -0,0 +1,589 @@ +package org.apache.jena.sparql.core.mosaic; + +import static org.slf4j.LoggerFactory.getLogger; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.query.ReadWrite; +import org.apache.jena.shared.JenaException; +import org.apache.jena.shared.Lock; +import org.apache.jena.sparql.core.DatasetGraph; +import org.apache.jena.sparql.core.GraphView; +import org.apache.jena.sparql.core.Quad; +import org.apache.jena.sparql.core.thrift.IteratorCachedArray; +import org.apache.jena.sparql.util.Context; +import org.apache.jena.sparql.util.Symbol; +import org.slf4j.Logger; + +/** + * A DatasetGraph which distributes actions across a number of child DatasetGraph's. + * + * As Jena requires Thread affinity when working with transactions this class uses ThreadProxy. + * + * Most DatasetGraph methods call into a set of convenience methods which perform common tasks, i.e. mosaicIterator(Function<DatasetGraph, Iterator<T>>). 
+ * + * @author dick + * + */ +public class DatasetGraphMosaic implements DatasetGraph { + + private static final Logger LOGGER = getLogger(DatasetGraphMosaic.class); + + public static final String jenaID = UUID.randomUUID().toString(); + + public static final Symbol MOSAIC_STREAM_SEQUENTIAL = Symbol.create(DatasetGraphMosaic.class.getSimpleName() + ".mosaicStreamSequential"); + + public static final Symbol WRAP_ITERATOR = Symbol.create(DatasetGraphMosaic.class.getSimpleName() + ".wrapIterator"); + + protected final String id = UUID.randomUUID().toString(); + + protected volatile Boolean closed = false; + + protected final Topology topology = new Topology(); + + protected final Set<Tessera> mosaic; + + protected final ThreadLocal<TransactionalDistributed> transactional; + + protected final WeakHashMap<Thread, TransactionalDistributed> monitor; + + protected final Lock lock; + + protected final DatasetGraphShimWrite shimWrite; + + protected final AtomicInteger readCount = new AtomicInteger(); + + protected final AtomicInteger writeCount = new AtomicInteger(); + + protected final AtomicInteger transactionCount = new AtomicInteger(); + + protected final Context context; + + public DatasetGraphMosaic() { + super(); + + mosaic = ConcurrentHashMap.newKeySet(256); + + transactional = new ThreadLocal<>(); + + monitor = new WeakHashMap<>(32); + + lock = new LockMRAndMW(); + + shimWrite = new DatasetGraphShimWrite() { + + @Override + public void add(final Node g, final Node s, final Node p, final Node o) { --- End diff -- We already have a shim which checks the quad's graph and adds the quad to each child which contains that graph. Without complicating this further, modifying data over Thrift has its own issues: because each individual add call hits the network, writes are cached, and then either a flush is triggered by a read, or the read from the remote is aggregated with the local cache to maintain read consistency. Which one you choose depends on what you want: latency vs. safety.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---