Github user ajs6f commented on a diff in the pull request: https://github.com/apache/jena/pull/233#discussion_r110254074 --- Diff: jena-arq/src/main/java/org/apache/jena/sparql/core/mosaic/DatasetGraphMosaic.java --- @@ -0,0 +1,589 @@ +package org.apache.jena.sparql.core.mosaic; + +import static org.slf4j.LoggerFactory.getLogger; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.jena.graph.Graph; +import org.apache.jena.graph.Node; +import org.apache.jena.query.ReadWrite; +import org.apache.jena.shared.JenaException; +import org.apache.jena.shared.Lock; +import org.apache.jena.sparql.core.DatasetGraph; +import org.apache.jena.sparql.core.GraphView; +import org.apache.jena.sparql.core.Quad; +import org.apache.jena.sparql.core.thrift.IteratorCachedArray; +import org.apache.jena.sparql.util.Context; +import org.apache.jena.sparql.util.Symbol; +import org.slf4j.Logger; + +/** + * A DatasetGraph which distributes actions across a number of child DatasetGraph's. + * + * As Jena requires Thread affinity when working with transactions this class uses ThreadProxy. + * + * Most DatasetGraph methods call into a set of convenience methods which perform common tasks, i.e. mosaicIterator(Function<DatasetGraph, Iterator<T>>). 
+ * + * @author dick + * + */ +public class DatasetGraphMosaic implements DatasetGraph { + + private static final Logger LOGGER = getLogger(DatasetGraphMosaic.class); + + public static final String jenaID = UUID.randomUUID().toString(); + + public static final Symbol MOSAIC_STREAM_SEQUENTIAL = Symbol.create(DatasetGraphMosaic.class.getSimpleName() + ".mosaicStreamSequential"); + + public static final Symbol WRAP_ITERATOR = Symbol.create(DatasetGraphMosaic.class.getSimpleName() + ".wrapIterator"); + + protected final String id = UUID.randomUUID().toString(); + + protected volatile Boolean closed = false; + + protected final Topology topology = new Topology(); + + protected final Set<Tessera> mosaic; + + protected final ThreadLocal<TransactionalDistributed> transactional; + + protected final WeakHashMap<Thread, TransactionalDistributed> monitor; + + protected final Lock lock; + + protected final DatasetGraphShimWrite shimWrite; + + protected final AtomicInteger readCount = new AtomicInteger(); + + protected final AtomicInteger writeCount = new AtomicInteger(); + + protected final AtomicInteger transactionCount = new AtomicInteger(); + + protected final Context context; + + public DatasetGraphMosaic() { + super(); + + mosaic = ConcurrentHashMap.newKeySet(256); + + transactional = new ThreadLocal<>(); + + monitor = new WeakHashMap<>(32); + + lock = new LockMRAndMW(); + + shimWrite = new DatasetGraphShimWrite() { + + @Override + public void add(final Node g, final Node s, final Node p, final Node o) { --- End diff -- Ah, okay, cool, that was one of my biggest questions (sharding policy). I'm glad to see you've got an extension point ready, nicely done. My immediate sense would be that we want a policy injection point near here, into which you could put a type that takes a quad and checks it against some record of the available "children" and makes that decision.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---