Updated Branches: refs/heads/trunk c06244916 -> 93a03feac
Update some javadoc for repair Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/93a03fea Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/93a03fea Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/93a03fea Branch: refs/heads/trunk Commit: 93a03feacebb34b639558b8e6400398b3ae26f37 Parents: c062449 Author: Sylvain Lebresne <sylv...@datastax.com> Authored: Thu Jun 27 11:08:18 2013 +0200 Committer: Sylvain Lebresne <sylv...@datastax.com> Committed: Thu Jun 27 11:08:18 2013 +0200 ---------------------------------------------------------------------- .../apache/cassandra/repair/RepairSession.java | 33 ++++++++++++++++++- .../org/apache/cassandra/repair/Validator.java | 2 +- .../cassandra/service/ActiveRepairService.java | 34 ++++++-------------- 3 files changed, 43 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/93a03fea/src/java/org/apache/cassandra/repair/RepairSession.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/repair/RepairSession.java b/src/java/org/apache/cassandra/repair/RepairSession.java index 7101b5a..5761597 100644 --- a/src/java/org/apache/cassandra/repair/RepairSession.java +++ b/src/java/org/apache/cassandra/repair/RepairSession.java @@ -37,7 +37,37 @@ import org.apache.cassandra.service.ActiveRepairService; import org.apache.cassandra.utils.*; /** - * Triggers repairs with all neighbors for the given table, cfs and range. + * Coordinates the (active) repair of a token range. + * + * A given RepairSession repairs a set of replicas for a given range on a list + * of column families. For each of the column family to repair, RepairSession + * creates a RepairJob that handles the repair of that CF. + * + * A given RepairJob has the 2 main phases: + * 1. 
Validation phase: the job requests merkle trees from each of the replicas involved + * (RepairJob.sendTreeRequests()) and waits until all trees are received (in + * validationComplete()). + * 2. Synchronization phase: once all trees are received, the job compares each tree with + * all the others using a so-called Differencer (started by submitDifferencers()). If + * there are differences between 2 trees, the concerned Differencer will start a streaming + * of the difference between the 2 endpoints concerned (Differencer.performStreamingRepair). + * The job is done once all its Differencers are done (i.e. have either computed no differences + * or the streaming they started is done (syncComplete())). + * + * A given session will execute the first phase (validation phase) of each of its jobs + * sequentially. In other words, it will start the first job and only start the next one + * once that first job's validation phase is complete. This is done so that the replicas only + * create one merkle tree at a time, which is our way to ensure that such creation starts + * roughly at the same time on every node (see CASSANDRA-2816). However the synchronization + * phases are allowed to run concurrently (with each other and with validation phases). + * + * A given RepairJob has 2 modes: either sequential or not (isSequential flag). If sequential, + * it will request merkle tree creation from each replica in sequence (though in that case + * we still first send a message to each node to flush and snapshot data so each merkle tree + * creation is still done on similar data, even if the actual creation is not + * done simultaneously). If not sequential, all merkle trees are requested in parallel. + * Similarly, if a job is sequential, it will handle one Differencer at a time, but will handle + * all of them in parallel otherwise. 
*/ public class RepairSession extends WrappedRunnable implements IEndpointStateChangeSubscriber, IFailureDetectionEventListener { @@ -223,6 +253,7 @@ public class RepairSession extends WrappedRunnable implements IEndpointStateChan throw new IOException(message); } } + ActiveRepairService.instance.addToActiveSessions(this); try { http://git-wip-us.apache.org/repos/asf/cassandra/blob/93a03fea/src/java/org/apache/cassandra/repair/Validator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/repair/Validator.java b/src/java/org/apache/cassandra/repair/Validator.java index 97b4ca2..0a7f927 100644 --- a/src/java/org/apache/cassandra/repair/Validator.java +++ b/src/java/org/apache/cassandra/repair/Validator.java @@ -42,7 +42,7 @@ import org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.MerkleTree; /** - * A Strategy to handle building and validating a merkle tree for a column family. + * Handles the building of a merkle tree for a column family. * * Lifecycle: * 1. prepare() - Initialize tree with samples. http://git-wip-us.apache.org/repos/asf/cassandra/blob/93a03fea/src/java/org/apache/cassandra/service/ActiveRepairService.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java index 2bcf579..53460b9 100644 --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java @@ -38,32 +38,18 @@ import org.apache.cassandra.repair.messages.ValidationComplete; import org.apache.cassandra.utils.FBUtilities; /** - * ActiveRepairService encapsulates "validating" (hashing) individual column families, - * exchanging MerkleTrees with remote nodes via a tree request/response conversation, - * and then triggering repairs for disagreeing ranges. 
+ * ActiveRepairService is the starting point for manual "active" repairs. * - * The node where repair was invoked acts as the 'initiator,' where valid trees are sent after generation - * and where the local and remote tree will rendezvous in rendezvous(). - * Once the trees rendezvous, a Differencer is executed and the service can trigger repairs - * for disagreeing ranges. + * Each user triggered repair will correspond to one or multiple repair sessions, + * one for each token range to repair. One repair session might repair multiple + * column families. For each of those column families, the repair session will + * request merkle trees for each replica of the range being repaired, diff those + * trees upon receiving them, schedule the streaming of the parts to repair (based on + * the tree diffs) and wait for all those operations. See RepairSession for more + * details. * - * Tree comparison and repair triggering occur in the single threaded Stage.ANTI_ENTROPY. - * - * The steps taken to enact a repair are as follows: - * 1. A repair is requested via JMX/nodetool: - * * The initiator sends TreeRequest messages to all neighbors of the target node: when a node - * receives a TreeRequest, it will perform a validation (read-only) compaction to immediately validate - * the column family. This is performed on the CompactionManager ExecutorService. - * 2. The validation process builds the merkle tree by: - * * Calling Validator.prepare(), which samples the column family to determine key distribution, - * * Calling Validator.add() in order for rows in repair range in the column family, - * * Calling Validator.complete() to indicate that all rows have been added. - * * Calling complete() indicates that a valid MerkleTree has been created for the column family. - * * The valid tree is returned to the requesting node via a TreeResponse. - * 3. When a node receives a tree response, it passes the tree to rendezvous() to see if all responses are - * received. 
Once the initiator receives all responses, it creates Differencers on every tree pair combination. - * 4. Differencers are executed in Stage.ANTI_ENTROPY, to compare the given two trees, and perform repair via the - * streaming api. + * The creation of a repair session is done through the submitRepairSession that + * returns a future on the completion of that session. */ public class ActiveRepairService {