Author: slebresne
Date: Fri Dec 2 10:52:57 2011
New Revision: 1209399

URL: http://svn.apache.org/viewvc?rev=1209399&view=rev
Log:
fix potential race in AES when repair fails
patch by slebresne; reviewed by amorton for CASSANDRA-3548
Modified:
    cassandra/branches/cassandra-1.0/CHANGES.txt
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/service/AntiEntropyService.java

Modified: cassandra/branches/cassandra-1.0/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1209399&r1=1209398&r2=1209399&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/CHANGES.txt (original)
+++ cassandra/branches/cassandra-1.0/CHANGES.txt Fri Dec 2 10:52:57 2011
@@ -7,6 +7,7 @@
    be qualified by keyspace (CASSANDRA-3419)
  * always remove endpoints from delevery queue in HH (CASSANDRA-3546)
  * fix race between cf flush and its 2ndary indexes flush (CASSANDRA-3547)
+ * fix potential race in AES when a repair fails (CASSANDRA-3548)
 
 
 1.0.5

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/service/AntiEntropyService.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/service/AntiEntropyService.java?rev=1209399&r1=1209398&r2=1209399&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/service/AntiEntropyService.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/service/AntiEntropyService.java Fri Dec 2 10:52:57 2011
@@ -180,14 +180,15 @@ public class AntiEntropyService
             return;
         }
 
-        if (session.terminated())
+        RepairSession.RepairJob job = session.jobs.peek();
+        if (job == null)
+        {
+            assert session.terminated();
             return;
+        }
 
         logger.info(String.format("[repair #%s] Received merkle tree for %s from %s", session.getName(), request.cf.right, request.endpoint));
 
-        RepairSession.RepairJob job = session.jobs.peek();
-        assert job != null : "A repair should have at least some jobs scheduled";
-
         if (job.addTree(request, tree) == 0)
         {
             logger.debug("All trees received for " + session.getName() + "/" + request.cf.right);
@@ -704,14 +705,14 @@ public class AntiEntropyService
             }
             catch (InterruptedException e)
             {
-                throw new RuntimeException("Interrupted while waiting for repair: repair will continue in the background.");
+                throw new RuntimeException("Interrupted while waiting for repair.");
             }
             finally
             {
+                // mark this session as terminated
+                terminate();
                 FailureDetector.instance.unregisterFailureDetectionEventListener(this);
                 Gossiper.instance.unregister(this);
-                // mark this session as terminated
-                terminated = true;
                 AntiEntropyService.instance.sessions.remove(getName());
             }
         }
@@ -724,28 +725,36 @@ public class AntiEntropyService
             return terminated;
         }
 
+        public void terminate()
+        {
+            terminated = true;
+            jobs.clear();
+            activeJobs.clear();
+        }
+
         /**
          * clear all RepairJobs and terminate this session.
          */
         public void forceShutdown()
         {
-            jobs.clear();
-            activeJobs.clear();
             differencingDone.signalAll();
             completed.signalAll();
         }
 
         void completed(Differencer differencer)
         {
-            if (terminated)
-                return;
-
             logger.debug(String.format("[repair #%s] Repair completed between %s and %s on %s",
                                        getName(),
                                        differencer.r1.endpoint,
                                        differencer.r2.endpoint,
                                        differencer.cfname));
             RepairJob job = activeJobs.get(differencer.cfname);
+            if (job == null)
+            {
+                assert terminated;
+                return;
+            }
+
             if (job.completedSynchronization(differencer))
             {
                 activeJobs.remove(differencer.cfname);