Fix nodetool repair not failing when a node is down; patch by yukim, reviewed by brandonwilliams for CASSANDRA-5203
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/6bddbb2d Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/6bddbb2d Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/6bddbb2d Branch: refs/heads/trunk Commit: 6bddbb2dd242cacd50cf1b98643385c7b22c1939 Parents: ddab67d Author: Yuki Morishita <yu...@apache.org> Authored: Fri Feb 1 14:30:58 2013 -0600 Committer: Yuki Morishita <yu...@apache.org> Committed: Fri Feb 1 14:31:02 2013 -0600 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../cassandra/service/AntiEntropyService.java | 5 ++- src/java/org/apache/cassandra/tools/NodeCmd.java | 2 +- src/java/org/apache/cassandra/tools/NodeProbe.java | 17 ++++++++++++-- 4 files changed, 19 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/6bddbb2d/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 1c414bc..f296d92 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -4,6 +4,7 @@ * fix sstable maxtimestamp for row deletes and pre-1.1.1 sstables (CASSANDRA-5153) * fix start key/end token validation for wide row iteration (CASSANDRA-5168) * add ConfigHelper support for Thrift frame and max message sizes (CASSANDRA-5188) + * fix nodetool repair not fail on node down (CASSANDRA-5203) 1.1.9 http://git-wip-us.apache.org/repos/asf/cassandra/blob/6bddbb2d/src/java/org/apache/cassandra/service/AntiEntropyService.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/service/AntiEntropyService.java b/src/java/org/apache/cassandra/service/AntiEntropyService.java index dc03122..f3ca1c2 100644 --- a/src/java/org/apache/cassandra/service/AntiEntropyService.java +++ 
b/src/java/org/apache/cassandra/service/AntiEntropyService.java @@ -687,9 +687,10 @@ public class AntiEntropyService { if (!FailureDetector.instance.isAlive(endpoint)) { + String message = String.format("Cannot proceed on repair because a neighbor (%s) is dead: session failed", endpoint); differencingDone.signalAll(); - logger.info(String.format("[repair #%s] Cannot proceed on repair because a neighbor (%s) is dead: session failed", getName(), endpoint)); - return; + logger.error(String.format("[repair #%s] ", getName()) + message); + throw new IOException(message); } if (Gossiper.instance.getVersion(endpoint) < MessagingService.VERSION_11 && isSequential) http://git-wip-us.apache.org/repos/asf/cassandra/blob/6bddbb2d/src/java/org/apache/cassandra/tools/NodeCmd.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/tools/NodeCmd.java b/src/java/org/apache/cassandra/tools/NodeCmd.java index 8d4f9a1..723bdf8 100644 --- a/src/java/org/apache/cassandra/tools/NodeCmd.java +++ b/src/java/org/apache/cassandra/tools/NodeCmd.java @@ -922,7 +922,7 @@ public class NodeCmd } } } - System.exit(0); + System.exit(probe.isFailed() ? 1 : 0); } private static Throwable findInnermostThrowable(Throwable ex) http://git-wip-us.apache.org/repos/asf/cassandra/blob/6bddbb2d/src/java/org/apache/cassandra/tools/NodeProbe.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java index 264ea90..036d653 100644 --- a/src/java/org/apache/cassandra/tools/NodeProbe.java +++ b/src/java/org/apache/cassandra/tools/NodeProbe.java @@ -80,6 +80,7 @@ public class NodeProbe private FailureDetectorMBean fdProxy; private CacheServiceMBean cacheService; private StorageProxyMBean spProxy; + private boolean failed; /** * Creates a NodeProbe using the specified JMX host, port, username, and password. 
@@ -213,7 +214,8 @@ public class NodeProbe try { ssProxy.addNotificationListener(runner, null, null); - runner.repairAndWait(ssProxy, isSequential, primaryRange); + if (!runner.repairAndWait(ssProxy, isSequential, primaryRange)) + failed = true; } catch (Exception e) { @@ -729,6 +731,11 @@ public class NodeProbe { ssProxy.resetLocalSchema(); } + + public boolean isFailed() + { + return failed; + } } class ColumnFamilyStoreMBeanIterator implements Iterator<Map.Entry<String, ColumnFamilyStoreMBean>> @@ -804,6 +811,7 @@ class RepairRunner implements NotificationListener private final String keyspace; private final String[] columnFamilies; private int cmd; + private boolean success = true; RepairRunner(PrintStream out, String keyspace, String... columnFamilies) { @@ -812,7 +820,7 @@ class RepairRunner implements NotificationListener this.columnFamilies = columnFamilies; } - public void repairAndWait(StorageServiceMBean ssProxy, boolean isSequential, boolean primaryRangeOnly) throws InterruptedException + public boolean repairAndWait(StorageServiceMBean ssProxy, boolean isSequential, boolean primaryRangeOnly) throws InterruptedException { cmd = ssProxy.forceRepairAsync(keyspace, isSequential, primaryRangeOnly, columnFamilies); if (cmd > 0) @@ -824,6 +832,7 @@ class RepairRunner implements NotificationListener String message = String.format("[%s] Nothing to repair for keyspace '%s'", format.format(System.currentTimeMillis()), keyspace); out.println(message); } + return success; } public void handleNotification(Notification notification, Object handback) @@ -838,7 +847,9 @@ class RepairRunner implements NotificationListener { String message = String.format("[%s] %s", format.format(notification.getTimeStamp()), notification.getMessage()); out.println(message); - if (status[1] == AntiEntropyService.Status.FINISHED.ordinal()) + if (status[1] == AntiEntropyService.Status.SESSION_FAILED.ordinal()) + success = false; + else if (status[1] == 
AntiEntropyService.Status.FINISHED.ordinal()) condition.signalAll(); } }