Improve initial FD phi estimate when starting up patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed Branch: refs/heads/trunk Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea Parents: b3ae77d Author: Jonathan Ellis <jbel...@apache.org> Authored: Wed Nov 20 17:52:45 2013 -0600 Committer: Jonathan Ellis <jbel...@apache.org> Committed: Wed Nov 20 18:00:15 2013 -0600 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../apache/cassandra/gms/FailureDetector.java | 47 +++++++++++--------- .../apache/cassandra/gms/ArrivalWindowTest.java | 7 +-- 3 files changed, 29 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index dc56d90..b3fa565 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,6 @@ 1.2.13 * Optimize FD phi calculation (CASSANDRA-6386) + * Improve initial FD phi estimate when starting up (CASSANDRA-6385) 1.2.12 http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java index e4ffb88..ee47997 100644 --- a/src/java/org/apache/cassandra/gms/FailureDetector.java +++ b/src/java/org/apache/cassandra/gms/FailureDetector.java @@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean public static final IFailureDetector instance = new FailureDetector(); private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class); + // this is useless except to provide backwards compatibility in phi_convict_threshold, + // because everyone seems pretty accustomed to the default of 8, and users who have + // already tuned their phi_convict_threshold for their own environments won't need to + // change. + private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434... + private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>(); private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>(); @@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean logger.trace("reporting {}", ep); long now = System.currentTimeMillis(); ArrivalWindow heartbeatWindow = arrivalSamples.get(ep); - if ( heartbeatWindow == null ) + if (heartbeatWindow == null) { + // avoid adding an empty ArrivalWindow to the Map heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE); + heartbeatWindow.add(now); arrivalSamples.put(ep, heartbeatWindow); } - heartbeatWindow.add(now); + else + { + heartbeatWindow.add(now); + } } public void interpret(InetAddress ep) @@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean if (logger.isTraceEnabled()) logger.trace("PHI for " + ep + " : " + phi); - if (phi > getPhiConvictThreshold()) + if (PHI_FACTOR * phi > getPhiConvictThreshold()) { logger.trace("notifying listeners that {} is down", ep); logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean()); @@ -266,12 +277,6 @@ class ArrivalWindow private double tLast = 0L; private final BoundedStatsDeque arrivalIntervals; - // this is useless except to provide backwards compatibility in phi_convict_threshold, - // because everyone seems pretty accustomed to the default of 8, and users who have - // already tuned their phi_convict_threshold for their own environments won't need to - // change. - private final double PHI_FACTOR = 1.0 / Math.log(10.0); - // in the event of a long partition, never record an interval longer than the rpc timeout, // since if a host is regularly experiencing connectivity problems lasting this long we'd // rather mark it down quickly instead of adapting @@ -284,19 +289,21 @@ class ArrivalWindow synchronized void add(double value) { - double interArrivalTime; - if ( tLast > 0L ) + if (tLast > 0L) { - interArrivalTime = (value - tLast); + double interArrivalTime = (value - tLast); + if (interArrivalTime <= MAX_INTERVAL_IN_MS) + arrivalIntervals.add(interArrivalTime); + else + logger.debug("Ignoring interval time of {}", interArrivalTime); } else { - interArrivalTime = Gossiper.intervalInMillis / 2; + // We use a very large initial interval since the "right" average depends on the cluster size + // and it's better to err high (false negatives, which will be corrected by waiting a bit longer) + // than low (false positives, which cause "flapping"). + arrivalIntervals.add(Gossiper.intervalInMillis * 30); } - if (interArrivalTime <= MAX_INTERVAL_IN_MS) - arrivalIntervals.add(interArrivalTime); - else - logger.debug("Ignoring interval time of {}", interArrivalTime); tLast = value; } @@ -308,11 +315,9 @@ class ArrivalWindow // see CASSANDRA-2597 for an explanation of the math at work here. double phi(long tnow) { - int size = arrivalIntervals.size(); + assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive double t = tnow - tLast; - return (size > 0) - ? PHI_FACTOR * t / mean() - : 0.0; + return t / mean(); } public String toString() http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java index 277cae1..46fee34 100644 --- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java +++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java @@ -27,7 +27,6 @@ import org.junit.Test; public class ArrivalWindowTest { - @Test public void test() { @@ -40,11 +39,9 @@ public class ArrivalWindowTest window.add(555); //all good - assertEquals(0.4342, window.phi(666), 0.01); + assertEquals(1.0, window.phi(666), 0.01); //oh noes, a much higher timestamp, something went wrong! - assertEquals(9.566, window.phi(3000), 0.01); + assertEquals(22.03, window.phi(3000), 0.01); } - - }