Don't remove FailureDetector history on removeEndpoint. Patch by jkni; reviewed by jasobrown for CASSANDRA-10371.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7877d6f8 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7877d6f8 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7877d6f8 Branch: refs/heads/cassandra-2.2 Commit: 7877d6f85f1a84d9f9de4d81339730d9df3667a1 Parents: 67637d1 Author: Joel Knighton <joel.knigh...@datastax.com> Authored: Fri Feb 19 15:19:33 2016 -0600 Committer: Jason Brown <jasedbr...@gmail.com> Committed: Tue Feb 23 14:30:28 2016 -0800 ---------------------------------------------------------------------- CHANGES.txt | 1 + src/java/org/apache/cassandra/gms/Gossiper.java | 3 +- .../cassandra/gms/FailureDetectorTest.java | 85 ++++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/7877d6f8/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 52bdcce..82ee99e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.1.14 + * Don't remove FailureDetector history on removeEndpoint (CASSANDRA-10371) * Only notify if repair status changed (CASSANDRA-11172) * Add partition key to TombstoneOverwhelmingException error message (CASSANDRA-10888) * Use logback setting for 'cassandra -v' command (CASSANDRA-10767) http://git-wip-us.apache.org/repos/asf/cassandra/blob/7877d6f8/src/java/org/apache/cassandra/gms/Gossiper.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java index ae99829..889806c 100644 --- a/src/java/org/apache/cassandra/gms/Gossiper.java +++ b/src/java/org/apache/cassandra/gms/Gossiper.java @@ -386,6 +386,7 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean 
unreachableEndpoints.remove(endpoint); endpointStateMap.remove(endpoint); expireTimeEndpointMap.remove(endpoint); + FailureDetector.instance.remove(endpoint); quarantineEndpoint(endpoint); if (logger.isDebugEnabled()) logger.debug("evicting {} from gossip", endpoint); @@ -409,8 +410,6 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean liveEndpoints.remove(endpoint); unreachableEndpoints.remove(endpoint); - // do not remove endpointState until the quarantine expires - FailureDetector.instance.remove(endpoint); MessagingService.instance().resetVersion(endpoint); quarantineEndpoint(endpoint); MessagingService.instance().destroyConnectionPool(endpoint); http://git-wip-us.apache.org/repos/asf/cassandra/blob/7877d6f8/test/unit/org/apache/cassandra/gms/FailureDetectorTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/gms/FailureDetectorTest.java b/test/unit/org/apache/cassandra/gms/FailureDetectorTest.java new file mode 100644 index 0000000..9325922 --- /dev/null +++ b/test/unit/org/apache/cassandra/gms/FailureDetectorTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.gms; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.Util; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.RandomPartitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.locator.TokenMetadata; +import org.apache.cassandra.service.StorageService; + +import static org.junit.Assert.assertFalse; + +public class FailureDetectorTest +{ + @BeforeClass + public static void setup() + { + // slow unit tests can cause problems with FailureDetector's GC pause handling + System.setProperty("cassandra.max_local_pause_in_ms", "20000"); + } + + @Test + public void testConvictAfterLeft() throws UnknownHostException + { + StorageService ss = StorageService.instance; + TokenMetadata tmd = ss.getTokenMetadata(); + tmd.clearUnsafe(); + IPartitioner partitioner = new RandomPartitioner(); + VersionedValue.VersionedValueFactory valueFactory = new VersionedValue.VersionedValueFactory(partitioner); + + ArrayList<Token> endpointTokens = new ArrayList<>(); + ArrayList<Token> keyTokens = new ArrayList<>(); + List<InetAddress> hosts = new ArrayList<>(); + List<UUID> hostIds = new ArrayList<>(); + + // we want to convict if there is any heartbeat data present in the FD + DatabaseDescriptor.setPhiConvictThreshold(0); + + // create a ring of 3 nodes + Util.createInitialRing(ss, partitioner, endpointTokens, keyTokens, hosts, hostIds, 3); + + InetAddress leftHost = hosts.get(1); + + FailureDetector.instance.report(leftHost); + + // trigger handleStateLeft in StorageService + ss.onChange(leftHost, ApplicationState.STATUS, + valueFactory.left(Collections.singleton(endpointTokens.get(1)), Gossiper.computeExpireTime())); + + // confirm that 
handleStateLeft was called and leftEndpoint was removed from TokenMetadata + assertFalse("Left endpoint not removed from TokenMetadata", tmd.isMember(leftHost)); + + // confirm the FD's history for leftHost didn't get wiped by status jump to LEFT + FailureDetector.instance.interpret(leftHost); + assertFalse("Left endpoint not convicted", FailureDetector.instance.isAlive(leftHost)); + } +}