Measure max generation drift against local time instead of previously stored generation for remote host to allow long-running clusters.
patch by jkni; reviewed by Stefania for CASSANDRA-10969 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/3c55732f Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/3c55732f Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/3c55732f Branch: refs/heads/trunk Commit: 3c55732fa414c7835536dc42ff489461a7441bfe Parents: 98cc2c8 Author: Joel Knighton <joel.knigh...@datastax.com> Authored: Thu Jan 7 09:41:47 2016 -0600 Committer: Sylvain Lebresne <sylv...@datastax.com> Committed: Fri Jan 22 15:37:19 2016 +0100 ---------------------------------------------------------------------- CHANGES.txt | 1 + src/java/org/apache/cassandra/gms/Gossiper.java | 10 ++- .../org/apache/cassandra/gms/GossiperTest.java | 93 ++++++++++++++++++++ 3 files changed, 100 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/3c55732f/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 7175953..4bff88c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.1.13 + * Fix bad gossip generation seen in long-running clusters (CASSANDRA-10969) * Avoid NPE when incremental repair fails (CASSANDRA-10909) * Unmark sstables compacting once they are done in cleanup/scrub/upgradesstables (CASSANDRA-10829) * Revert CASSANDRA-10012 and add more logging (CASSANDRA-10961) http://git-wip-us.apache.org/repos/asf/cassandra/blob/3c55732f/src/java/org/apache/cassandra/gms/Gossiper.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java index 59ef3cc..ebdd5bd 100644 --- a/src/java/org/apache/cassandra/gms/Gossiper.java +++ b/src/java/org/apache/cassandra/gms/Gossiper.java @@ -87,8 +87,8 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean public static final long aVeryLongTime = 259200 * 1000; // 3 days - /** Maximimum difference in generation and version values we are willing to accept about a peer */ - private static final long MAX_GENERATION_DIFFERENCE = 86400 * 365; + // Maximimum difference between generation value and local time we are willing to accept about a peer + static final int MAX_GENERATION_DIFFERENCE = 86400 * 365; private long FatClientTimeout; private final Random random = new Random(); private final Comparator<InetAddress> inetcomparator = new Comparator<InetAddress>() @@ -1107,13 +1107,15 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean { int localGeneration = localEpStatePtr.getHeartBeatState().getGeneration(); int remoteGeneration = remoteState.getHeartBeatState().getGeneration(); + long localTime = System.currentTimeMillis()/1000; if (logger.isTraceEnabled()) logger.trace(ep + "local generation " + localGeneration + ", remote generation " + remoteGeneration); - if (localGeneration != 0 && remoteGeneration > localGeneration + MAX_GENERATION_DIFFERENCE) + // We measure generation drift against local time, based on the fact that generation is initialized by time + if (remoteGeneration > localTime + MAX_GENERATION_DIFFERENCE) { // assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself) - logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}", ep, localGeneration, remoteGeneration); + logger.warn("received an invalid gossip generation for peer {}; local time = {}, received generation = {}", ep, localTime, remoteGeneration); } else if (remoteGeneration > localGeneration) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/3c55732f/test/unit/org/apache/cassandra/gms/GossiperTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/gms/GossiperTest.java b/test/unit/org/apache/cassandra/gms/GossiperTest.java new file mode 100644 index 0000000..ad07165 --- /dev/null +++ b/test/unit/org/apache/cassandra/gms/GossiperTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.gms; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +import com.google.common.collect.ImmutableMap; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.SchemaLoader; +import org.apache.cassandra.Util; +import org.apache.cassandra.dht.IPartitioner; +import org.apache.cassandra.dht.RandomPartitioner; +import org.apache.cassandra.dht.Token; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.locator.TokenMetadata; +import org.apache.cassandra.service.StorageService; + +import static org.junit.Assert.assertEquals; + +public class GossiperTest +{ + static final IPartitioner partitioner = new RandomPartitioner(); + StorageService ss = StorageService.instance; + TokenMetadata tmd = StorageService.instance.getTokenMetadata(); + ArrayList<Token> endpointTokens = new ArrayList<>(); + ArrayList<Token> keyTokens = new ArrayList<>(); + List<InetAddress> hosts = new ArrayList<>(); + List<UUID> hostIds = new ArrayList<>(); + + @Before + public void setup() + { + tmd.clearUnsafe(); + }; + + @Test + public void testLargeGenerationJump() throws UnknownHostException, InterruptedException + { + Util.createInitialRing(ss, partitioner, endpointTokens, keyTokens, hosts, hostIds, 2); + InetAddress remoteHostAddress = hosts.get(1); + + EndpointState initialRemoteState = Gossiper.instance.getEndpointStateForEndpoint(remoteHostAddress); + HeartBeatState initialRemoteHeartBeat = initialRemoteState.getHeartBeatState(); + + //Util.createInitialRing should have initialized remoteHost's HeartBeatState's generation to 1 + assertEquals(initialRemoteHeartBeat.getGeneration(), 1); + + HeartBeatState proposedRemoteHeartBeat = new HeartBeatState(initialRemoteHeartBeat.getGeneration() + Gossiper.MAX_GENERATION_DIFFERENCE + 1); + EndpointState proposedRemoteState = new EndpointState(proposedRemoteHeartBeat); + + Gossiper.instance.applyStateLocally(ImmutableMap.of(remoteHostAddress, proposedRemoteState)); + + //The generation should have been updated because it isn't over Gossiper.MAX_GENERATION_DIFFERENCE in the future + HeartBeatState actualRemoteHeartBeat = Gossiper.instance.getEndpointStateForEndpoint(remoteHostAddress).getHeartBeatState(); + assertEquals(proposedRemoteHeartBeat.getGeneration(), actualRemoteHeartBeat.getGeneration()); + + //Propose a generation 10 years in the future - this should be rejected. + HeartBeatState badProposedRemoteHeartBeat = new HeartBeatState((int) (System.currentTimeMillis()/1000) + Gossiper.MAX_GENERATION_DIFFERENCE * 10); + EndpointState badProposedRemoteState = new EndpointState(badProposedRemoteHeartBeat); + + Gossiper.instance.applyStateLocally(ImmutableMap.of(remoteHostAddress, badProposedRemoteState)); + + actualRemoteHeartBeat = Gossiper.instance.getEndpointStateForEndpoint(remoteHostAddress).getHeartBeatState(); + + //The generation should not have been updated because it is over Gossiper.MAX_GENERATION_DIFFERENCE in the future + assertEquals(proposedRemoteHeartBeat.getGeneration(), actualRemoteHeartBeat.getGeneration()); + } +}