This is an automated email from the ASF dual-hosted git repository.

brandonwilliams pushed a commit to branch cassandra-4.0 in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-4.0 by this push: new 057d082e00 Add option to override the FatClient timeout for Bootstrapping nodes 057d082e00 is described below commit 057d082e00f7d10b8e9b127cfabd9b8cd228da3d Author: Raymond Huffman <rhuff...@palantir.com> AuthorDate: Tue Apr 23 16:41:12 2024 -0400 Add option to override the FatClient timeout for Bootstrapping nodes Patch by Raymond Huffman; reviewed by brandonwilliams and dcapwell for CASSANDRA-15439 --- CHANGES.txt | 1 + conf/jvm-server.options | 4 ++ .../config/CassandraRelevantProperties.java | 4 ++ src/java/org/apache/cassandra/gms/Gossiper.java | 46 ++++++++++++++++++++-- .../org/apache/cassandra/gms/VersionedValue.java | 2 + 5 files changed, 54 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index a506c9e9e0..2d56a56c61 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0.14 + * Add timeout specifically for bootstrapping nodes (CASSANDRA-15439) * Bring Redhat packge dirs/ownership/perms in line with Debian package (CASSANDRA-19565) diff --git a/conf/jvm-server.options b/conf/jvm-server.options index e89cf7343a..d529a2b9e2 100644 --- a/conf/jvm-server.options +++ b/conf/jvm-server.options @@ -74,6 +74,10 @@ # before joining the ring. #-Dcassandra.ring_delay_ms=ms +# Allows overriding the timeout after which an unresponsive bootstrapping node is considered failed +# and is removed from gossip state and bootstrapTokens. (Default: cassandra.ring_delay * 2) +#-Dcassandra.failed_bootstrap_timeout_ms=ms + # Set the SSL port for encrypted communication. 
(Default: 7001) #-Dcassandra.ssl_storage_port=port diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java index 4de333e101..0377bc40e2 100644 --- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java +++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java @@ -151,6 +151,10 @@ public enum CassandraRelevantProperties /** mx4jport */ MX4JPORT ("mx4jport"), + RING_DELAY("cassandra.ring_delay_ms"), + + FAILED_BOOTSTRAP_TIMEOUT("cassandra.failed_bootstrap_timeout_ms"), + /** * When bootstraping we wait for all schema versions found in gossip to be seen, and if not seen in time we fail * the bootstrap; this property will avoid failing and allow bootstrap to continue if set to true. diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java index 63ff5150a7..009e6b255f 100644 --- a/src/java/org/apache/cassandra/gms/Gossiper.java +++ b/src/java/org/apache/cassandra/gms/Gossiper.java @@ -107,6 +107,7 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean SILENT_SHUTDOWN_STATES.add(VersionedValue.STATUS_BOOTSTRAPPING); SILENT_SHUTDOWN_STATES.add(VersionedValue.STATUS_BOOTSTRAPPING_REPLACE); } + private static final List<String> ADMINISTRATIVELY_INACTIVE_STATES = Arrays.asList(VersionedValue.HIBERNATE, VersionedValue.REMOVED_TOKEN, VersionedValue.STATUS_LEFT); @@ -126,7 +127,10 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean // Maximimum difference between generation value and local time we are willing to accept about a peer static final int MAX_GENERATION_DIFFERENCE = 86400 * 365; - private final long fatClientTimeout; + + // half of QUARATINE_DELAY, to ensure justRemovedEndpoints has enough leeway to prevent re-gossip + private static final long FAT_CLIENT_TIMEOUT = (QUARANTINE_DELAY / 2); + private static final long 
FAILED_BOOTSTRAP_TIMEOUT = getFailedBootstrapTimeout(); private final Random random = new Random(); /* subscribers for interest in EndpointState change */ @@ -254,6 +258,25 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean return 259200 * 1000; // 3 days } + private static long getFailedBootstrapTimeout() + { + String newtimeout = CassandraRelevantProperties.FAILED_BOOTSTRAP_TIMEOUT.getString(); + if (newtimeout != null) + { + long longValue = Long.parseLong(newtimeout); + if (longValue == -1) + { + longValue = Long.MAX_VALUE; + } + logger.info("Overriding FAILED_BOOTSTRAP_TIMEOUT to {}ms", longValue); + return longValue; + } + else + { + return FAT_CLIENT_TIMEOUT * 2; + } + } + private static boolean isInGossipStage() { return ((JMXEnabledSingleThreadExecutor) Stage.GOSSIP.executor()).isExecutedBy(Thread.currentThread()); @@ -344,8 +367,6 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean @VisibleForTesting public Gossiper(boolean registerJmx) { - // half of QUARATINE_DELAY, to ensure justRemovedEndpoints has enough leeway to prevent re-gossip - fatClientTimeout = (QUARANTINE_DELAY / 2); /* register with the Failure Detector for receiving Failure detector events */ FailureDetector.instance.registerFailureDetectionEventListener(this); @@ -1048,6 +1069,7 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean { // check if this is a fat client. fat clients are removed automatically from // gossip after FatClientTimeout. Do not remove dead states here. 
+ long fatClientTimeout = getFatClientTimeoutForEndpoint(epState); if (isGossipOnlyMember(endpoint) && !justRemovedEndpoints.containsKey(endpoint) && TimeUnit.NANOSECONDS.toMillis(nowNano - epState.getUpdateTimestamp()) > fatClientTimeout) @@ -1095,6 +1117,24 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean } } + private static long getFatClientTimeoutForEndpoint(EndpointState epState) + { + return isBootstrappingState(epState) ? + FAILED_BOOTSTRAP_TIMEOUT : + FAT_CLIENT_TIMEOUT; + } + + private static boolean isBootstrappingState(EndpointState epState) + { + String status = getGossipStatus(epState); + if (status.isEmpty()) + { + return false; + } + + return VersionedValue.BOOTSTRAPPING_STATUS.contains(status); + } + protected long getExpireTimeForEndpoint(InetAddressAndPort endpoint) { /* default expireTime is aVeryLongTime */ diff --git a/src/java/org/apache/cassandra/gms/VersionedValue.java b/src/java/org/apache/cassandra/gms/VersionedValue.java index 880cb98e06..f7b7c18ea5 100644 --- a/src/java/org/apache/cassandra/gms/VersionedValue.java +++ b/src/java/org/apache/cassandra/gms/VersionedValue.java @@ -27,6 +27,7 @@ import java.util.stream.Collectors; import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.cassandra.db.TypeSizes; @@ -83,6 +84,7 @@ public class VersionedValue implements Comparable<VersionedValue> // values for ApplicationState.REMOVAL_COORDINATOR public final static String REMOVAL_COORDINATOR = "REMOVER"; + public final static Set<String> BOOTSTRAPPING_STATUS = ImmutableSet.of(STATUS_BOOTSTRAPPING, STATUS_BOOTSTRAPPING_REPLACE); public final int version; public final String value; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: 
commits-h...@cassandra.apache.org