This is an automated email from the ASF dual-hosted git repository.

brandonwilliams pushed a commit to branch cassandra-4.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-4.0 by this push:
     new 057d082e00 Add option to override the FatClient timeout for Bootstrapping nodes
057d082e00 is described below

commit 057d082e00f7d10b8e9b127cfabd9b8cd228da3d
Author: Raymond Huffman <rhuff...@palantir.com>
AuthorDate: Tue Apr 23 16:41:12 2024 -0400

    Add option to override the FatClient timeout for Bootstrapping nodes
    
    Patch by Raymond Huffman; reviewed by brandonwilliams and dcapwell for
    CASSANDRA-15439
---
 CHANGES.txt                                        |  1 +
 conf/jvm-server.options                            |  4 ++
 .../config/CassandraRelevantProperties.java        |  4 ++
 src/java/org/apache/cassandra/gms/Gossiper.java    | 46 ++++++++++++++++++++--
 .../org/apache/cassandra/gms/VersionedValue.java   |  2 +
 5 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index a506c9e9e0..2d56a56c61 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.0.14
+ * Add timeout specifically for bootstrapping nodes (CASSANDRA-15439)
  * Bring Redhat packge dirs/ownership/perms in line with Debian package (CASSANDRA-19565)
 
 
diff --git a/conf/jvm-server.options b/conf/jvm-server.options
index e89cf7343a..d529a2b9e2 100644
--- a/conf/jvm-server.options
+++ b/conf/jvm-server.options
@@ -74,6 +74,10 @@
 # before joining the ring.
 #-Dcassandra.ring_delay_ms=ms
 
+# Allows overriding the timeout after which an unresponsive bootstrapping node is considered failed
+# and is removed from gossip state and bootstrapTokens. (Default: cassandra.ring_delay * 2)
+#-Dcassandra.failed_bootstrap_timeout_ms=ms
+
 # Set the SSL port for encrypted communication. (Default: 7001)
 #-Dcassandra.ssl_storage_port=port
 
diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
index 4de333e101..0377bc40e2 100644
--- a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
+++ b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
@@ -151,6 +151,10 @@ public enum CassandraRelevantProperties
     /** mx4jport */
     MX4JPORT ("mx4jport"),
 
+    RING_DELAY("cassandra.ring_delay_ms"),
+
+    FAILED_BOOTSTRAP_TIMEOUT("cassandra.failed_bootstrap_timeout_ms"),
+
     /**
      * When bootstraping we wait for all schema versions found in gossip to be seen, and if not seen in time we fail
      * the bootstrap; this property will avoid failing and allow bootstrap to continue if set to true.
diff --git a/src/java/org/apache/cassandra/gms/Gossiper.java b/src/java/org/apache/cassandra/gms/Gossiper.java
index 63ff5150a7..009e6b255f 100644
--- a/src/java/org/apache/cassandra/gms/Gossiper.java
+++ b/src/java/org/apache/cassandra/gms/Gossiper.java
@@ -107,6 +107,7 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean
         SILENT_SHUTDOWN_STATES.add(VersionedValue.STATUS_BOOTSTRAPPING);
         SILENT_SHUTDOWN_STATES.add(VersionedValue.STATUS_BOOTSTRAPPING_REPLACE);
     }
+
     private static final List<String> ADMINISTRATIVELY_INACTIVE_STATES = Arrays.asList(VersionedValue.HIBERNATE,
                                                                                        VersionedValue.REMOVED_TOKEN,
                                                                                        VersionedValue.STATUS_LEFT);
@@ -126,7 +127,10 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean
 
     // Maximimum difference between generation value and local time we are willing to accept about a peer
     static final int MAX_GENERATION_DIFFERENCE = 86400 * 365;
-    private final long fatClientTimeout;
+
+    // half of QUARATINE_DELAY, to ensure justRemovedEndpoints has enough leeway to prevent re-gossip
+    private static final long FAT_CLIENT_TIMEOUT = (QUARANTINE_DELAY / 2);
+    private static final long FAILED_BOOTSTRAP_TIMEOUT = getFailedBootstrapTimeout();
     private final Random random = new Random();
 
     /* subscribers for interest in EndpointState change */
@@ -254,6 +258,25 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean
         return 259200 * 1000; // 3 days
     }
 
+    private static long getFailedBootstrapTimeout()
+    {
+        String newtimeout = CassandraRelevantProperties.FAILED_BOOTSTRAP_TIMEOUT.getString();
+        if (newtimeout != null)
+        {
+            long longValue = Long.parseLong(newtimeout);
+            if (longValue == -1)
+            {
+                longValue = Long.MAX_VALUE;
+            }
+            logger.info("Overriding FAILED_BOOTSTRAP_TIMEOUT to {}ms", longValue);
+            return longValue;
+        }
+        else
+        {
+            return FAT_CLIENT_TIMEOUT * 2;
+        }
+    }
+
     private static boolean isInGossipStage()
     {
         return ((JMXEnabledSingleThreadExecutor) Stage.GOSSIP.executor()).isExecutedBy(Thread.currentThread());
@@ -344,8 +367,6 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean
     @VisibleForTesting
     public Gossiper(boolean registerJmx)
     {
-        // half of QUARATINE_DELAY, to ensure justRemovedEndpoints has enough leeway to prevent re-gossip
-        fatClientTimeout = (QUARANTINE_DELAY / 2);
         /* register with the Failure Detector for receiving Failure detector events */
         FailureDetector.instance.registerFailureDetectionEventListener(this);
 
@@ -1048,6 +1069,7 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean
             {
                 // check if this is a fat client. fat clients are removed automatically from
                 // gossip after FatClientTimeout.  Do not remove dead states here.
+                long fatClientTimeout = getFatClientTimeoutForEndpoint(epState);
                 if (isGossipOnlyMember(endpoint)
                     && !justRemovedEndpoints.containsKey(endpoint)
                     && TimeUnit.NANOSECONDS.toMillis(nowNano - epState.getUpdateTimestamp()) > fatClientTimeout)
@@ -1095,6 +1117,24 @@ public class Gossiper implements IFailureDetectionEventListener, GossiperMBean
         }
     }
 
+    private static long getFatClientTimeoutForEndpoint(EndpointState epState)
+    {
+        return isBootstrappingState(epState) ?
+                FAILED_BOOTSTRAP_TIMEOUT :
+                FAT_CLIENT_TIMEOUT;
+    }
+
+    private static boolean isBootstrappingState(EndpointState epState)
+    {
+        String status = getGossipStatus(epState);
+        if (status.isEmpty())
+        {
+            return false;
+        }
+
+        return VersionedValue.BOOTSTRAPPING_STATUS.contains(status);
+    }
+
     protected long getExpireTimeForEndpoint(InetAddressAndPort endpoint)
     {
         /* default expireTime is aVeryLongTime */
diff --git a/src/java/org/apache/cassandra/gms/VersionedValue.java b/src/java/org/apache/cassandra/gms/VersionedValue.java
index 880cb98e06..f7b7c18ea5 100644
--- a/src/java/org/apache/cassandra/gms/VersionedValue.java
+++ b/src/java/org/apache/cassandra/gms/VersionedValue.java
@@ -27,6 +27,7 @@ import java.util.stream.Collectors;
 import static java.nio.charset.StandardCharsets.ISO_8859_1;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 
 import org.apache.cassandra.db.TypeSizes;
@@ -83,6 +84,7 @@ public class VersionedValue implements Comparable<VersionedValue>
 
     // values for ApplicationState.REMOVAL_COORDINATOR
     public final static String REMOVAL_COORDINATOR = "REMOVER";
+    public final static Set<String> BOOTSTRAPPING_STATUS = ImmutableSet.of(STATUS_BOOTSTRAPPING, STATUS_BOOTSTRAPPING_REPLACE);
 
     public final int version;
     public final String value;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to