This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 9104f396d feat(replication): add replication-no-slowdown config (#3219)
9104f396d is described below
commit 9104f396d8a44ace9545d10a16b2c3ce84ebf37d
Author: Zhixin Wen <[email protected]>
AuthorDate: Tue Oct 7 23:25:22 2025 -0700
feat(replication): add replication-no-slowdown config (#3219)
We need to set `rocksdb.write_options.no_slowdown` to yes in production
because if a write stall happens, it blocks the event loop and
KVRocks is not able to serve any other requests, including `INFO`.
This means that when a write stall happens, the cluster appears frozen and
failover is triggered unnecessarily.
However, a side effect of `rocksdb.write_options.no_slowdown` is that it
causes replication failures when a write stall happens. The replica
then continuously tries to reconnect and restart the process, adding
extra load on the master.
The solution is to add a separate config that gives fine-grained control over this behavior.
---------
Co-authored-by: Twice <[email protected]>
---
kvrocks.conf | 11 +++++++++++
src/cluster/replication.cc | 7 +++++++
src/config/config.cc | 1 +
src/config/config.h | 1 +
4 files changed, 20 insertions(+)
diff --git a/kvrocks.conf b/kvrocks.conf
index fb34204ba..c15252a08 100644
--- a/kvrocks.conf
+++ b/kvrocks.conf
@@ -208,6 +208,17 @@ replication-recv-timeout-ms 3200
# Default: no
replication-group-sync no
+# Control whether rocksdb.write_options.no_slowdown is applied to replication
writes.
+# This option is only effective when rocksdb.write_options.no_slowdown is
enabled.
+# If rocksdb.write_options.no_slowdown is enabled globally, this option
determines
+# whether replication writes should also use no_slowdown. This allows
fine-grained
+# control to prevent replication from being affected by global no_slowdown
setting.
+# One possible issue of using no-slowdown in replication is that it can cause
replication
+# to error and restart the replication process continuously.
+# Default to yes to keep current behavior.
+# Default: yes
+replication-no-slowdown yes
+
# Maximum bytes to buffer before sending replication data to replicas.
# The master will pack multiple write batches into one bulk to reduce network
overhead,
# but will send immediately if the bulk size exceeds this limit.
diff --git a/src/cluster/replication.cc b/src/cluster/replication.cc
index c66406ddb..993ed9825 100644
--- a/src/cluster/replication.cc
+++ b/src/cluster/replication.cc
@@ -664,6 +664,13 @@ ReplicationThread::CBState
ReplicationThread::incrementBatchLoopCB(bufferevent *
write_opts.sync = false;
}
+ // Control no_slowdown for replication separately from global setting
+ // If rocksdb.write_options.no_slowdown is enabled, use
replication_no_slowdown config
+ // to determine if it should be applied to replication writes
+ if (srv_->GetConfig()->rocks_db.write_options.no_slowdown) {
+ write_opts.no_slowdown = srv_->GetConfig()->replication_no_slowdown;
+ }
+
while (true) {
switch (incr_state_) {
case Incr_batch_size: {
diff --git a/src/config/config.cc b/src/config/config.cc
index 28584386f..1fd8f40b8 100644
--- a/src/config/config.cc
+++ b/src/config/config.cc
@@ -203,6 +203,7 @@ Config::Config() {
{"replication-connect-timeout-ms", false, new
IntField(&replication_connect_timeout_ms, 3100, 0, INT_MAX)},
{"replication-recv-timeout-ms", false, new
IntField(&replication_recv_timeout_ms, 3200, 0, INT_MAX)},
{"replication-group-sync", false, new
YesNoField(&replication_group_sync, false)},
+ {"replication-no-slowdown", false, new
YesNoField(&replication_no_slowdown, true)},
{"replication-delay-bytes", false, new
IntField(&max_replication_delay_bytes, 16 * 1024, 1, INT_MAX)},
{"replication-delay-updates", false, new
IntField(&max_replication_delay_updates, 16, 1, INT_MAX)},
{"use-rsid-psync", true, new YesNoField(&use_rsid_psync, false)},
diff --git a/src/config/config.h b/src/config/config.h
index c56a99f0b..036e506e8 100644
--- a/src/config/config.h
+++ b/src/config/config.h
@@ -135,6 +135,7 @@ struct Config {
int fullsync_recv_file_delay = 0;
bool use_rsid_psync = false;
bool replication_group_sync = false;
+ bool replication_no_slowdown = false;
std::vector<std::string> binds;
std::string dir;
std::string db_dir;