This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/main by this push:
     new c0a967365 Add jitter for scanner plugins
c0a967365 is described below

commit c0a967365e059faf6869bdda2da6ccd215155e56
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Tue Mar 24 01:00:48 2026 -0400

    Add jitter for scanner plugins
    
    Add some jitter (10% by default) to scanner plugin periods. This should help
    avoid a "thundering herd" effect on the cluster if plugins are set up to start
    at the same time on all the cluster nodes.
    
    Jitter is configurable per-plugin. Config format is `$num_percent` or
    `$num_$timeunit`. The `$num_percent` format configures the maximum jitter 
value
    as a period percentage. For example 10% of `period = 24_hours` would be 2.4
    hours. The `$num_$timeunit` is the same format as a period itself, so it can
    take values like `5_min`, `2_hours`, etc.
---
 rel/overlay/etc/default.ini                    |  5 ++
 src/couch_scanner/src/couch_scanner_plugin.erl |  3 +-
 src/couch_scanner/src/couch_scanner_util.erl   | 69 +++++++++++++++++++++++---
 src/docs/src/config/scanner.rst                | 16 +++++-
 4 files changed, 84 insertions(+), 9 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 558542fed..bb017fb44 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1127,6 +1127,11 @@ url = {{nouveau_url}}
 ;  * $num_$timeunit: 1000_sec, 30_min, 8_hours, 24_hour, 2_days, 3_weeks, 
1_month
 ;  * $weekday: mon, monday, Thu, thursdays
 ;repeat = restart
+;
+; How much jitter to apply to the period. Possible formats are:
+;  * $num_percent: percent of period value
+;  * $num_$timeunit: 1000_sec, 30_min, 8_hours, 24_hour, 2_days, 3_weeks, 1_month
+;jitter = 10_percent
 
 ;[$plugin.skips_dbs]
 ; Skip over databases if their names contain any of the strings in this 
section.
diff --git a/src/couch_scanner/src/couch_scanner_plugin.erl 
b/src/couch_scanner/src/couch_scanner_plugin.erl
index b31ed949f..cbb42bd90 100644
--- a/src/couch_scanner/src/couch_scanner_plugin.erl
+++ b/src/couch_scanner/src/couch_scanner_plugin.erl
@@ -788,8 +788,9 @@ cfg_ddoc_batch_size() ->
 schedule_time(Mod, LastSec, NowSec) ->
     After = cfg(Mod, "after", "restart"),
     Repeat = cfg(Mod, "repeat", "restart"),
+    Jitter = cfg(Mod, "jitter", "10_percent"),
     Restart = couch_scanner_util:restart_tsec(),
-    couch_scanner_util:schedule_time(NowSec, LastSec, Restart, After, Repeat).
+    couch_scanner_util:schedule_time(NowSec, LastSec, Restart, After, Repeat, 
Jitter).
 
 tsec() ->
     erlang:system_time(second).
diff --git a/src/couch_scanner/src/couch_scanner_util.erl 
b/src/couch_scanner/src/couch_scanner_util.erl
index a6bafe973..b414a195f 100644
--- a/src/couch_scanner/src/couch_scanner_util.erl
+++ b/src/couch_scanner/src/couch_scanner_util.erl
@@ -17,7 +17,7 @@
     log/5,
     ejson_map/1,
     restart_tsec/0,
-    schedule_time/5,
+    schedule_time/6,
     load_regexes/1,
     compile_regexes/1,
     match_regexes/2,
@@ -60,7 +60,7 @@ consistent_hash_nodes(Item) ->
     Nodes = mem3_util:live_nodes(),
     hd(mem3_util:rotate_list(Item, Nodes)) =:= node().
 
-schedule_time(Now, Last, Restart, AfterCfg, RepeatCfg) when
+schedule_time(Now, Last, Restart, AfterCfg, RepeatCfg, JitterCfg) when
     is_integer(Now), is_integer(Restart), is_integer(Last)
 ->
     RepeatPeriod = repeat_period(Now, Last, parse_repeat(RepeatCfg)),
@@ -77,14 +77,47 @@ schedule_time(Now, Last, Restart, AfterCfg, RepeatCfg) when
         {After, undefined} when is_integer(After), Last < After ->
             % Run once, haven't run yet, schedule to run
             max(Now, After);
-        {undefined, Period} ->
+        {undefined, Period} when is_integer(Period) ->
             % No after time, just period. Either need to wait
             % since last time it ran, or is actually ready to run
-            max(Now, Last + Period);
-        {After, Period} ->
+            Jitter = rand:uniform(jitter(JitterCfg, Period)),
+            max(Now, Last + Period + Jitter);
+        {After, Period} when is_integer(After), is_integer(Period) ->
             % Both after time set and a period. Wait for whichever
             % takes the longest
-            lists:max([Now, After, Last + Period])
+            Jitter = rand:uniform(jitter(JitterCfg, Period)),
+            lists:max([Now, After, Last + Period + Jitter])
+    end.
+
+% Parse jitter configuration as number of seconds.
+%
+% JitterCfg formats can be:
+%   N_percent : where N is value 0-100 and then it returns N% of Period
+%   N_Unit : where N is a number and Unit is any unit (parse_period_unit/1 can 
parse)
+%
+% Result will always be in the range of [1, Period] seconds.
+%
+jitter(JitterCfg, Period) when is_integer(Period), Period > 0 ->
+    try string:split(JitterCfg, "_") of
+        [PctStr, "percent"] ->
+            try list_to_integer(PctStr) of
+                Pct ->
+                    Val = round(Period * Pct / 100),
+                    max(1, min(Period, Val))
+            catch
+                _:_ ->
+                    1
+            end;
+        [_, _] ->
+            case parse_non_weekday_period(JitterCfg) of
+                undefined -> 1;
+                Val when is_integer(Val), Val > 0 -> min(Period, Val)
+            end;
+        _ ->
+            1
+    catch
+        _:_ ->
+            1
     end.
 
 load_regexes(KVs) when is_list(KVs) ->
@@ -342,6 +375,30 @@ repeat_period_test() ->
     ?assertEqual(?WEEK, repeat_period(Now, Now - 1, {weekday, 5})),
     ?assertEqual(1 * ?DAY, repeat_period(Now, Now - 999999, {weekday, 6})).
 
+jitter_test() ->
+    ?assertEqual(1, jitter("foo", 1)),
+    ?assertEqual(1, jitter(undefined, 1)),
+    ?assertEqual(1, jitter("", 1)),
+    ?assertEqual(1, jitter("_", 1)),
+    ?assertEqual(1, jitter("1_", 1)),
+    ?assertEqual(1, jitter("_percent", 1)),
+    ?assertEqual(1, jitter("1", 1)),
+    ?assertEqual(1, jitter("X_percent", 1)),
+    ?assertEqual(1, jitter("Z_seconds", 1)),
+    ?assertEqual(1, jitter("1_percent_years", 1)),
+    ?assertEqual(1, jitter("0_percent", 1)),
+    ?assertEqual(1, jitter("50_percent", 1)),
+    ?assertEqual(1, jitter("100_percent", 1)),
+    ?assertEqual(1, jitter("100000000000_percent", 1)),
+    ?assertEqual(1, jitter("1_sec", 1)),
+    ?assertEqual(1, jitter("2_sec", 1)),
+    ?assertEqual(2, jitter("2_sec", 2)),
+    ?assertEqual(2, jitter("2_sec", 3)),
+    ?assertEqual(50, jitter("50_percent", 100)),
+    ?assertEqual(100, jitter("100_percent", 100)),
+    ?assertEqual(100, jitter("10000000000_percent", 100)),
+    ?assertEqual(100, jitter("10000000000_years", 100)).
+
 regex_compile_test() ->
     KVs = [{"x", "a[d-f]"}, {"y", "**"}],
     Regexes = load_regexes(KVs),
diff --git a/src/docs/src/config/scanner.rst b/src/docs/src/config/scanner.rst
index cb49acd6f..98fe2e67c 100644
--- a/src/docs/src/config/scanner.rst
+++ b/src/docs/src/config/scanner.rst
@@ -132,8 +132,8 @@ settings in their ``[{plugin}]`` section.
 
     .. config:option:: repeat
 
-        Run the plugin periodically. By default it will run once after node the
-        node starts. Possible period formats are: ``{num}_{timeunit}`` (ex.:
+        Run the plugin periodically. By default it will run once after the node
+        starts. Possible period formats are: ``{num}_{timeunit}`` (ex.:
         ``1000_sec``, ``30_min``, ``8_hours``, ``24_hour``, ``2_days``,
         ``3_weeks``, ``1_month``) or ``{weekday}`` (ex.: ``mon``, ``monday``,
         ``Thu``, etc.) ::
@@ -141,6 +141,18 @@ settings in their ``[{plugin}]`` section.
           [{plugin}]
           repeat = restart
 
+    .. config:option:: jitter
+
+        How much jitter to apply to the period. The default is 10% of the
+        period value. Jitter can spread the load on the cluster by adding some
+        randomness to when the plugins start. Possible formats are
+        ``{num}_percent`` (ex.: ``25_percent``) or ``{num}_{timeunit}`` (ex.:
+        ``1000_sec``, ``30_min``, ``8_hours``, ``24_hour``, ``2_days``). The
+        default is ``10_percent``, which means 10% of the period value ::
+
+          [{plugin}]
+          jitter = 10_percent
+
 .. config:section:: {plugin}.skip_dbs :: Skip databases
 
     .. config:option:: {tag}

Reply via email to