This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch scanner-improvements
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit b437f85071b952f8d1a00949675f4b1e7b9f414f
Author: Nick Vatamaniuc <vatam...@gmail.com>
AuthorDate: Tue Aug 26 01:12:12 2025 -0400

    Avoid timeouts in ddoc scanner callback
    
    Previously the `ddoc(St, DbName, #doc{})` scanner callback was called from the
    `fabric:all_docs()` context while it was fetching design documents. That can
    work when design documents are just filtered or checked quickly like in the
    feature detector plugin. However, if a plugin wanted to perform a longer
    running operation the `fabric:all_docs()` call would timeout and crash.
    
    To allow plugins to spend an arbitrarily long time in the ddoc callback, switch to
    fetching small batches of design documents and then calling the plugin `ddoc/3`
    callback outside the `fabric:all_docs()` context.
    
    Thanks to Robert Newson (@rnewson) for the original idea of doing batched
    design doc iteration.
---
 rel/overlay/etc/default.ini                        |  5 ++
 src/couch_scanner/src/couch_scanner_plugin.erl     | 88 ++++++++++++++++------
 .../test/eunit/couch_scanner_test.erl              |  3 +
 src/docs/src/config/scanner.rst                    | 10 +++
 4 files changed, 84 insertions(+), 22 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index c2ea4c7ca..efdbf0c76 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1011,6 +1011,11 @@ url = {{nouveau_url}}
 ; is shared across all running plugins.
 ;doc_rate_limit = 1000
 
+; Batch size to use when fetching design documents. For lots of small design
+; documents this value could be increased to 500 or 1000. If design documents
+; are large (100KB+) it could make sense to decrease it a bit to 25 or 10.
+;ddoc_batch_size = 100
+
 [couch_scanner_plugins]
 ;couch_scanner_plugin_ddoc_features = false
 ;couch_scanner_plugin_find = false
diff --git a/src/couch_scanner/src/couch_scanner_plugin.erl 
b/src/couch_scanner/src/couch_scanner_plugin.erl
index 1ef56378d..1ea6d57e4 100644
--- a/src/couch_scanner/src/couch_scanner_plugin.erl
+++ b/src/couch_scanner/src/couch_scanner_plugin.erl
@@ -171,6 +171,7 @@
 
 -define(CHECKPOINT_INTERVAL_SEC, 10).
 -define(STOP_TIMEOUT_SEC, 5).
+-define(DDOC_BATCH_SIZE, 100).
 
 -record(st, {
     id,
@@ -326,7 +327,7 @@ scan_db([_ | _] = Shards, #st{} = St) ->
             case Go of
                 ok ->
                     St2 = rate_limit(St1, db),
-                    St3 = fold_ddocs(fun scan_ddocs_fold/2, St2),
+                    St3 = scan_ddocs(St2),
                     {Shards1, St4} = shards_callback(St3, Shards),
                     St5 = scan_shards(Shards1, St4),
                     {ok, St5};
@@ -339,16 +340,6 @@ scan_db([_ | _] = Shards, #st{} = St) ->
             {ok, St}
     end.
 
-scan_ddocs_fold({meta, _}, #st{} = Acc) ->
-    {ok, Acc};
-scan_ddocs_fold({row, RowProps}, #st{} = Acc) ->
-    DDoc = couch_util:get_value(doc, RowProps),
-    scan_ddoc(ejson_to_doc(DDoc), Acc);
-scan_ddocs_fold(complete, #st{} = Acc) ->
-    {ok, Acc};
-scan_ddocs_fold({error, Error}, _Acc) ->
-    exit({shutdown, {scan_ddocs_fold, Error}}).
-
 scan_shards([], #st{} = St) ->
     St;
 scan_shards([#shard{} = Shard | Rest], #st{} = St) ->
@@ -654,28 +645,78 @@ shards_by_range(Shards) ->
     Dict = lists:foldl(Fun, orddict:new(), Shards),
     orddict:to_list(Dict).
 
-% Design doc fetching helper
-
-fold_ddocs(Fun, #st{dbname = DbName, mod = Mod} = Acc) ->
+scan_ddocs(#st{mod = Mod} = St) ->
     case is_exported(Mod, ddoc, 3) of
         true ->
-            QArgs = #mrargs{
-                include_docs = true,
-                extra = [{namespace, <<"_design">>}]
-            },
             try
-                {ok, Acc1} = fabric:all_docs(DbName, [?ADMIN_CTX], Fun, Acc, 
QArgs),
-                Acc1
+                fold_ddocs_batched(St, <<?DESIGN_DOC_PREFIX>>)
             catch
                 error:database_does_not_exist ->
-                    Acc
+                    St
             end;
         false ->
             % If the plugin doesn't export the ddoc callback, don't bother 
calling
             % fabric:all_docs, as it's expensive
-            Acc
+            St
+    end.
+
+fold_ddocs_batched(#st{dbname = DbName} = St, <<_/binary>> = StartKey) ->
+    QArgs = #mrargs{
+        include_docs = true,
+        start_key = StartKey,
+        extra = [{namespace, <<?DESIGN_DOC_PREFIX0>>}],
+        % Need limit > 1 for the algorithm below to work
+        limit = max(2, cfg_ddoc_batch_size())
+    },
+    Cbk =
+        fun
+            ({meta, _}, {Cnt, Id, DDocs}) ->
+                {ok, {Cnt, Id, DDocs}};
+            ({row, Props}, {Cnt, _Id, DDocs}) ->
+                EJson = couch_util:get_value(doc, Props),
+                DDoc = #doc{id = Id} = ejson_to_doc(EJson),
+                case Id =:= StartKey of
+                    true ->
+                        % We get there if we're continuing batched iteration so
+                        % we skip this ddoc as we already processed it. In the
+                        % first batch StartKey will be <<"_design/">> and
+                        % that's an invalid document ID so will never match.
+                        {ok, {Cnt + 1, Id, DDocs}};
+                    false ->
+                        {ok, {Cnt + 1, Id, [DDoc | DDocs]}}
+                end;
+            (complete, {Cnt, Id, DDocs}) ->
+                {ok, {Cnt, Id, lists:reverse(DDocs)}};
+            ({error, Error}, {_Cnt, _Id, _DDocs}) ->
+                exit({shutdown, {scan_ddocs_fold, Error}})
+        end,
+    Acc0 = {0, StartKey, []},
+    {ok, {Cnt, LastId, DDocs}} = fabric:all_docs(DbName, [?ADMIN_CTX], Cbk, 
Acc0, QArgs),
+    case scan_ddoc_batch(DDocs, {ok, St}) of
+        {ok, #st{} = St1} ->
+            if
+                is_integer(Cnt), Cnt < QArgs#mrargs.limit ->
+                    % We got less than we asked for so we're done
+                    St1;
+                Cnt == QArgs#mrargs.limit ->
+                    % We got all the docs we asked for, there are probably 
more docs
+                    % so we recurse and fetch the next batch.
+                    fold_ddocs_batched(St1, LastId)
+            end;
+        {stop, #st{} = St1} ->
+            % Plugin wanted to stop scanning ddocs, so we stop
+            St1
     end.
 
+% Call plugin ddocs callback. These may take an arbitrarily long time to
+% process.
+scan_ddoc_batch(_, {stop, #st{} = St}) ->
+    {stop, St};
+scan_ddoc_batch([], {ok, #st{} = St}) ->
+    {ok, St};
+scan_ddoc_batch([#doc{} = DDoc | Rest], {ok, #st{} = St}) ->
+    scan_ddoc_batch(Rest, scan_ddoc(DDoc, St)).
+
 % Simple ejson to #doc{} function to avoid all the extra validation in 
from_json_obj/1.
 % We just got these docs from the cluster, they are already saved on disk.
 ejson_to_doc({[_ | _] = Props}) ->
@@ -708,6 +749,9 @@ cfg(Mod, Key, Default) when is_list(Key) ->
     Section = atom_to_list(Mod),
     config:get(Section, Key, Default).
 
+cfg_ddoc_batch_size() ->
+    config:get_integer("couch_scanner", "ddoc_batch_size", ?DDOC_BATCH_SIZE).
+
 schedule_time(Mod, LastSec, NowSec) ->
     After = cfg(Mod, "after", "restart"),
     Repeat = cfg(Mod, "repeat", "restart"),
diff --git a/src/couch_scanner/test/eunit/couch_scanner_test.erl 
b/src/couch_scanner/test/eunit/couch_scanner_test.erl
index a7edb6b67..1fe2e157d 100644
--- a/src/couch_scanner/test/eunit/couch_scanner_test.erl
+++ b/src/couch_scanner/test/eunit/couch_scanner_test.erl
@@ -53,6 +53,9 @@ setup() ->
     meck:new(couch_scanner_server, [passthrough]),
     meck:new(couch_scanner_util, [passthrough]),
     Ctx = test_util:start_couch([fabric, couch_scanner]),
+    % Run with the smallest batch size to exercise the batched
+    % ddoc iteration
+    config:set("couch_scanner", "ddoc_batch_size", "2", false),
     DbName1 = <<"dbname1", (?tempdb())/binary>>,
     DbName2 = <<"dbname2", (?tempdb())/binary>>,
     DbName3 = <<"dbname3", (?tempdb())/binary>>,
diff --git a/src/docs/src/config/scanner.rst b/src/docs/src/config/scanner.rst
index 4be3d9c06..d3644c23f 100644
--- a/src/docs/src/config/scanner.rst
+++ b/src/docs/src/config/scanner.rst
@@ -85,6 +85,16 @@ Scanner Options
             [couch_scanner]
             doc_rate_limit = 1000
 
+    .. config:option:: ddoc_batch_size
+
+        Batch size to use when fetching design documents. For lots of small
+        design documents this value could be increased to 500 or 1000. If
+        design documents are large (100KB+) it could make sense to decrease it
+        a bit to 25 or 10. ::
+
+            [couch_scanner]
+            ddoc_batch_size = 100
+
 .. config:section:: couch_scanner_plugins :: Enable Scanner Plugins
 
     .. config:option:: {plugin}

Reply via email to