This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch scanner-improvements in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit b437f85071b952f8d1a00949675f4b1e7b9f414f Author: Nick Vatamaniuc <vatam...@gmail.com> AuthorDate: Tue Aug 26 01:12:12 2025 -0400 Avoid timeouts in ddoc scanner callback Previously the `ddoc(St, DbName, #doc{})` scanner callback was called from the `fabric:all_docs()` context while it was fetching design documents. That can work when design documents are just filtered or checked quickly like in the feature detector plugin. However, if a plugin wanted to perform a longer-running operation the `fabric:all_docs()` call would time out and crash. To allow plugins to spend arbitrarily long time in the ddoc callback, switch to fetching small batches of design documents and then calling the plugin `ddoc/3` callback outside the `fabric:all_docs()` context. Thanks to Robert Newson (@rnewson) for the original idea of doing batched design doc iteration. --- rel/overlay/etc/default.ini | 5 ++ src/couch_scanner/src/couch_scanner_plugin.erl | 88 ++++++++++++++++------ .../test/eunit/couch_scanner_test.erl | 3 + src/docs/src/config/scanner.rst | 10 +++ 4 files changed, 84 insertions(+), 22 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index c2ea4c7ca..efdbf0c76 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -1011,6 +1011,11 @@ url = {{nouveau_url}} ; is shared across all running plugins. ;doc_rate_limit = 1000 +; Batch size to use when fetching design documents. For lots of small design +; documents this value could be increased to 500 or 1000. If design documents +; are large (100KB+) it could make sense to decrease it a bit to 25 or 10. 
+;ddoc_batch_size = 100 + [couch_scanner_plugins] ;couch_scanner_plugin_ddoc_features = false ;couch_scanner_plugin_find = false diff --git a/src/couch_scanner/src/couch_scanner_plugin.erl b/src/couch_scanner/src/couch_scanner_plugin.erl index 1ef56378d..1ea6d57e4 100644 --- a/src/couch_scanner/src/couch_scanner_plugin.erl +++ b/src/couch_scanner/src/couch_scanner_plugin.erl @@ -171,6 +171,7 @@ -define(CHECKPOINT_INTERVAL_SEC, 10). -define(STOP_TIMEOUT_SEC, 5). +-define(DDOC_BATCH_SIZE, 100). -record(st, { id, @@ -326,7 +327,7 @@ scan_db([_ | _] = Shards, #st{} = St) -> case Go of ok -> St2 = rate_limit(St1, db), - St3 = fold_ddocs(fun scan_ddocs_fold/2, St2), + St3 = scan_ddocs(St2), {Shards1, St4} = shards_callback(St3, Shards), St5 = scan_shards(Shards1, St4), {ok, St5}; @@ -339,16 +340,6 @@ scan_db([_ | _] = Shards, #st{} = St) -> {ok, St} end. -scan_ddocs_fold({meta, _}, #st{} = Acc) -> - {ok, Acc}; -scan_ddocs_fold({row, RowProps}, #st{} = Acc) -> - DDoc = couch_util:get_value(doc, RowProps), - scan_ddoc(ejson_to_doc(DDoc), Acc); -scan_ddocs_fold(complete, #st{} = Acc) -> - {ok, Acc}; -scan_ddocs_fold({error, Error}, _Acc) -> - exit({shutdown, {scan_ddocs_fold, Error}}). - scan_shards([], #st{} = St) -> St; scan_shards([#shard{} = Shard | Rest], #st{} = St) -> @@ -654,28 +645,78 @@ shards_by_range(Shards) -> Dict = lists:foldl(Fun, orddict:new(), Shards), orddict:to_list(Dict). -% Design doc fetching helper - -fold_ddocs(Fun, #st{dbname = DbName, mod = Mod} = Acc) -> +scan_ddocs(#st{mod = Mod} = St) -> case is_exported(Mod, ddoc, 3) of true -> - QArgs = #mrargs{ - include_docs = true, - extra = [{namespace, <<"_design">>}] - }, try - {ok, Acc1} = fabric:all_docs(DbName, [?ADMIN_CTX], Fun, Acc, QArgs), - Acc1 + fold_ddocs_batched(St, <<?DESIGN_DOC_PREFIX>>) catch error:database_does_not_exist -> - Acc + St end; false -> % If the plugin doesn't export the ddoc callback, don't bother calling % fabric:all_docs, as it's expensive - Acc + St + end. 
+ +fold_ddocs_batched(#st{dbname = DbName} = St, <<_/binary>> = StartKey) -> + QArgs = #mrargs{ + include_docs = true, + start_key = StartKey, + extra = [{namespace, <<?DESIGN_DOC_PREFIX0>>}], + % Need limit > 1 for the algorithm below to work + limit = max(2, cfg_ddoc_batch_size()) + }, + Cbk = + fun + ({meta, _}, {Cnt, Id, DDocs}) -> + {ok, {Cnt, Id, DDocs}}; + ({row, Props}, {Cnt, _Id, DDocs}) -> + EJson = couch_util:get_value(doc, Props), + DDoc = #doc{id = Id} = ejson_to_doc(EJson), + case Id =:= StartKey of + true -> + % We get there if we're continuing batched iteration so + % we skip this ddoc as we already processed it. In the + % first batch StartKey will be <<"_design/">> and + % that's an invalid document ID so will never match. + {ok, {Cnt + 1, Id, DDocs}}; + false -> + {ok, {Cnt + 1, Id, [DDoc | DDocs]}} + end; + (complete, {Cnt, Id, DDocs}) -> + {ok, {Cnt, Id, lists:reverse(DDocs)}}; + ({error, Error}, {_Cnt, _Id, _DDocs}) -> + exit({shutdown, {scan_ddocs_fold, Error}}) + end, + Acc0 = {0, StartKey, []}, + {ok, {Cnt, LastId, DDocs}} = fabric:all_docs(DbName, [?ADMIN_CTX], Cbk, Acc0, QArgs), + case scan_ddoc_batch(DDocs, {ok, St}) of + {ok, #st{} = St1} -> + if + is_integer(Cnt), Cnt < QArgs#mrargs.limit -> + % We got less than we asked for so we're done + St1; + Cnt == QArgs#mrargs.limit -> + % We got all the docs we asked for, there are probably more docs + % so we recurse and fetch the next batch. + fold_ddocs_batched(St1, LastId) + end; + {stop, #st{} = St1} -> + % Plugin wanted to stop scanning ddocs, so we stop + St1 end. +% Call plugin ddocs callback. These may take an arbitrarily long time to +% process. +scan_ddoc_batch(_, {stop, #st{} = St}) -> + {stop, St}; +scan_ddoc_batch([], {ok, #st{} = St}) -> + {ok, St}; +scan_ddoc_batch([#doc{} = DDoc | Rest], {ok, #st{} = St}) -> + scan_ddoc_batch(Rest, scan_ddoc(DDoc, St)). + % Simple ejson to #doc{} function to avoid all the extra validation in from_json_obj/1. 
% We just got these docs from the cluster, they are already saved on disk. ejson_to_doc({[_ | _] = Props}) -> @@ -708,6 +749,9 @@ cfg(Mod, Key, Default) when is_list(Key) -> Section = atom_to_list(Mod), config:get(Section, Key, Default). +cfg_ddoc_batch_size() -> + config:get_integer("couch_scanner", "ddoc_batch_size", ?DDOC_BATCH_SIZE). + schedule_time(Mod, LastSec, NowSec) -> After = cfg(Mod, "after", "restart"), Repeat = cfg(Mod, "repeat", "restart"), diff --git a/src/couch_scanner/test/eunit/couch_scanner_test.erl b/src/couch_scanner/test/eunit/couch_scanner_test.erl index a7edb6b67..1fe2e157d 100644 --- a/src/couch_scanner/test/eunit/couch_scanner_test.erl +++ b/src/couch_scanner/test/eunit/couch_scanner_test.erl @@ -53,6 +53,9 @@ setup() -> meck:new(couch_scanner_server, [passthrough]), meck:new(couch_scanner_util, [passthrough]), Ctx = test_util:start_couch([fabric, couch_scanner]), + % Run with the smallest batch size to exercise the batched + % ddoc iteration + config:set("couch_scanner", "ddoc_batch_size", "2", false), DbName1 = <<"dbname1", (?tempdb())/binary>>, DbName2 = <<"dbname2", (?tempdb())/binary>>, DbName3 = <<"dbname3", (?tempdb())/binary>>, diff --git a/src/docs/src/config/scanner.rst b/src/docs/src/config/scanner.rst index 4be3d9c06..d3644c23f 100644 --- a/src/docs/src/config/scanner.rst +++ b/src/docs/src/config/scanner.rst @@ -85,6 +85,16 @@ Scanner Options [couch_scanner] doc_rate_limit = 1000 + .. config:option:: ddoc_batch_size + + Batch size to use when fetching design documents. For lots of small + design documents this value could be increased to 500 or 1000. If + design documents are large (100KB+) it could make sense to decrease it + a bit to 25 or 10. :: + + [couch_scanner] + ddoc_batch_size = 100 + .. config:section:: couch_scanner_plugins :: Enable Scanner Plugins .. config:option:: {plugin}