This is an automated email from the ASF dual-hosted git repository. nickva pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit e979f7f8b448043b95f68214faf73790e4d2cfd1 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Wed Apr 29 00:25:30 2026 -0400 Do not run full GC after each doc update in the indexer Run it after 1000 docs, ensuring it is still forced to run as a safe default. However, in OTP 27+ this should not be needed at all. So infinity is also a possible config setting, we may change to that in the future. Comments from John Hogberg in OTP repo regarding behavior change in OTP 27: https://github.com/erlang/otp/issues/8229#issuecomment-1988858134 > the GC pressure of off-heap binaries ("vheap") was vastly under-counted prior to 24ef4cb [1]. In these tests this caused it to GC less often and, crucially, when there was less live data to keep. [1] https://github.com/erlang/otp/commit/24ef4cbaeda9b9c26682cba75f2f15b0c58722aa --- rel/overlay/etc/default.ini | 9 ++++++++- src/couch_index/src/couch_index_updater.erl | 23 ++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index bb017fb44..df24e1d44 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -491,7 +491,7 @@ hash_algorithms = sha256, sha ;max_write_delay = 500 ;update_db = true -;[view_updater] +[view_updater] ; Configure the queue capacity used during indexing. These settings apply to ; both the queue between the changes feed and the JS mapper, and between the ; JS mapper and the disk writer. @@ -505,6 +505,13 @@ hash_algorithms = sha256, sha ;min_writer_items = 100 ;min_writer_size = 16777216 +; After how many processed docs to run garbage collection in view index updater +; process (infinity is also a possible setting, to let the Erlang VM run GC +; as it sees fit), version >= 27 should do that much better than before +; Previously in versions < 3.5.1 this was running after every single doc +; update +;gc_interval_docs = 1000 + [couch_httpd_auth] ; WARNING! 
This only affects the node-local port (5986 by default). ; You probably want the settings under [chttpd]. diff --git a/src/couch_index/src/couch_index_updater.erl b/src/couch_index/src/couch_index_updater.erl index c7596e90d..3ae18750a 100644 --- a/src/couch_index/src/couch_index_updater.erl +++ b/src/couch_index/src/couch_index_updater.erl @@ -159,15 +159,24 @@ update(Idx, Mod, IdxState) -> end end, - Proc = fun(DocInfo, {IdxStateAcc, _}) -> - case CommittedOnly and (GetSeq(DocInfo) > DbCommittedSeq) of + GcInterval = config:get_integer_or_infinity("view_updater", "gc_interval_docs", 1000), + Proc = fun(DocInfo, {IdxStateAcc, _, NDocs}) -> + case CommittedOnly andalso (GetSeq(DocInfo) > DbCommittedSeq) of true -> - {stop, {IdxStateAcc, false}}; + {stop, {IdxStateAcc, false, NDocs}}; false -> {Doc, Seq} = LoadDoc(DocInfo), {ok, NewSt} = Mod:process_doc(Doc, Seq, IdxStateAcc), - garbage_collect(), - {ok, {NewSt, true}} + NDocs1 = NDocs + 1, + case GcInterval of + infinity -> + ok; + _ when NDocs1 rem GcInterval == 0 -> + garbage_collect(); + _ -> + ok + end, + {ok, {NewSt, true, NDocs1}} end end, {ok, InitIdxState} = Mod:start_update( @@ -177,9 +186,9 @@ update(Idx, Mod, IdxState) -> NumPurgeChanges ), - Acc0 = {InitIdxState, true}, + Acc0 = {InitIdxState, true, 0}, {ok, Acc} = couch_db:fold_changes(Db, CurrSeq, Proc, Acc0, []), - {ProcIdxSt, SendLast} = Acc, + {ProcIdxSt, SendLast, _} = Acc, % If we didn't bail due to hitting the last committed seq we need % to send our last update_seq through.
