This is an automated email from the ASF dual-hosted git repository.

nickva pushed a commit to branch optimize-view-building
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit e84b4f6b73389ac76932e1afc021806aed9312d9
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Wed Apr 29 00:25:30 2026 -0400

    Do not run full GC after each doc update in the indexer
    
    Run it after every 1000 docs, ensuring it is still forced to run as a safe
    default. However, in OTP 27+ this should not be needed at all, so infinity
    is also a possible config setting; we may change to that in the future.
    
    Comments from John Hogberg in the OTP repo regarding the behavior change in OTP 27:
    
    https://github.com/erlang/otp/issues/8229#issuecomment-1988858134
    
    > the GC pressure of off-heap binaries ("vheap") was vastly under-counted
    > prior to 24ef4cb [1]. In these tests this caused it to GC less often and,
    > crucially, when there was less live data to keep.
    
    [1]
    
https://github.com/erlang/otp/commit/24ef4cbaeda9b9c26682cba75f2f15b0c58722aa
---
 rel/overlay/etc/default.ini                 |  9 ++++++++-
 src/couch_index/src/couch_index_updater.erl | 23 ++++++++++++++++-------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index bb017fb44..df24e1d44 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -491,7 +491,7 @@ hash_algorithms = sha256, sha
 ;max_write_delay = 500
 ;update_db = true
 
-;[view_updater]
+[view_updater]
 ; Configure the queue capacity used during indexing. These settings apply to
 ; both the queue between the changes feed and the JS mapper, and between the
 ; JS mapper and the disk writer.
@@ -505,6 +505,13 @@ hash_algorithms = sha256, sha
 ;min_writer_items = 100
 ;min_writer_size = 16777216
 
+; After how many processed docs to run garbage collection in the view index
+; updater process. infinity is also a possible setting, to let the Erlang VM
+; run GC as it sees fit; Erlang/OTP >= 27 should do that much better than
+; before. Previously, in CouchDB versions < 3.5.1, GC ran after every single
+; doc update.
+;gc_interval_docs = 1000
+
 [couch_httpd_auth]
 ; WARNING! This only affects the node-local port (5986 by default).
 ; You probably want the settings under [chttpd].
diff --git a/src/couch_index/src/couch_index_updater.erl 
b/src/couch_index/src/couch_index_updater.erl
index c7596e90d..3ae18750a 100644
--- a/src/couch_index/src/couch_index_updater.erl
+++ b/src/couch_index/src/couch_index_updater.erl
@@ -159,15 +159,24 @@ update(Idx, Mod, IdxState) ->
             end
         end,
 
-        Proc = fun(DocInfo, {IdxStateAcc, _}) ->
-            case CommittedOnly and (GetSeq(DocInfo) > DbCommittedSeq) of
+        GcInterval = config:get_integer_or_infinity("view_updater", 
"gc_interval_docs", 1000),
+        Proc = fun(DocInfo, {IdxStateAcc, _, NDocs}) ->
+            case CommittedOnly andalso (GetSeq(DocInfo) > DbCommittedSeq) of
                 true ->
-                    {stop, {IdxStateAcc, false}};
+                    {stop, {IdxStateAcc, false, NDocs}};
                 false ->
                     {Doc, Seq} = LoadDoc(DocInfo),
                     {ok, NewSt} = Mod:process_doc(Doc, Seq, IdxStateAcc),
-                    garbage_collect(),
-                    {ok, {NewSt, true}}
+                    NDocs1 = NDocs + 1,
+                    case GcInterval of
+                        infinity ->
+                            ok;
+                        _ when NDocs1 rem GcInterval == 0 ->
+                            garbage_collect();
+                        _ ->
+                            ok
+                    end,
+                    {ok, {NewSt, true, NDocs1}}
             end
         end,
         {ok, InitIdxState} = Mod:start_update(
@@ -177,9 +186,9 @@ update(Idx, Mod, IdxState) ->
             NumPurgeChanges
         ),
 
-        Acc0 = {InitIdxState, true},
+        Acc0 = {InitIdxState, true, 0},
         {ok, Acc} = couch_db:fold_changes(Db, CurrSeq, Proc, Acc0, []),
-        {ProcIdxSt, SendLast} = Acc,
+        {ProcIdxSt, SendLast, _} = Acc,
 
         % If we didn't bail due to hitting the last committed seq we need
         % to send our last update_seq through.

Reply via email to