This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/main by this push:
     new 25e126917 Avoid creating purge checkpoints for invalid views
25e126917 is described below

commit 25e1269170e7d056d9c5242946224900eb5f759f
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Sat Mar 7 00:21:00 2026 -0500

    Avoid creating purge checkpoints for invalid views
    
    Previously, if we successfully parsed a design document into an #mrst{} 
record,
    we would create a purge checkpoint even if the view group didn't have any
    views, or used an invalid language. Normally, having extra unused 
checkpoints
    around would be just bit untidy, however, when we start purging, these
    checkpoints won't be advancing, and after a while prevent the compactor from
    compacting the purge sequences and advancing the minimum purge sequence
    forward.
    
    In other words, we need to differentiate between pars-able #mrst{} view 
groups
    and those that can actually build on a particular node. Ken (our background
    index builder) was already performing the same check, so simply move the 
check
    to the proc manager and the the indexing utility modules so it can be 
reused.
    
    In the proc manager, noticed that the repeated OS environment fetching and
    parsing was kind of time consuming (100s of microseconds) so used a 
persistent
    term.
---
 src/couch/src/couch_proc_manager.erl        | 32 +++++++++-
 src/couch_index/src/couch_index_util.erl    |  1 +
 src/couch_mrview/src/couch_mrview_index.erl | 17 +++--
 src/couch_mrview/src/couch_mrview_util.erl  | 35 +++++++++--
 src/fabric/test/eunit/fabric_tests.erl      | 96 ++++++++++++++++++++++++++++-
 src/ken/src/ken_server.erl                  | 23 +------
 6 files changed, 173 insertions(+), 31 deletions(-)

diff --git a/src/couch/src/couch_proc_manager.erl 
b/src/couch/src/couch_proc_manager.erl
index aa538e23e..ea4a76001 100644
--- a/src/couch/src/couch_proc_manager.erl
+++ b/src/couch/src/couch_proc_manager.erl
@@ -25,8 +25,7 @@
     new_proc/1,
     reload/0,
     terminate_stale_procs/0,
-    get_servers_from_env/1,
-    native_query_server_enabled/0
+    allowed_languages/0
 ]).
 
 -export([
@@ -503,6 +502,18 @@ split_by_char(String, Char) ->
     {Key, Value}.
 
 get_servers_from_env(Spec) ->
+    case persistent_term:get({?MODULE, Spec}, undefined) of
+        undefined ->
+            % Save some time since os:getenv() can potentially be very large
+            % and we don't want to reparse it all every time.
+            Val = get_servers_from_env_raw(Spec),
+            persistent_term:put({?MODULE, Spec}, Val),
+            Val;
+        Val ->
+            Val
+    end.
+
+get_servers_from_env_raw(Spec) ->
     SpecLen = length(Spec),
     % loop over os:getenv(), match SPEC_
     lists:filtermap(
@@ -517,6 +528,23 @@ get_servers_from_env(Spec) ->
         os:getenv()
     ).
 
+% Get the list of all the languages we know how to handle. Some are built-in
+% and some are configurable via config settings or env vars
+%
+allowed_languages() ->
+    % These are always available
+    BuiltIn = [<<"javascript">>, <<"javascript_quickjs">>, <<"query">>],
+    Config =
+        get_servers_from_env("COUCHDB_QUERY_SERVER_") ++
+            get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_"),
+    Allowed0 = [list_to_binary(string:to_lower(Lang)) || {Lang, _Cmd} <- 
Config],
+    Allowed =
+        case native_query_server_enabled() of
+            true -> [<<"erlang">> | Allowed0];
+            _Else -> Allowed0
+        end,
+    lists:usort(BuiltIn ++ Allowed).
+
 configure_language_servers() ->
     ets:delete_all_objects(?SERVERS),
     % NOTE: process environment variables cannot be altered except by
diff --git a/src/couch_index/src/couch_index_util.erl 
b/src/couch_index/src/couch_index_util.erl
index 9a16d06d6..5812926af 100644
--- a/src/couch_index/src/couch_index_util.erl
+++ b/src/couch_index/src/couch_index_util.erl
@@ -15,6 +15,7 @@
 -export([root_dir/0, index_dir/2, index_file/3]).
 -export([load_doc/3, sort_lib/1, hexsig/1]).
 -export([get_purge_checkpoints/2, cleanup_purges/3]).
+-export([delete_checkpoint/2]).
 
 -include_lib("couch/include/couch_db.hrl").
 
diff --git a/src/couch_mrview/src/couch_mrview_index.erl 
b/src/couch_mrview/src/couch_mrview_index.erl
index 164227b49..f0a649b77 100644
--- a/src/couch_mrview/src/couch_mrview_index.erl
+++ b/src/couch_mrview/src/couch_mrview_index.erl
@@ -292,13 +292,22 @@ ensure_local_purge_docs(DbName, DDocs) ->
     end).
 
 ensure_local_purge_doc(Db, #mrst{} = State) ->
+    IsValid = couch_mrview_util:mrst_has_valid_views(State),
     Sig = couch_index_util:hexsig(get(signature, State)),
     DocId = couch_mrview_util:get_local_purge_doc_id(Sig),
-    case couch_db:open_doc(Db, DocId, []) of
-        {not_found, _} ->
+    case {IsValid, couch_db:open_doc(Db, DocId, [])} of
+        {true, {not_found, _}} ->
+            % Is valid and doc doesn't exist => create it
             create_local_purge_doc(Db, State);
-        {ok, _} ->
-            ok
+        {false, {not_found, _}} ->
+            % Invalid and doc doesn't exist => do nothing
+            ok;
+        {true, {ok, _}} ->
+            % Is valid and doc exists already => do nothing
+            ok;
+        {false, {ok, _}} ->
+            % Invalid and doc exists => cleanup
+            couch_index_util:delete_checkpoint(Db, DocId)
     end.
 
 create_local_purge_doc(Db, State) ->
diff --git a/src/couch_mrview/src/couch_mrview_util.erl 
b/src/couch_mrview/src/couch_mrview_util.erl
index 494f5b3ec..48138bf76 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -18,6 +18,7 @@
 -export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]).
 -export([get_signatures_from_ddocs/2]).
 -export([ddoc_to_mrst/2, init_state/4, reset_index/3]).
+-export([mrst_has_valid_views/1]).
 -export([make_header/1]).
 -export([index_file/2, compaction_file/2, open_file/1]).
 -export([delete_files/2, delete_index_file/2, delete_compaction_file/2]).
@@ -112,12 +113,23 @@ get_signatures(Db) ->
     get_signatures_from_ddocs(DbName, DDocs1).
 
 % From a list of design #doc{} records returns signatures map: #{Sig => true}
-%
+% This will be valid signatures of views we expect to run and build on this
+% node.
 get_signatures_from_ddocs(DbName, DDocs) when is_list(DDocs) ->
     FoldFun = fun(#doc{} = Doc, Acc) ->
-        {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
-        Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
-        Acc#{Sig => true}
+        try ddoc_to_mrst(DbName, Doc) of
+            {ok, Mrst} ->
+                case couch_mrview_util:mrst_has_valid_views(Mrst) of
+                    true ->
+                        Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+                        Acc#{Sig => true};
+                    false ->
+                        Acc
+                end
+        catch
+            _:_ ->
+                Acc
+        end
     end,
     lists:foldl(FoldFun, #{}, DDocs).
 
@@ -346,6 +358,21 @@ view_sig_term(BaseSig, UpdateSeq, PurgeSeq) ->
 view_sig_term(BaseSig, UpdateSeq, PurgeSeq, Args) ->
     {BaseSig, UpdateSeq, PurgeSeq, Args}.
 
+% We can construct an MRSt from design document which may have languages this
+% server doesn't support, or may have no views. That view group won't build on
+% this server, and we can use this utility function to filter it out. Ken and
+% purge checkpoint cleanups use this function.
+%
+mrst_has_valid_views(MRSt) ->
+    case couch_mrview_index:get(views, MRSt) of
+        [_ | _] ->
+            Language = couch_mrview_index:get(language, MRSt),
+            AllowedLanguages = couch_proc_manager:allowed_languages(),
+            lists:member(Language, AllowedLanguages);
+        [] ->
+            false
+    end.
+
 init_state(Db, Fd, #mrst{views = Views} = State, nil) ->
     PurgeSeq = couch_db:get_purge_seq(Db),
     Header = #mrheader{
diff --git a/src/fabric/test/eunit/fabric_tests.erl 
b/src/fabric/test/eunit/fabric_tests.erl
index c092f4473..daf018231 100644
--- a/src/fabric/test/eunit/fabric_tests.erl
+++ b/src/fabric/test/eunit/fabric_tests.erl
@@ -27,7 +27,9 @@ cleanup_index_files_test_() ->
             ?TDEF_FE(t_cleanup_index_files_with_view_data),
             ?TDEF_FE(t_cleanup_index_files_with_deleted_db),
             ?TDEF_FE(t_cleanup_index_file_after_ddoc_update),
-            ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete)
+            ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete),
+            ?TDEF_FE(t_cleanup_empty_view_checkpoints),
+            ?TDEF_FE(t_cleanup_disallowed_language_checkpoints)
         ]
     }.
 
@@ -151,6 +153,76 @@ t_cleanup_index_file_after_ddoc_delete({_, DbName}) ->
     ?assertEqual([], indices(DbName)),
     ?assertEqual([], purges(DbName)).
 
+t_cleanup_empty_view_checkpoints({_, DbName}) ->
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "da817c3d3f7413c1a610f25635a0c521.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">>
+        ],
+        purges(DbName)
+    ),
+
+    update_empty_ddoc(DbName, <<"_design/foo">>),
+    {ok, _} = fabric:get_view_group_info(DbName, <<"foo">>),
+    ok = fabric:cleanup_index_files_all_nodes(DbName),
+
+    % One 4bc stays, da8 should gone. If it weren't for the check to
+    % empty views it would have used signature 
"3e823c2a4383ac0c18d4e574135a5b08"
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>
+        ],
+        purges(DbName)
+    ).
+
+t_cleanup_disallowed_language_checkpoints({_, DbName}) ->
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view",
+            "da817c3d3f7413c1a610f25635a0c521.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>,
+            <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">>
+        ],
+        purges(DbName)
+    ),
+
+    update_invalid_language(DbName, <<"_design/foo">>, <<"bar">>),
+    {ok, _} = fabric:get_view_group_info(DbName, <<"foo">>),
+    ok = fabric:cleanup_index_files_all_nodes(DbName),
+
+    % One 4bc stays, the new view uses an invalid language which can't index 
with
+    % so we don't create a checkpoint for it
+    ?assertEqual(
+        [
+            "4bcdf852098ff6b0578ddf472c320e9c.view"
+        ],
+        indices(DbName)
+    ),
+    ?assertEqual(
+        [
+            <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>
+        ],
+        purges(DbName)
+    ).
+
 shard_names(DbName) ->
     [mem3:name(S) || S <- mem3:local_shards(DbName)].
 
@@ -239,6 +311,28 @@ update_ddoc(DbName, DDocId, ViewName) ->
     },
     fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
 
+update_empty_ddoc(DbName, DDocId) ->
+    {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]),
+    DDoc = DDoc0#doc{body = {[]}},
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
+
+update_invalid_language(DbName, DDocId, ViewName) ->
+    {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]),
+    DDoc = DDoc0#doc{
+        body =
+            {[
+                {<<"language">>, <<"cobol">>},
+                {<<"views">>,
+                    {[
+                        {ViewName,
+                            {[
+                                {<<"map">>, <<"MAIN-PROCEDURE. PERFORM MAP">>}
+                            ]}}
+                    ]}}
+            ]}
+    },
+    fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]).
+
 delete_ddoc(DbName, DDocId) ->
     {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]),
     DDoc = DDoc0#doc{deleted = true, body = {[]}},
diff --git a/src/ken/src/ken_server.erl b/src/ken/src/ken_server.erl
index c51ae4304..26c611eae 100644
--- a/src/ken/src/ken_server.erl
+++ b/src/ken/src/ken_server.erl
@@ -361,15 +361,12 @@ should_update(#doc{body = {Props}}, IndexType) ->
     end.
 
 update_ddoc_views(Name, MRSt, Seq, State) ->
-    Language = couch_mrview_index:get(language, MRSt),
-    Allowed = lists:member(Language, allowed_languages()),
-    Views = couch_mrview_index:get(views, MRSt),
-    if
-        Allowed andalso Views =/= [] ->
+    case couch_mrview_util:mrst_has_valid_views(MRSt) of
+        true ->
             {ok, Pid} = couch_index_server:get_index(couch_mrview_index, MRSt),
             GroupName = couch_mrview_index:get(idx_name, MRSt),
             maybe_start_job({Name, GroupName}, Pid, Seq, State);
-        true ->
+        false ->
             ok
     end.
 
@@ -531,20 +528,6 @@ prune_worker_table(State) ->
     ets:select_delete(ken_workers, [{MatchHead, [Guard], [true]}]),
     State#state{pruned_last = erlang:monotonic_time()}.
 
-allowed_languages() ->
-    % These are always available
-    BuiltIn = [<<"javascript">>, <<"javascript_quickjs">>, <<"query">>],
-    Config =
-        couch_proc_manager:get_servers_from_env("COUCHDB_QUERY_SERVER_") ++
-            
couch_proc_manager:get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_"),
-    Allowed0 = [list_to_binary(string:to_lower(Lang)) || {Lang, _Cmd} <- 
Config],
-    Allowed =
-        case couch_proc_manager:native_query_server_enabled() of
-            true -> [<<"erlang">> | Allowed0];
-            _Else -> Allowed0
-        end,
-    lists:usort(BuiltIn ++ Allowed).
-
 config(Key, Default) ->
     config:get("ken", Key, Default).
 

Reply via email to