This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch avoid-creating-invalid-view-purge-checkpoints in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 5a1b43836d784e869d38d372a9721b24478814cb Author: Nick Vatamaniuc <[email protected]> AuthorDate: Sat Mar 7 00:21:00 2026 -0500 Avoid creating purge checkpoints for invalid views Previously, if we successfully parsed a design document into an #mrst{} record, we would create a purge checkpoint even if the view group didn't have any views, or used an invalid language. Normally, having extra unused checkpoints around would be just bit untidy, however, when we start purging, these checkpoints won't be advancing, and after a while prevent the compactor from compacting the purge sequences and advancing the minimum purge sequence forward. In other words, we need to differentiate between pars-able #mrst{} view groups and those that can actually builid on a particular node. Ken (our background index builder) was already performing the same check, so simply move the check to the proc manager and the the indexing utility modules so it can be reused. In the proc manager, noticed that the repeated OS environment fetching and parsing was kind of time consuming (100s of microseconds) so used a persistent term. --- src/couch/src/couch_proc_manager.erl | 32 +++++++++- src/couch_index/src/couch_index_util.erl | 1 + src/couch_mrview/src/couch_mrview_index.erl | 17 +++-- src/couch_mrview/src/couch_mrview_util.erl | 35 +++++++++-- src/fabric/test/eunit/fabric_tests.erl | 96 ++++++++++++++++++++++++++++- src/ken/src/ken_server.erl | 23 +------ 6 files changed, 173 insertions(+), 31 deletions(-) diff --git a/src/couch/src/couch_proc_manager.erl b/src/couch/src/couch_proc_manager.erl index aa538e23e..e8813f93a 100644 --- a/src/couch/src/couch_proc_manager.erl +++ b/src/couch/src/couch_proc_manager.erl @@ -25,8 +25,7 @@ new_proc/1, reload/0, terminate_stale_procs/0, - get_servers_from_env/1, - native_query_server_enabled/0 + allowed_languages/0 ]). -export([ @@ -503,6 +502,18 @@ split_by_char(String, Char) -> {Key, Value}. get_servers_from_env(Spec) -> + case persistent_term:get({?MODULE, Spec}, undefined) of + undefined -> + % Save some time since os:getenv() can potentially be very large + % and we don't want to reparse it all every time. + Val = get_servers_from_env_raw(Spec), + persistent_term:put({?MODULE, Spec}, Val), + Val; + Val -> + Val + end. + +get_servers_from_env_raw(Spec) -> SpecLen = length(Spec), % loop over os:getenv(), match SPEC_ lists:filtermap( @@ -517,6 +528,23 @@ get_servers_from_env(Spec) -> os:getenv() ). +% Get the list of all the languages we know how handle. Some are built-in +% and some are configurable via config settings or env vars +% +allowed_languages() -> + % These are always available + BuiltIn = [<<"javascript">>, <<"javascript_quickjs">>, <<"query">>], + Config = + get_servers_from_env("COUCHDB_QUERY_SERVER_") ++ + get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_"), + Allowed0 = [list_to_binary(string:to_lower(Lang)) || {Lang, _Cmd} <- Config], + Allowed = + case native_query_server_enabled() of + true -> [<<"erlang">> | Allowed0]; + _Else -> Allowed0 + end, + lists:usort(BuiltIn ++ Allowed). + configure_language_servers() -> ets:delete_all_objects(?SERVERS), % NOTE: process environment variables cannot be altered except by diff --git a/src/couch_index/src/couch_index_util.erl b/src/couch_index/src/couch_index_util.erl index 9a16d06d6..5812926af 100644 --- a/src/couch_index/src/couch_index_util.erl +++ b/src/couch_index/src/couch_index_util.erl @@ -15,6 +15,7 @@ -export([root_dir/0, index_dir/2, index_file/3]). -export([load_doc/3, sort_lib/1, hexsig/1]). -export([get_purge_checkpoints/2, cleanup_purges/3]). +-export([delete_checkpoint/2]). -include_lib("couch/include/couch_db.hrl"). diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index 164227b49..f0a649b77 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -292,13 +292,22 @@ ensure_local_purge_docs(DbName, DDocs) -> end). ensure_local_purge_doc(Db, #mrst{} = State) -> + IsValid = couch_mrview_util:mrst_has_valid_views(State), Sig = couch_index_util:hexsig(get(signature, State)), DocId = couch_mrview_util:get_local_purge_doc_id(Sig), - case couch_db:open_doc(Db, DocId, []) of - {not_found, _} -> + case {IsValid, couch_db:open_doc(Db, DocId, [])} of + {true, {not_found, _}} -> + % Is valid and doc doesn't exist => create it create_local_purge_doc(Db, State); - {ok, _} -> - ok + {false, {not_found, _}} -> + % Invalid and doc doesn't exist => do nothing + ok; + {true, {ok, _}} -> + % Is valid and doc exists already => do nothing + ok; + {false, {ok, _}} -> + % Invalid and doc exists => cleanup + couch_index_util:delete_checkpoint(Db, DocId) end. create_local_purge_doc(Db, State) -> diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index 494f5b3ec..48138bf76 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -18,6 +18,7 @@ -export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]). -export([get_signatures_from_ddocs/2]). -export([ddoc_to_mrst/2, init_state/4, reset_index/3]). +-export([mrst_has_valid_views/1]). -export([make_header/1]). -export([index_file/2, compaction_file/2, open_file/1]). -export([delete_files/2, delete_index_file/2, delete_compaction_file/2]). @@ -112,12 +113,23 @@ get_signatures(Db) -> get_signatures_from_ddocs(DbName, DDocs1). % From a list of design #doc{} records returns signatures map: #{Sig => true} -% +% This will be valid signatures of views we expect to run and build on this +% node. get_signatures_from_ddocs(DbName, DDocs) when is_list(DDocs) -> FoldFun = fun(#doc{} = Doc, Acc) -> - {ok, Mrst} = ddoc_to_mrst(DbName, Doc), - Sig = couch_util:to_hex_bin(Mrst#mrst.sig), - Acc#{Sig => true} + try ddoc_to_mrst(DbName, Doc) of + {ok, Mrst} -> + case couch_mrview_util:mrst_has_valid_views(Mrst) of + true -> + Sig = couch_util:to_hex_bin(Mrst#mrst.sig), + Acc#{Sig => true}; + false -> + Acc + end + catch + _:_ -> + Acc + end end, lists:foldl(FoldFun, #{}, DDocs). @@ -346,6 +358,21 @@ view_sig_term(BaseSig, UpdateSeq, PurgeSeq) -> view_sig_term(BaseSig, UpdateSeq, PurgeSeq, Args) -> {BaseSig, UpdateSeq, PurgeSeq, Args}. +% We can construct an MRSt from design document which may have languages this +% server doesn't support, or may have no views. That view group won't build on +% this server, and we can use this utility function to filter it out. Ken and +% purge checkpoint cleanups use this function. +% +mrst_has_valid_views(MRSt) -> + case couch_mrview_index:get(views, MRSt) of + [_ | _] -> + Language = couch_mrview_index:get(language, MRSt), + AllowedLanguages = couch_proc_manager:allowed_languages(), + lists:member(Language, AllowedLanguages); + [] -> + false + end. + init_state(Db, Fd, #mrst{views = Views} = State, nil) -> PurgeSeq = couch_db:get_purge_seq(Db), Header = #mrheader{ diff --git a/src/fabric/test/eunit/fabric_tests.erl b/src/fabric/test/eunit/fabric_tests.erl index c092f4473..daf018231 100644 --- a/src/fabric/test/eunit/fabric_tests.erl +++ b/src/fabric/test/eunit/fabric_tests.erl @@ -27,7 +27,9 @@ cleanup_index_files_test_() -> ?TDEF_FE(t_cleanup_index_files_with_view_data), ?TDEF_FE(t_cleanup_index_files_with_deleted_db), ?TDEF_FE(t_cleanup_index_file_after_ddoc_update), - ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete) + ?TDEF_FE(t_cleanup_index_file_after_ddoc_delete), + ?TDEF_FE(t_cleanup_empty_view_checkpoints), + ?TDEF_FE(t_cleanup_disallowed_language_checkpoints) ] }. @@ -151,6 +153,76 @@ t_cleanup_index_file_after_ddoc_delete({_, DbName}) -> ?assertEqual([], indices(DbName)), ?assertEqual([], purges(DbName)). +t_cleanup_empty_view_checkpoints({_, DbName}) -> + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view", + "da817c3d3f7413c1a610f25635a0c521.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>, + <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">> + ], + purges(DbName) + ), + + update_empty_ddoc(DbName, <<"_design/foo">>), + {ok, _} = fabric:get_view_group_info(DbName, <<"foo">>), + ok = fabric:cleanup_index_files_all_nodes(DbName), + + % One 4bc stays, da8 should gone. If it weren't for the check to + % empty views it would have used signature "3e823c2a4383ac0c18d4e574135a5b08" + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">> + ], + purges(DbName) + ). + +t_cleanup_disallowed_language_checkpoints({_, DbName}) -> + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view", + "da817c3d3f7413c1a610f25635a0c521.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">>, + <<"_local/purge-mrview-da817c3d3f7413c1a610f25635a0c521">> + ], + purges(DbName) + ), + + update_invalid_language(DbName, <<"_design/foo">>, <<"bar">>), + {ok, _} = fabric:get_view_group_info(DbName, <<"foo">>), + ok = fabric:cleanup_index_files_all_nodes(DbName), + + % One 4bc stays, the new view uses an invalid language which can't index with + % so we don't create a checkpoint for it + ?assertEqual( + [ + "4bcdf852098ff6b0578ddf472c320e9c.view" + ], + indices(DbName) + ), + ?assertEqual( + [ + <<"_local/purge-mrview-4bcdf852098ff6b0578ddf472c320e9c">> + ], + purges(DbName) + ). + shard_names(DbName) -> [mem3:name(S) || S <- mem3:local_shards(DbName)]. @@ -239,6 +311,28 @@ update_ddoc(DbName, DDocId, ViewName) -> }, fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]). +update_empty_ddoc(DbName, DDocId) -> + {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]), + DDoc = DDoc0#doc{body = {[]}}, + fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]). + +update_invalid_language(DbName, DDocId, ViewName) -> + {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]), + DDoc = DDoc0#doc{ + body = + {[ + {<<"language">>, <<"cobol">>}, + {<<"views">>, + {[ + {ViewName, + {[ + {<<"map">>, <<"MAIN-PROCEDURE. PERFORM MAP">>} + ]}} + ]}} + ]} + }, + fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]). + delete_ddoc(DbName, DDocId) -> {ok, DDoc0} = fabric:open_doc(DbName, DDocId, [?ADMIN_CTX]), DDoc = DDoc0#doc{deleted = true, body = {[]}}, diff --git a/src/ken/src/ken_server.erl b/src/ken/src/ken_server.erl index c51ae4304..26c611eae 100644 --- a/src/ken/src/ken_server.erl +++ b/src/ken/src/ken_server.erl @@ -361,15 +361,12 @@ should_update(#doc{body = {Props}}, IndexType) -> end. update_ddoc_views(Name, MRSt, Seq, State) -> - Language = couch_mrview_index:get(language, MRSt), - Allowed = lists:member(Language, allowed_languages()), - Views = couch_mrview_index:get(views, MRSt), - if - Allowed andalso Views =/= [] -> + case couch_mrview_util:mrst_has_valid_views(MRSt) of + true -> {ok, Pid} = couch_index_server:get_index(couch_mrview_index, MRSt), GroupName = couch_mrview_index:get(idx_name, MRSt), maybe_start_job({Name, GroupName}, Pid, Seq, State); - true -> + false -> ok end. @@ -531,20 +528,6 @@ prune_worker_table(State) -> ets:select_delete(ken_workers, [{MatchHead, [Guard], [true]}]), State#state{pruned_last = erlang:monotonic_time()}. -allowed_languages() -> - % These are always available - BuiltIn = [<<"javascript">>, <<"javascript_quickjs">>, <<"query">>], - Config = - couch_proc_manager:get_servers_from_env("COUCHDB_QUERY_SERVER_") ++ - couch_proc_manager:get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_"), - Allowed0 = [list_to_binary(string:to_lower(Lang)) || {Lang, _Cmd} <- Config], - Allowed = - case couch_proc_manager:native_query_server_enabled() of - true -> [<<"erlang">> | Allowed0]; - _Else -> Allowed0 - end, - lists:usort(BuiltIn ++ Allowed). - config(Key, Default) -> config:get("ken", Key, Default).
