This is an automated email from the ASF dual-hosted git repository. rnewson pushed a commit to branch epoch-validation in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 7a79b36efd696132223826ebedbb5299f286d886 Author: Robert Newson <rnew...@apache.org> AuthorDate: Mon Jul 15 23:25:16 2024 +0100 Validate epoch transitions We've learned that `node()` can return `nonode@nohost` in a distributed node while it is crashing (if net_kernel dies, say, but perhaps other causes). We've assumed this can never happen, and have seen cases where `nonode@nohost` has been recorded in the epoch list. Tighten the code so we get the node name from the init arguments instead as these are immutable. Also validate that the associated update sequence never goes down (it can stay the same, which happens if the file is copied from one machine to another). --- src/couch/src/couch_bt_engine_header.erl | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/couch/src/couch_bt_engine_header.erl b/src/couch/src/couch_bt_engine_header.erl index e28f07723..ac59dc8b5 100644 --- a/src/couch/src/couch_bt_engine_header.erl +++ b/src/couch/src/couch_bt_engine_header.erl @@ -261,6 +261,7 @@ upgrade_uuid(#db_header{} = Header) -> end. upgrade_epochs(#db_header{} = Header) -> + Node = init_node_name(), NewEpochs = case Header#db_header.epochs of undefined -> @@ -269,14 +270,14 @@ upgrade_epochs(#db_header{} = Header) -> % was always an implicit assumption that a file was % owned since eternity by the node it was on. This % just codifies that assumption. - [{node(), 0}]; - [{Node, _} | _] = Epochs0 when Node == node() -> + [{Node, 0}]; + [{Node, S} | _] = Epochs0 when Header#db_header.update_seq >= S -> % Current node is the current owner of this db Epochs0; - Epochs1 -> + [{_OtherNode, S} | _] = Epochs1 when Header#db_header.update_seq >= S -> % This node is taking over ownership of this db % and marking the update sequence where it happened. 
- [{node(), Header#db_header.update_seq} | Epochs1] + [{Node, Header#db_header.update_seq} | Epochs1] end, % It's possible for a node to open a db and claim % ownership but never make a write to the db. This @@ -285,6 +286,20 @@ upgrade_epochs(#db_header{} = Header) -> DedupedEpochs = remove_dup_epochs(NewEpochs), Header#db_header{epochs = DedupedEpochs}. +%% get the node name from the start up configuration as node() +%% can return nonode@nohost if net_kernel crashes. +init_node_name() -> + init_node_name(init:get_arguments()). + +init_node_name([]) -> + nonode@nohost; +init_node_name([{name, [Name]} | _Rest]) -> + list_to_existing_atom(Name); +init_node_name([{sname, [Name]} | _Rest]) -> + list_to_existing_atom(Name); +init_node_name([_ | Rest]) -> + init_node_name(Rest). + % This is slightly relying on the update_seq's being sorted % in epochs due to how we only ever push things onto the % front. Although if we ever had a case where the update_seq