This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch 3.3.x-pending-changes-3.3.3 in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 49e852d50de34991849a89881853155d65a83d88 Author: Nick Vatamaniuc <vatam...@gmail.com> AuthorDate: Thu Jun 15 20:32:46 2023 -0400 Improve emitted change feed sequence after a split When we get sequence before the split, we'll fill in the missing (now split) ranges with a special `{split, OldNodeUUid}` marker. However, when sequences are emitted in the changes API, that will make the N- prefix (SeqSum) bounce around from higher to lower numbers, while users expect those to be mostly incrementing. So take a conservative approach and assume it will be a full rewind for that range, and use 0 instead. This is a purely cosmetic thing, when we decode the sequence that prefix gets thrown away anyway. To emphasize that the sequence prefix is mostly a visual aid, rename the seq/1 function to fake_packed_seq/1 with a comment about what it does and a note about the special split case. Fixes a part of issue: #4640 --- src/fabric/src/fabric_view_changes.erl | 37 ++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/fabric/src/fabric_view_changes.erl b/src/fabric/src/fabric_view_changes.erl index fd439c5c3..970a200d2 100644 --- a/src/fabric/src/fabric_view_changes.erl +++ b/src/fabric/src/fabric_view_changes.erl @@ -421,13 +421,28 @@ pending_count(Dict) -> pack_seqs(Workers) -> SeqList = [{N, R, S} || {#shard{node = N, range = R}, S} <- Workers], - SeqSum = lists:sum([seq(S) || {_, _, S} <- SeqList]), + SeqSum = lists:sum([fake_packed_seq(S) || {_, _, S} <- SeqList]), Opaque = couch_util:encodeBase64Url(?term_to_bin(SeqList, [compressed])), ?l2b([integer_to_list(SeqSum), $-, Opaque]). -seq({Seq, _Uuid, _Node}) -> Seq; -seq({Seq, _Uuid}) -> Seq; -seq(Seq) -> Seq. +% Generate the sequence number used to build the emitted N-... prefix. +% +% The prefix is discarded when the sequence is decoded in the current CouchDB +% version. It's mostly there as a visual heuristic about all the packed +% sequences and to keep backwards compatibility with previous CouchDB versions. +% +% We handle a few backwards compatibility clauses, and also when we get a +% sequence before a shard split. In that case, we fill in the missing ranges +% with a special {split, OldNodeUUid} marker. However, when sequences are +% emitted, that will make the N- prefix (SeqSum) bounce around from higher to +% lower numbers if we leave it as is. To fix that, use 0 for that range, +% assuming it will be full rewind. That way the N-... prefix is more likely to +% be incrementing, which is what users would generally expect. +% +fake_packed_seq({Seq, {split, <<_/binary>>}, _Node}) when is_integer(Seq) -> 0; +fake_packed_seq({Seq, _Uuid, _Node}) -> Seq; +fake_packed_seq({Seq, _Uuid}) -> Seq; +fake_packed_seq(Seq) -> Seq. unpack_seq_regex_match(Packed) -> Pattern = "^\"?([0-9]+-)?(?<opaque>.*?)\"?$", @@ -1065,4 +1080,18 @@ find_replacement_sequence_test() -> ?assertEqual(0, find_replacement_sequence(Dead4, [0, 10])), ?assertEqual(0, find_replacement_sequence(Dead4, [0, 5])). +pack_split_seq_test() -> + Shards = [{"n1", 0, 10}, {"n2", 11, ?RING_END}], + Workers = mk_workers(Shards, {42, {split, <<"abc1234">>}, 'node1@127.0.0.1'}), + PackedSeq = pack_seqs(Workers), + ?assertMatch(<<"0-", _/binary>>, PackedSeq), + DecodedSeq = decode_seq(PackedSeq), + ?assertEqual( + [ + {n1, [0, 10], {42, {split, <<"abc1234">>}, 'node1@127.0.0.1'}}, + {n2, [11, 4294967295], {42, {split, <<"abc1234">>}, 'node1@127.0.0.1'}} + ], + DecodedSeq + ). + -endif.