This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch prettify-split-sequences in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 484cdac558bd1bb9971be46662f0851fead45e2e Author: Nick Vatamaniuc <[email protected]> AuthorDate: Thu Jun 15 20:32:46 2023 -0400 Improve emitted change feed sequence after a split When we get sequence before the split, we'll fill in the missing (now split) ranges with a special `{split, OldNodeUUid}` marker. However, when sequences are emitted in the changes API, that will make the N- prefix (SeqSum) bounce around from higher to lower numbers, while users expect those to be mostly incrementing. So take a conservative approach and assume it will be a full rewind for that range, and use 0 instead. This is a purely cosmetic thing, when we decode the sequence that prefix gets thrown away anyway. To emphasize that the sequence prefix is mostly a visual aid, rename the seq/1 function to fake_packed_seq/1 with a comment about what it does and a note about the special split case. Fixes a part of issue: #4640 --- src/fabric/src/fabric_view_changes.erl | 37 ++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/fabric/src/fabric_view_changes.erl b/src/fabric/src/fabric_view_changes.erl index fd439c5c3..a60791b0d 100644 --- a/src/fabric/src/fabric_view_changes.erl +++ b/src/fabric/src/fabric_view_changes.erl @@ -421,13 +421,28 @@ pending_count(Dict) -> pack_seqs(Workers) -> SeqList = [{N, R, S} || {#shard{node = N, range = R}, S} <- Workers], - SeqSum = lists:sum([seq(S) || {_, _, S} <- SeqList]), + SeqSum = lists:sum([fake_packed_seq(S) || {_, _, S} <- SeqList]), Opaque = couch_util:encodeBase64Url(?term_to_bin(SeqList, [compressed])), ?l2b([integer_to_list(SeqSum), $-, Opaque]). -seq({Seq, _Uuid, _Node}) -> Seq; -seq({Seq, _Uuid}) -> Seq; -seq(Seq) -> Seq. +% Generate the sequence number used to build the emitted N-... prefix. +% +% The prefix is discarded when the sequence is decoded in the current CouchDB +% version. It's mostly there as a visual heuristic about all the packed +% sequences and to keep backwards compatibility with previous CouchDB versions. +% +% We handle of few backwards compatibility clauses, and also the when we get a +% sequence before a shard split. In that case we fill in the missing (now +% split) ranges with a special {split, OldNodeUUid} marker. However, when +% sequences are emitted, that will make the N- prefix (SeqSum) bounce around +% from higher to lower numbers if we leave it as is. To fix that, use 0 for +% that range, assuming it will be full rewind. That way the N-... prefix is +% more likely to be incrementing, which is what users would generally expect. +% +fake_packed_seq({Seq, {split, <<_/binary>>}, _Node}) when is_integer(Seq) -> 0; +fake_packed_seq({Seq, _Uuid, _Node}) -> Seq; +fake_packed_seq({Seq, _Uuid}) -> Seq; +fake_packed_seq(Seq) -> Seq. unpack_seq_regex_match(Packed) -> Pattern = "^\"?([0-9]+-)?(?<opaque>.*?)\"?$", @@ -1065,4 +1080,18 @@ find_replacement_sequence_test() -> ?assertEqual(0, find_replacement_sequence(Dead4, [0, 10])), ?assertEqual(0, find_replacement_sequence(Dead4, [0, 5])). +pack_split_seq_test() -> + Shards = [{"n1", 0, 10}, {"n2", 11, ?RING_END}], + Workers = mk_workers(Shards, {42, {split, <<"abc1234">>}, '[email protected]'}), + PackedSeq = pack_seqs(Workers), + ?assertMatch(<<"0-", _/binary>>, PackedSeq), + DecodedSeq = decode_seq(PackedSeq), + ?assertEqual( + [ + {n1, [0, 10], {42, {split, <<"abc1234">>}, '[email protected]'}}, + {n2, [11, 4294967295], {42, {split, <<"abc1234">>}, '[email protected]'}} + ], + DecodedSeq + ). + -endif.
