This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch 3.3.x-pending-changes-3.3.3
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 49e852d50de34991849a89881853155d65a83d88
Author: Nick Vatamaniuc <vatam...@gmail.com>
AuthorDate: Thu Jun 15 20:32:46 2023 -0400

    Improve emitted change feed sequence after a split
    
    When we get sequence before the split, we'll fill in the missing (now split)
    ranges with a special `{split, OldNodeUUid}` marker. However, when sequences
    are emitted in the changes API, that will make the N- prefix (SeqSum) bounce
    around from higher to lower numbers, while users expect those to be mostly
    incrementing. So take a conservative approach and assume it will be a full
    rewind for that range, and use 0 instead. This is a purely cosmetic thing, 
when
    we decode the sequence that prefix gets thrown away anyway.
    
    To emphasize that the sequence prefix is mostly a visual aid, rename the 
seq/1
    function to fake_packed_seq/1 with a comment about what it does and a note
    about the special split case.
    
    Fixes a part of issue: #4640
---
 src/fabric/src/fabric_view_changes.erl | 37 ++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/fabric/src/fabric_view_changes.erl 
b/src/fabric/src/fabric_view_changes.erl
index fd439c5c3..970a200d2 100644
--- a/src/fabric/src/fabric_view_changes.erl
+++ b/src/fabric/src/fabric_view_changes.erl
@@ -421,13 +421,28 @@ pending_count(Dict) ->
 
 pack_seqs(Workers) ->
     SeqList = [{N, R, S} || {#shard{node = N, range = R}, S} <- Workers],
-    SeqSum = lists:sum([seq(S) || {_, _, S} <- SeqList]),
+    SeqSum = lists:sum([fake_packed_seq(S) || {_, _, S} <- SeqList]),
     Opaque = couch_util:encodeBase64Url(?term_to_bin(SeqList, [compressed])),
     ?l2b([integer_to_list(SeqSum), $-, Opaque]).
 
-seq({Seq, _Uuid, _Node}) -> Seq;
-seq({Seq, _Uuid}) -> Seq;
-seq(Seq) -> Seq.
+% Generate the sequence number used to build the emitted N-... prefix.
+%
+% The prefix is discarded when the sequence is decoded in the current CouchDB
+% version. It's mostly there as a visual heuristic about all the packed
+% sequences and to keep backwards compatibility with previous CouchDB versions.
+%
+% We handle a few backwards compatibility clauses, and also when we get a
+% sequence before a shard split. In that case, we fill in the missing ranges
+% with a special {split, OldNodeUUid} marker. However, when sequences are
+% emitted, that will make the N- prefix (SeqSum) bounce around from higher to
+% lower numbers if we leave it as is. To fix that, use 0 for that range,
+% assuming it will be full rewind. That way the N-... prefix is more likely to
+% be incrementing, which is what users would generally expect.
+%
+fake_packed_seq({Seq, {split, <<_/binary>>}, _Node}) when is_integer(Seq) -> 0;
+fake_packed_seq({Seq, _Uuid, _Node}) -> Seq;
+fake_packed_seq({Seq, _Uuid}) -> Seq;
+fake_packed_seq(Seq) -> Seq.
 
 unpack_seq_regex_match(Packed) ->
     Pattern = "^\"?([0-9]+-)?(?<opaque>.*?)\"?$",
@@ -1065,4 +1080,18 @@ find_replacement_sequence_test() ->
     ?assertEqual(0, find_replacement_sequence(Dead4, [0, 10])),
     ?assertEqual(0, find_replacement_sequence(Dead4, [0, 5])).
 
+pack_split_seq_test() ->
+    Shards = [{"n1", 0, 10}, {"n2", 11, ?RING_END}],
+    Workers = mk_workers(Shards, {42, {split, <<"abc1234">>}, 
'node1@127.0.0.1'}),
+    PackedSeq = pack_seqs(Workers),
+    ?assertMatch(<<"0-", _/binary>>, PackedSeq),
+    DecodedSeq = decode_seq(PackedSeq),
+    ?assertEqual(
+        [
+            {n1, [0, 10], {42, {split, <<"abc1234">>}, 'node1@127.0.0.1'}},
+            {n2, [11, 4294967295], {42, {split, <<"abc1234">>}, 
'node1@127.0.0.1'}}
+        ],
+        DecodedSeq
+    ).
+
 -endif.

Reply via email to