davisp commented on a change in pull request #1370: [5/5] Clustered Purge 
Implementation
URL: https://github.com/apache/couchdb/pull/1370#discussion_r195214156
 
 

 ##########
 File path: src/mem3/src/mem3_rep.erl
 ##########
 @@ -169,20 +200,131 @@ find_source_seq_int(#doc{body={Props}}, SrcNode0, 
TgtNode0, TgtUUID, TgtSeq) ->
     end.
 
 
-repl(#acc{db = Db} = Acc0) ->
-    erlang:put(io_priority, {internal_repl, couch_db:name(Db)}),
-    #acc{seq=Seq} = Acc1 = calculate_start_seq(Acc0),
-    case Seq >= couch_db:get_update_seq(Db) of
-        true ->
-            {ok, 0};
-        false ->
-            Fun = fun ?MODULE:changes_enumerator/2,
-            {ok, Acc2} = couch_db:fold_changes(Db, Seq, Fun, Acc1),
-            {ok, #acc{seq = LastSeq}} = replicate_batch(Acc2),
-            {ok, couch_db:count_changes_since(Db, LastSeq)}
+repl(#acc{db = Db0} = Acc0) ->
+    erlang:put(io_priority, {internal_repl, couch_db:name(Db0)}),
+    Acc1 = calculate_start_seq(Acc0),
+    try
+        Acc3 = case config:get_boolean("mem3", "replicate_purges", false) of
+            true ->
+                Acc2 = pull_purges(Acc1),
+                push_purges(Acc2);
+            false ->
+                Acc1
+        end,
+        push_changes(Acc3)
+    catch
+        throw:{finished, Count} ->
+            {ok, Count}
     end.
 
 
+pull_purges(#acc{} = Acc0) ->
+    #acc{
+        batch_size = Count,
+        seq = UpdateSeq,
+        target = Target
+    } = Acc0,
+    #shard{
+        node = TgtNode,
+        name = TgtDbName
+    } = Target,
+
+    with_src_db(Acc0, fun(Db) ->
 
 Review comment:
   It's definitely subtle and I wasn't a fan either, but it was the least bad 
option I could think of at the time. As far as couch_server is concerned, the 
`#acc.db` instance guarantees that the database is held open in the `couch_db` 
ets cache, so the with_src_db call is guaranteed to be cheap in that respect.
   
   The second reason for not re-using `#acc.db` is that re-using it would make 
this bit of logic harder/impossible:
   
   
https://github.com/apache/couchdb/blob/d1b73e75891753aef6366bafbfaa1cbcbcbb4bb5/src/mem3/src/mem3_rep.erl#L81-L87
   
   Specifically, that `after` clause can't reference the return value of 
`repl/1` because it's not guaranteed to have returned. Thus threading db 
instances through means you must avoid closing the first instance the first 
time you re-open, or else you'll break that outer close. That seemed a lot more 
complicated than just using the with_src_db approach.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to