janl opened a new issue, #5393:
URL: https://github.com/apache/couchdb/issues/5393
Heya, for hot-fixing a cluster I had to write a `_conflicts` and
`_deleted_conflicts` finder that I could paste into remsh. I thought this might be a
good idea to turn into a couch scanner plugin, but I don’t have time to do this
right now. If someone would like to take this on, you’re all very welcome.
This is “make it work for me” levels of good code, but I ran this across
~2TB worth of shard files without issue. The “progress bar” can probably be taken
out and the doc/s reporting needs to go into stats, but that’s all.
Here’s my code:
```erlang
% Load record definitions into the shell so that the record syntax used
% further down (#user_ctx{}, #doc_info{}, #rev_info{}, #changes_acc{}) can be
% evaluated.
rr(couch_db).
rr(couch_changes).
% Reporting threshold: MinConflictsFilter drops every entry whose count is
% lower than this value.
MinConflicts = 5.
% NOTE(review): presumably classifies this shell process's I/O as
% compaction-priority so the scan does not compete with regular requests --
% confirm against the ioq documentation.
ioq:set_io_priority({compaction, self()}).
% Ordering fun for lists:sort/2: orders {Id, Count} pairs by Count, highest
% first.
% Uses >= rather than > because lists:sort/2 expects the fun to return true
% when its first argument compares less than or equal to the second in the
% intended ordering. With a strict comparison, entries with equal counts were
% swapped relative to their input order; with >= they keep it.
Sorter = fun({_, CountA}, {_, CountB}) -> CountA >= CountB end.
% Predicate for lists:filter/2.
% Keeps an {Id, Count} pair only when Count is at least MinConflicts; any
% term that is not a 2-tuple is kept as well.
MinConflictsFilter = fun
    ({_Id, Count}) -> Count >= MinConflicts;
    (_Other) -> true
end.
% Records a per-document count in an accumulator list.
%   Id    - document id
%   Count - number of matching revisions found for that document
%   Acc   - list of {Id, Count} pairs collected so far
% Returns Acc unchanged when Count is 0, otherwise Acc with {Id, Count}
% prepended (a plain cons; no need to build a one-element list and append).
MaybeAppend = fun
    (_Id, 0, Acc) -> Acc;
    (Id, Count, Acc) -> [{Id, Count} | Acc]
end.
% Returns doc_count + del_doc_count as reported by couch_db:get_db_info/1
% for Db. A counter that is missing from the info list counts as 0.
GetDocCount = fun(Db) ->
    {ok, Info} = couch_db:get_db_info(Db),
    Counter = fun(Key) -> proplists:get_value(Key, Info, 0) end,
    Counter(doc_count) + Counter(del_doc_count)
end.
% Prints a progress marker whenever the scan has advanced by more than ten
% percentage points since the previous marker.
%   Begin     - os:timestamp() value taken when the scan started
%   Total     - number of documents expected in total
%   Processed - number of documents handled so far
%   Previous  - percentage at which the last marker was printed
% Returns the percentage to pass as Previous on the next call: the freshly
% printed percentage if a marker was written, otherwise Previous unchanged.
MaybePrintStats = fun
    (_Begin, 0, _Processed, Previous) ->
        % No documents expected: a percentage cannot be computed.
        Previous;
    (_Begin, _Total, 0, Previous) ->
        % Nothing processed yet: there is no rate to report.
        Previous;
    (Begin, Total, Processed, Previous) ->
        Percent = 100 / Total * Processed,
        case Percent - Previous > 10 of
            true ->
                Now = os:timestamp(),
                % now_diff/2 yields microseconds; convert to seconds.
                Seconds = timer:now_diff(Now, Begin) / 1000 / 1000,
                Rate = Processed / Seconds,
                io:format("~p% (~.2f docs/s) ", [trunc(Percent), Rate]),
                Percent;
            false ->
                Previous
        end
end.
% Scans a single database for documents that carry extra revisions besides
% the first entry of their doc_info revs list.
%   DbName - name of the database to open with couch_db:open/2
% Returns {Conflicts, DeletedConflicts}: two lists of {DocId, Count} pairs,
% each sorted with Sorter and then filtered with MinConflictsFilter. Count is
% the number of extra revisions that are not deleted (Conflicts) or deleted
% (DeletedConflicts). Progress and the total number of processed documents
% are printed to the shell.
Scanner = fun(DbName) ->
    OpenOpts = [{user_ctx, #user_ctx{name = <<"admin">>, roles = [<<"_admin">>]}}],
    {ok, Db} = couch_db:open(DbName, OpenOpts),
    % The fold runs inside try/after so the database handle is closed even
    % when the fold (or the callback) crashes part-way through.
    {ok, ChangesAcc} =
        try
            Begin = os:timestamp(),
            io:format("~n ~p: ", [DbName]),
            DocCount = GetDocCount(Db),
            UserFun = fun(FullDocInfo, Acc) ->
                DocInfo = couch_doc:to_doc_info(FullDocInfo),
                DocId = DocInfo#doc_info.id,
                % NOTE(review): assumes the first entry is the winning
                % revision and the rest are conflicts -- confirm against
                % couch_doc:to_doc_info/1.
                [_Winner | Revs] = DocInfo#doc_info.revs,
                {DeletedRevs, LiveRevs} = lists:partition(
                    fun(RevInfo) -> RevInfo#rev_info.deleted end,
                    Revs
                ),
                {AccDeletedConflicts, AccConflicts, Processed, LastPrinted} =
                    Acc#changes_acc.user_acc,
                NewLastPrinted = MaybePrintStats(Begin, DocCount, Processed, LastPrinted),
                {ok, Acc#changes_acc{
                    user_acc = {
                        MaybeAppend(DocId, length(DeletedRevs), AccDeletedConflicts),
                        MaybeAppend(DocId, length(LiveRevs), AccConflicts),
                        Processed + 1,
                        NewLastPrinted
                    }
                }}
            end,
            StartSeq = 0,
            % user_acc layout: {DeletedConflicts, Conflicts, DocsProcessed, LastPrinted}
            UserAcc = #changes_acc{user_acc = {[], [], 0, 0}},
            Opts = [{include_docs, true}, {deleted, true}],
            couch_db:fold_changes(Db, StartSeq, UserFun, UserAcc, Opts)
        after
            couch_db:close(Db)
        end,
    {DeletedConflicts, Conflicts, TotalProcessed, _} = ChangesAcc#changes_acc.user_acc,
    io:format("Total Docs Processed: ~p ", [TotalProcessed]),
    ConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, Conflicts)),
    DeletedConflictsSorted =
        lists:filter(MinConflictsFilter, lists:sort(Sorter, DeletedConflicts)),
    {ConflictsSorted, DeletedConflictsSorted}
end.
% Runs Scanner over every database returned by couch_server:all_databases/0
% and prints the two result lists for each one. Returns ok.
Enumerator = fun() ->
    {ok, AllShards} = couch_server:all_databases(),
    Report = fun(Shard) ->
        {Conflicts, DeletedConflicts} = Scanner(Shard),
        io:format(
            "~n Conflicts: ~n ~p ~n\nDeletedConflicts: ~n ~p",
            [Conflicts, DeletedConflicts]
        )
    end,
    lists:foreach(Report, AllShards)
end.
% Kick off the scan.
Enumerator().
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]