Optimize merge_updates. There are two basic strategies here. First, we move the function clauses that are matched less often to the end of the function definition. Second, we use a body-recursive implementation instead of a tail-recursive one with a final reverse. The latter change is motivated by the implementation of orddict:merge/3 in the stdlib, and simple benchmarks on R14B04 indicate that the body-recursive implementation is faster for all batch sizes.
Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/5af2b899 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/5af2b899 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/5af2b899 Branch: refs/heads/import Commit: 5af2b8996231ec2c65683a0ae6a68399dd183daa Parents: c578e70 Author: Adam Kocoloski <[email protected]> Authored: Fri Feb 24 16:11:13 2012 -0500 Committer: Paul J. Davis <[email protected]> Committed: Fri Jan 17 16:44:29 2014 -0800 ---------------------------------------------------------------------- src/couch_db_updater.erl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/5af2b899/src/couch_db_updater.erl ---------------------------------------------------------------------- diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl index 414e3a0..1be3e3a 100644 --- a/src/couch_db_updater.erl +++ b/src/couch_db_updater.erl @@ -308,20 +308,16 @@ handle_info({'DOWN', Ref, _, _, Reason}, #db{fd_monitor=Ref, name=Name} = Db) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - -merge_updates([], RestB, AccOutGroups) -> - lists:reverse(AccOutGroups, RestB); -merge_updates(RestA, [], AccOutGroups) -> - lists:reverse(AccOutGroups, RestA); -merge_updates([[{_, {#doc{id=IdA}, _}}|_]=GroupA | RestA], - [[{_, {#doc{id=IdB}, _}}|_]=GroupB | RestB], AccOutGroups) -> - if IdA == IdB -> - merge_updates(RestA, RestB, [GroupA ++ GroupB | AccOutGroups]); - IdA < IdB -> - merge_updates(RestA, [GroupB | RestB], [GroupA | AccOutGroups]); - true -> - merge_updates([GroupA | RestA], RestB, [GroupB | AccOutGroups]) - end. 
+merge_updates([[{_,#doc{id=X}}|_]=A|RestA], [[{_,#doc{id=X}}|_]=B|RestB]) -> + [A++B | merge_updates(RestA, RestB)]; +merge_updates([[{_,#doc{id=X}}|_]|_]=A, [[{_,#doc{id=Y}}|_]|_]=B) when X < Y -> + [hd(A) | merge_updates(tl(A), B)]; +merge_updates([[{_,#doc{id=X}}|_]|_]=A, [[{_,#doc{id=Y}}|_]|_]=B) when X > Y -> + [hd(B) | merge_updates(A, tl(B))]; +merge_updates([], RestB) -> + RestB; +merge_updates(RestA, []) -> + RestA. collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> receive @@ -333,7 +329,7 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> GroupedDocs2 = [[{Client, Doc} || Doc <- DocGroup] || DocGroup <- GroupedDocs], GroupedDocsAcc2 = - merge_updates(GroupedDocsAcc, GroupedDocs2, []), + merge_updates(GroupedDocsAcc, GroupedDocs2), collect_updates(GroupedDocsAcc2, [Client | ClientsAcc], MergeConflicts, (FullCommit or FullCommit2)) after 0 ->
