Spread ushards load to more nodes

In some cases, notably q=1 databases, the current ushards algorithm will always choose the same replica (because of the lists:sort and order-preserving orddict). This causes a severely skewed load profile if you have lots of these cases.
This patch rotates each group of nodes using the crc32 of the database name, spreading out the load pretty evenly. The patch is a little obscure because ushards still has remnants of previous work (breaking nodes into the local, same zone, different zone, but then deliberately merging local and same zone back together because that was a silly idea). BugzID: 17801 Project: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/commit/06e0baea Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/tree/06e0baea Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/diff/06e0baea Branch: refs/heads/import Commit: 06e0baea96710752d1514b1ad7b078537743dbb9 Parents: 8740fb4 Author: Robert Newson <[email protected]> Authored: Wed Mar 6 08:29:13 2013 -0600 Committer: Robert Newson <[email protected]> Committed: Thu Mar 7 12:47:21 2013 -0600 ---------------------------------------------------------------------- src/mem3.erl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-mem3/blob/06e0baea/src/mem3.erl ---------------------------------------------------------------------- diff --git a/src/mem3.erl b/src/mem3.erl index 854a1f1..18257ba 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -15,7 +15,7 @@ -module(mem3). -export([start/0, stop/0, restart/0, nodes/0, node_info/2, shards/1, shards/2, - choose_shards/2, n/1, dbname/1, ushards/1, ushards/2]). + choose_shards/2, n/1, dbname/1, ushards/1]). -export([get_shard/3, local_shards/1, fold_shards/2]). -export([sync_security/0, sync_security/1]). -export([compare_nodelists/0, compare_shards/1]). @@ -105,14 +105,14 @@ shards(DbName, DocId) -> ushards(DbName) -> Nodes = [node()|erlang:nodes()], ZoneMap = zone_map(Nodes), - ushards(live_shards(DbName, Nodes), ZoneMap). + ushards(DbName, live_shards(DbName, Nodes), ZoneMap). 
-ushards(Shards0, ZoneMap) -> +ushards(DbName, Shards0, ZoneMap) -> {L,S,D} = group_by_proximity(Shards0, ZoneMap), % Prefer shards in the local zone over shards in a different zone, % but sort each zone separately to ensure a consistent choice between % nodes in the same zone. - Shards = choose_ushards(L ++ S) ++ choose_ushards(D), + Shards = choose_ushards(DbName, L ++ S) ++ choose_ushards(DbName, D), lists:ukeysort(#shard.range, Shards). get_shard(DbName, Node, Range) -> @@ -215,13 +215,17 @@ group_by_proximity(Shards, ZoneMap) -> {SameZone, DifferentZone} = lists:partition(Fun, Remote), {Local, SameZone, DifferentZone}. -choose_ushards(Shards) -> - Groups = group_by_range(lists:sort(Shards)), +choose_ushards(DbName, Shards) -> + Groups = group_by_range(rotate_list(DbName, lists:sort(Shards))), Fun = fun(Group, {N, Acc}) -> {N+1, [lists:nth(1 + N rem length(Group), Group) | Acc]} end, {_, Result} = lists:foldl(Fun, {0, []}, Groups), Result. +rotate_list(DbName, List) -> + {H, T} = lists:split(erlang:crc32(DbName) rem length(List), List), + T ++ H. + group_by_range(Shards) -> Groups0 = lists:foldl(fun(#shard{range=Range}=Shard, Dict) -> orddict:append(Range, Shard, Dict) end, orddict:new(), Shards),
