Spread ushards load to more nodes

In some cases, notably q=1 databases, the current ushards algorithm
will always choose the same replica (because of the lists:sort and
order-preserving orddict). This causes a severely skewed load profile
if you have lots of these cases.

This patch rotates the sorted shard list by the crc32 of the database
name before grouping by range, spreading out the load pretty evenly.

The patch is a little obscure because ushards still carries remnants of
previous work (splitting nodes into local, same-zone, and
different-zone groups, but then deliberately merging local and
same-zone back together because that was a silly idea).

BugzID: 17801


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/commit/06e0baea
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/tree/06e0baea
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mem3/diff/06e0baea

Branch: refs/heads/import
Commit: 06e0baea96710752d1514b1ad7b078537743dbb9
Parents: 8740fb4
Author: Robert Newson <[email protected]>
Authored: Wed Mar 6 08:29:13 2013 -0600
Committer: Robert Newson <[email protected]>
Committed: Thu Mar 7 12:47:21 2013 -0600

----------------------------------------------------------------------
 src/mem3.erl | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mem3/blob/06e0baea/src/mem3.erl
----------------------------------------------------------------------
diff --git a/src/mem3.erl b/src/mem3.erl
index 854a1f1..18257ba 100644
--- a/src/mem3.erl
+++ b/src/mem3.erl
@@ -15,7 +15,7 @@
 -module(mem3).
 
 -export([start/0, stop/0, restart/0, nodes/0, node_info/2, shards/1, shards/2,
-    choose_shards/2, n/1, dbname/1, ushards/1, ushards/2]).
+    choose_shards/2, n/1, dbname/1, ushards/1]).
 -export([get_shard/3, local_shards/1, fold_shards/2]).
 -export([sync_security/0, sync_security/1]).
 -export([compare_nodelists/0, compare_shards/1]).
@@ -105,14 +105,14 @@ shards(DbName, DocId) ->
 ushards(DbName) ->
     Nodes = [node()|erlang:nodes()],
     ZoneMap = zone_map(Nodes),
-    ushards(live_shards(DbName, Nodes), ZoneMap).
+    ushards(DbName, live_shards(DbName, Nodes), ZoneMap).
 
-ushards(Shards0, ZoneMap) ->
+ushards(DbName, Shards0, ZoneMap) ->
     {L,S,D} = group_by_proximity(Shards0, ZoneMap),
     % Prefer shards in the local zone over shards in a different zone,
     % but sort each zone separately to ensure a consistent choice between
     % nodes in the same zone.
-    Shards = choose_ushards(L ++ S) ++ choose_ushards(D),
+    Shards = choose_ushards(DbName, L ++ S) ++ choose_ushards(DbName, D),
     lists:ukeysort(#shard.range, Shards).
 
 get_shard(DbName, Node, Range) ->
@@ -215,13 +215,17 @@ group_by_proximity(Shards, ZoneMap) ->
     {SameZone, DifferentZone} = lists:partition(Fun, Remote),
     {Local, SameZone, DifferentZone}.
 
-choose_ushards(Shards) ->
-    Groups = group_by_range(lists:sort(Shards)),
+choose_ushards(DbName, Shards) ->
+    Groups = group_by_range(rotate_list(DbName, lists:sort(Shards))),
     Fun = fun(Group, {N, Acc}) ->
         {N+1, [lists:nth(1 + N rem length(Group), Group) | Acc]} end,
     {_, Result} = lists:foldl(Fun, {0, []}, Groups),
     Result.
 
+rotate_list(DbName, List) ->
+    {H, T} = lists:split(erlang:crc32(DbName) rem length(List), List),
+    T ++ H.
+
 group_by_range(Shards) ->
     Groups0 = lists:foldl(fun(#shard{range=Range}=Shard, Dict) ->
         orddict:append(Range, Shard, Dict) end, orddict:new(), Shards),

Reply via email to