HELIX-148: Current preferred placement for auto rebalance is suboptimal for n > p
Project: http://git-wip-us.apache.org/repos/asf/incubator-helix/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-helix/commit/937bc9aa Tree: http://git-wip-us.apache.org/repos/asf/incubator-helix/tree/937bc9aa Diff: http://git-wip-us.apache.org/repos/asf/incubator-helix/diff/937bc9aa Branch: refs/heads/helix_api_refactoring Commit: 937bc9aa06cccb7b021972b0a904f7d6dc791d78 Parents: 04ca91b Author: zzhang <[email protected]> Authored: Mon Jul 29 16:18:47 2013 -0700 Committer: zzhang <[email protected]> Committed: Mon Jul 29 16:18:47 2013 -0700 ---------------------------------------------------------------------- .../controller/strategy/AutoRebalanceStrategy.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/937bc9aa/helix-core/src/main/java/org/apache/helix/controller/strategy/AutoRebalanceStrategy.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/controller/strategy/AutoRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/strategy/AutoRebalanceStrategy.java index ee398d2..58f5f17 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/strategy/AutoRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/strategy/AutoRebalanceStrategy.java @@ -420,13 +420,23 @@ public class AutoRebalanceStrategy implements Rebalancer { Map<Replica, Node> preferredMapping; preferredMapping = new HashMap<Replica, Node>(); int partitionId = 0; - for (String partition : _partitions) { int replicaId = 0; for (String state : _states.keySet()) { for (int i = 0; i < _states.get(state); i++) { Replica replica = new Replica(partition, state, i); - int index = (partitionId + replicaId) % allNodes.size(); + int index; + if (allNodes.size() > _partitions.size()) { + // assign replicas in 
partition order in case there are more nodes than partitions + index = (partitionId + replicaId * _partitions.size()) % allNodes.size(); + } else if (allNodes.size() == _partitions.size()) { + // need a replica offset in case the sizes of these sets are the same + index = ((partitionId + replicaId * _partitions.size()) % allNodes.size() + + replicaId) % allNodes.size(); + } else { + // in all other cases, assigning a replica at a time for each partition is reasonable + index = (partitionId + replicaId) % allNodes.size(); + } preferredMapping.put(replica, _nodeMap.get(allNodes.get(index))); replicaId = replicaId + 1; }
