From: levin li <[email protected]> When the epoch changes (a new node joins or an old node leaves), we should recalculate the vnode_info for every sd_node. The disk space is stored in sd_node and transferred to every other node together with the join message.
Signed-off-by: levin li <[email protected]> --- include/internal_proto.h | 1 + sheep/group.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 0 deletions(-) diff --git a/include/internal_proto.h b/include/internal_proto.h index 3e22124..d48c8af 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -174,6 +174,7 @@ struct sd_node { struct node_id nid; uint16_t nr_vnodes; uint32_t zone; + uint32_t space; }; struct epoch_log { diff --git a/sheep/group.c b/sheep/group.c index 16cbdaf..960987f 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -772,6 +772,24 @@ static void get_vdi_bitmap(struct sd_node *nodes, size_t nr_nodes) queue_work(sys->block_wqueue, &w->work); } +static void recalculate_vnodes(struct sd_node *nodes, int nr_nodes) +{ + int i; + uint64_t avg_size = 0; + float factor; + + for (i = 0; i < nr_nodes; i++) + avg_size += nodes[i].space; + avg_size /= nr_nodes; + + for (i = 0;i < nr_nodes; i++) { + factor = (float)nodes[i].space / (float)avg_size; + nodes[i].nr_vnodes = SD_DEFAULT_VNODES * factor; + dprintf("node %d has %d vnodes, free space %" PRIu32 "\n", + nodes[i].nid.port, nodes[i].nr_vnodes, nodes[i].space); + } +} + static void update_cluster_info(struct join_message *msg, struct sd_node *joined, struct sd_node *nodes, size_t nr_nodes) @@ -784,6 +802,8 @@ static void update_cluster_info(struct join_message *msg, if (!sys->join_finished) finish_join(msg, joined, nodes, nr_nodes); + recalculate_vnodes(nodes, nr_nodes); + old_vnode_info = current_vnode_info; current_vnode_info = alloc_vnode_info(nodes, nr_nodes); @@ -1090,6 +1110,8 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members, if (sys->status == SD_STATUS_SHUTDOWN) return; + recalculate_vnodes(members, nr_members); + old_vnode_info = current_vnode_info; current_vnode_info = alloc_vnode_info(members, nr_members); @@ -1151,6 +1173,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes, sys->this_node.zone = zone; dprintf("zone id = %u\n", 
sys->this_node.zone); + sys->this_node.space = sys->disk_space; + if (get_latest_epoch() > 0) { sys->status = SD_STATUS_WAIT_FOR_JOIN; -- 1.7.1 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
