From: Yunkai Zhang <qiushu....@taobao.com> The sheep master fills in the joining nodes in join_message when a sheep joins.
A newly added sheep gets the joining nodes from join_message and uses them to initialize and recalculate the all_nodes array, which will be used to generate current_vnode_info in cluster_enable_recover(). Signed-off-by: Yunkai Zhang <qiushu....@taobao.com> --- include/internal_proto.h | 10 ++++++---- sheep/group.c | 39 +++++++++++++++++++++++++++++++-------- sheep/ops.c | 1 + 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/include/internal_proto.h b/include/internal_proto.h index 83d98f1..717fb79 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -194,6 +194,7 @@ struct join_message { uint16_t nr_nodes; uint16_t nr_failed_nodes; uint16_t nr_delayed_nodes; + uint16_t nr_joining_nodes; uint16_t cluster_flags; uint32_t cluster_status; uint32_t epoch; @@ -204,10 +205,11 @@ struct join_message { /* * A joining sheep puts the local node list here, which is nr_nodes - * entries long. After the master replies it will contain the list of - * nodes that attempted to join but failed the join process. The - * number of entries in that case is nr_failed_nodes, which by - * defintion must be smaller than nr_nodes. + * entries long. 
After the master replies it will contain the list of + * nodes by following order: + * [ failed nodes ]: size = nr_failed_nodes + * [ delayed nodes ]: size = nr_delayed_nodes + * [ joining nodes ]: size = nr_joining_nodes */ struct sd_node nodes[]; }; diff --git a/sheep/group.c b/sheep/group.c index cb244f7..ad3447d 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -767,16 +767,32 @@ static void get_vdi_bitmap(struct sd_node *nodes, size_t nr_nodes) queue_work(sys->block_wqueue, &w->work); } -static void prepare_recovery(struct sd_node *joined, - struct sd_node *nodes, size_t nr_nodes) +static void prepare_recovery(struct join_message *jm, + struct sd_node *joined, + struct sd_node *nodes, size_t nr_nodes) { - int i; + int i, j, n, found; joining_nodes[nr_joining_nodes++] = *joined; + if (!nr_all_nodes) { - /* exclude the newly added one */ - for (i = 0; i < nr_nodes; i++) { - if (!node_eq(nodes + i, joined)) + /* initialize joining_nodes */ + n = jm->nr_failed_nodes + jm->nr_delayed_nodes; + memcpy(&joining_nodes[nr_joining_nodes], &jm->nodes[n], + jm->nr_joining_nodes * sizeof(*joining_nodes)); + nr_joining_nodes += jm->nr_joining_nodes; + + /* initialize all_nodes */ + for (found = 0, i = 0; i < nr_nodes; i++) { + /* exclude all joining nodes */ + for (j = 0; j < nr_joining_nodes; j++) { + if (node_eq(nodes + i, joining_nodes + j)) { + found = 1; + break; + } + } + + if (!found) all_nodes[nr_all_nodes++] = nodes[i]; } } @@ -866,7 +882,7 @@ static void update_cluster_info(struct join_message *msg, start_recovery(current_vnode_info, old_vnode_info); } else - prepare_recovery(joined, nodes, nr_nodes); + prepare_recovery(msg, joined, nodes, nr_nodes); } if (have_enough_zones()) @@ -1013,7 +1029,14 @@ enum cluster_join_result sd_check_join_cb(struct sd_node *joining, (ret == CJ_RES_SUCCESS || ret == CJ_RES_JOIN_LATER)) format_exceptional_node_list(jm); - n = jm->nr_failed_nodes + jm->nr_delayed_nodes; + if (sys->disable_recovery) { + n = jm->nr_failed_nodes + 
jm->nr_delayed_nodes; + memcpy(&jm->nodes[n], joining_nodes, + nr_joining_nodes * sizeof(*joining_nodes)); + jm->nr_joining_nodes = nr_joining_nodes; + } + + n = jm->nr_failed_nodes + jm->nr_delayed_nodes + jm->nr_joining_nodes; *opaque_len = sizeof(*jm) + n * sizeof(jm->nodes[0]); return ret; } diff --git a/sheep/ops.c b/sheep/ops.c index 0cddf66..b2e9c69 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -292,6 +292,7 @@ static int cluster_enable_recover(const struct sd_req *req, put_vnode_info(old_vnode_info); } + nr_all_nodes = 0; nr_joining_nodes = 0; sys->disable_recovery = 0; return SD_RES_SUCCESS; -- 1.7.11.2 -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog