From: levin li <[email protected]> A newly joined node either has no VDI copy list or has an incomplete one, so it needs to fetch the copy list data from the other nodes.
Signed-off-by: levin li <[email protected]> --- include/internal_proto.h | 1 + sheep/group.c | 14 ++++++++- sheep/ops.c | 13 +++++++++ sheep/sheep_priv.h | 2 + sheep/vdi.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 2 deletions(-) diff --git a/include/internal_proto.h b/include/internal_proto.h index 83d98f1..3d70ba9 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -63,6 +63,7 @@ #define SD_OP_ENABLE_RECOVER 0xA8 #define SD_OP_DISABLE_RECOVER 0xA9 #define SD_OP_INFO_RECOVER 0xAA +#define SD_OP_GET_VDI_COPIES 0xAB /* internal flags for hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 diff --git a/sheep/group.c b/sheep/group.c index 05ffb3e..b06a667 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -820,6 +820,17 @@ static void update_cluster_info(struct join_message *msg, eprintf("status = %d, epoch = %d, finished: %d\n", msg->cluster_status, msg->epoch, sys->join_finished); + if (node_eq(joined, &sys->this_node)) { + int i, ret; + for (i = 0; i < nr_nodes; i++) { + if (node_eq(nodes + i, joined)) + continue; + ret = fetch_vdi_copies_from(nodes + i); + if (ret == SD_RES_SUCCESS) + break; + } + } + sys->disable_recovery = msg->disable_recovery; if (!sys->join_finished) @@ -863,8 +874,7 @@ static void update_cluster_info(struct join_message *msg, nodes, nr_nodes); } - start_recovery(current_vnode_info, - old_vnode_info); + start_recovery(current_vnode_info, old_vnode_info); } else prepare_recovery(joined, nodes, nr_nodes); } diff --git a/sheep/ops.c b/sheep/ops.c index efaf979..d74ffb6 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -447,6 +447,13 @@ static int local_get_obj_list(struct request *req) (struct sd_list_rsp *)&req->rp, req->data); } +static int local_get_vdi_copies(struct request *req) +{ + req->rp.data_length = fill_vdi_copy_list(req->data); + + return SD_RES_SUCCESS; +} + static int local_get_epoch(struct request *req) { uint32_t epoch = req->rq.obj.tgt_epoch; @@ -989,6 
+996,12 @@ static struct sd_op_template sd_ops[] = { .process_work = local_get_obj_list, }, + [SD_OP_GET_VDI_COPIES] = { + .name = "GET_VDI_COPIES", + .type = SD_OP_TYPE_LOCAL, + .process_work = local_get_vdi_copies, + }, + [SD_OP_GET_EPOCH] = { .name = "GET_EPOCH", .type = SD_OP_TYPE_LOCAL, diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 3f763c4..46b89cd 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -207,6 +207,8 @@ int get_max_copy_number(void); int get_req_copy_number(struct request *req); int add_vdi_copies(uint32_t vid, int nr_copies); int load_vdi_copies(void); +int fetch_vdi_copies_from(struct sd_node *node); +int fill_vdi_copy_list(void *data); int vdi_exist(uint32_t vid); int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid); diff --git a/sheep/vdi.c b/sheep/vdi.c index dd3d3af..4ca7863 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -18,6 +18,11 @@ #include "sheepdog_proto.h" #include "sheep_priv.h" +struct vdi_copy { + uint32_t vid; + uint32_t nr_copies; +}; + struct vdi_copy_entry { uint32_t vid; unsigned int nr_copies; @@ -221,6 +226,69 @@ out: return ret; } +int fetch_vdi_copies_from(struct sd_node *node) +{ + char host[128]; + struct sd_req hdr; + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; + struct vdi_copy *vc; + int fd, ret, i; + unsigned int wlen, rlen; + int count = 1 << 15; + + addr_to_str(host, sizeof(host), node->nid.addr, 0); + dprintf("fetch vdi copy list from %s:%d\n", host, node->nid.port); + fd = connect_to(host, node->nid.port); + if (fd < 0) { + dprintf("fail: %m\n"); + return SD_RES_NETWORK_ERROR; + } + + sd_init_req(&hdr, SD_OP_GET_VDI_COPIES); + hdr.epoch = sys->epoch; + hdr.data_length = count * sizeof(*vc); + rlen = hdr.data_length; + wlen = 0; + + vc = xzalloc(rlen); + + ret = exec_req(fd, &hdr, (char *)vc, &wlen, &rlen); + close(fd); + + if (ret || rsp->result != SD_RES_SUCCESS) { + eprintf("fail to get VDI copy list (%d, %d)\n", + ret, rsp->result); + goto out; + } + + count = rsp->data_length / 
sizeof(*vc); + for (i = 0; i < count; i++) + add_vdi_copies(vc[i].vid, vc[i].nr_copies); +out: + free(vc); + return ret; +} + +int fill_vdi_copy_list(void *data) +{ + int nr = 0; + struct rb_node *n; + struct vdi_copy *vc = data; + struct vdi_copy_entry *entry; + + pthread_rwlock_rdlock(&vdi_copy_lock); + for (n = rb_first(&vdi_copy_root); n; n = rb_next(n)) { + entry = rb_entry(n, struct vdi_copy_entry, node); + vc->vid = entry->vid; + vc->nr_copies = entry->nr_copies; + vc++; + nr++; + } + pthread_rwlock_unlock(&vdi_copy_lock); + + return nr * sizeof(*vc); +} + int vdi_exist(uint32_t vid) { struct sheepdog_inode *inode; -- 1.7.1 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
