[sheepdog] [PATCH 11/11] sheep: fix dead lock problem of cluster_force_recover

MORITA Kazutaka Tue, 16 Apr 2013 00:27:33 -0700

We cannot call exec_req in cluster_force_recover (in the main thread).
With this patch, sheep gets the epoch info in the worker thread and
notifies all the nodes of it.


Signed-off-by: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>
---
 collie/cluster.c   | 13 ++++++++---
 sheep/group.c      |  4 ++--
 sheep/ops.c        | 64 ++++++++++++++++++++++++++++++++++++++++--------------
 sheep/sheep_priv.h |  2 ++
 4 files changed, 62 insertions(+), 21 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index da2effa..0c5ac13 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -325,7 +325,9 @@ static int cluster_force_recover(int argc, char **argv)
 {
        int ret;
        struct sd_req hdr;
+       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
        char str[123] = {'\0'};
+       struct sd_node nodes[SD_MAX_NODES];
 
        if (!cluster_cmd_data.force) {
                int i, l;
@@ -341,10 +343,15 @@ static int cluster_force_recover(int argc, char **argv)
        }
 
        sd_init_req(&hdr, SD_OP_FORCE_RECOVER);
+       hdr.data_length = sizeof(nodes);
 
-       ret = send_light_req(&hdr, sdhost, sdport);
-       if (ret) {
-               fprintf(stderr, "failed to execute request\n");
+       ret = collie_exec_req(sdhost, sdport, &hdr, nodes);
+       if (ret < 0)
+               return EXIT_SYSFAIL;
+
+       if (rsp->result != SD_RES_SUCCESS) {
+               fprintf(stderr, "failed to execute request, %s\n",
+                       sd_strerror(rsp->result));
                return EXIT_FAILURE;
        }
 
diff --git a/sheep/group.c b/sheep/group.c
index e70389f..85fe8d5 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -166,8 +166,8 @@ void put_vnode_info(struct vnode_info *vnode_info)
        }
 }
 
-static struct vnode_info *alloc_vnode_info(const struct sd_node *nodes,
-                                          size_t nr_nodes)
+struct vnode_info *alloc_vnode_info(const struct sd_node *nodes,
+                                   size_t nr_nodes)
 {
        struct vnode_info *vnode_info;
 
diff --git a/sheep/ops.c b/sheep/ops.c
index a6494b7..96b25eb 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -480,13 +480,10 @@ static int local_get_epoch(struct request *req)
        return SD_RES_SUCCESS;
 }
 
-static int cluster_force_recover(const struct sd_req *req, struct sd_rsp *rsp,
-                               void *data)
+static int cluster_force_recover_work(struct request *req)
 {
-       struct vnode_info *old_vnode_info, *vnode_info;
-       int ret = SD_RES_SUCCESS;
-       uint8_t c;
-       uint16_t f;
+       struct vnode_info *old_vnode_info;
+       uint32_t epoch = sys_epoch();
 
        /*
         * We should manually recover the cluster when
@@ -494,8 +491,48 @@ static int cluster_force_recover(const struct sd_req *req, struct sd_rsp *rsp,
         * 2) some nodes are physically down (same epoch condition).
         * In both case, the nodes(s) stat is WAIT_FOR_JOIN.
         */
-       if (sys->status != SD_STATUS_WAIT_FOR_JOIN)
+       if (sys->status != SD_STATUS_WAIT_FOR_JOIN || req->vinfo == NULL)
+               return SD_RES_FORCE_RECOVER;
+
+       old_vnode_info = get_vnode_info_epoch(epoch, req->vinfo);
+       if (!old_vnode_info) {
+               sd_printf(SDOG_EMERG, "cannot get vnode info for epoch %d",
+                         epoch);
+               put_vnode_info(old_vnode_info);
                return SD_RES_FORCE_RECOVER;
+       }
+
+       if (req->rq.data_length <
+           sizeof(*old_vnode_info->nodes) * old_vnode_info->nr_nodes) {
+               sd_eprintf("too small buffer size, %d", req->rq.data_length);
+               return SD_RES_INVALID_PARMS;
+       }
+
+       req->rp.epoch = epoch;
+       req->rp.data_length = sizeof(*old_vnode_info->nodes) *
+               old_vnode_info->nr_nodes;
+       memcpy(req->data, old_vnode_info->nodes, req->rp.data_length);
+
+       put_vnode_info(old_vnode_info);
+
+       return SD_RES_SUCCESS;
+}
+
+static int cluster_force_recover_main(const struct sd_req *req,
+                                     struct sd_rsp *rsp,
+                                     void *data)
+{
+       struct vnode_info *old_vnode_info, *vnode_info;
+       int ret = SD_RES_SUCCESS;
+       uint8_t c;
+       uint16_t f;
+       struct sd_node *nodes = data;
+       size_t nr_nodes = rsp->data_length / sizeof(*nodes);
+
+       if (rsp->epoch != sys->epoch) {
+               sd_eprintf("epoch was incremented while cluster_force_recover");
+               return SD_RES_FORCE_RECOVER;
+       }
 
        ret = get_cluster_copies(&c);
        if (ret) {
@@ -511,14 +548,6 @@ static int cluster_force_recover(const struct sd_req *req, struct sd_rsp *rsp,
        sys->nr_copies = c;
        sys->flags = f;
 
-       vnode_info = get_vnode_info();
-       old_vnode_info = get_vnode_info_epoch(sys->epoch, vnode_info);
-       if (!old_vnode_info) {
-               sd_printf(SDOG_EMERG, "cannot get vnode info for epoch %d",
-                         sys->epoch);
-               goto err;
-       }
-
        sys->epoch++; /* some nodes are left, so we get a new epoch */
        ret = log_current_epoch();
        if (ret) {
@@ -531,6 +560,8 @@ static int cluster_force_recover(const struct sd_req *req, struct sd_rsp *rsp,
        else
                sys->status = SD_STATUS_HALT;
 
+       vnode_info = get_vnode_info();
+       old_vnode_info = alloc_vnode_info(nodes, nr_nodes);
        start_recovery(vnode_info, old_vnode_info);
        put_vnode_info(vnode_info);
        put_vnode_info(old_vnode_info);
@@ -993,7 +1024,8 @@ static struct sd_op_template sd_ops[] = {
                .name = "FORCE_RECOVER",
                .type = SD_OP_TYPE_CLUSTER,
                .force = true,
-               .process_main = cluster_force_recover,
+               .process_work = cluster_force_recover_work,
+               .process_main = cluster_force_recover_main,
        },
 
        [SD_OP_SNAPSHOT] = {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7722473..8f33e40 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -274,6 +274,8 @@ bool have_enough_zones(void);
 struct vnode_info *grab_vnode_info(struct vnode_info *vnode_info);
 struct vnode_info *get_vnode_info(void);
 void put_vnode_info(struct vnode_info *vinfo);
+struct vnode_info *alloc_vnode_info(const struct sd_node *nodes,
+                                   size_t nr_nodes);
 struct vnode_info *get_vnode_info_epoch(uint32_t epoch,
                                        struct vnode_info *cur_vinfo);
 void wait_get_vdis_done(void);
-- 
1.8.1.3.566.gaa39828

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

[sheepdog] [PATCH 11/11] sheep: fix dead lock problem of cluster_force_recover

Reply via email to