From: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>

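This moves the breadth-first replica search loop from do_recover_object()
into recover_object_from_replica(), which now takes the old and current
vnode_info instead of a single target vnode.  is_invalid_vnode() is moved
above it so that it can be called from there.  This prepares for the
succeeding patches.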

Signed-off-by: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>
---
 sheep/recovery.c |  150 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 79 insertions(+), 71 deletions(-)

diff --git a/sheep/recovery.c b/sheep/recovery.c
index 4b3455a..1429391 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -67,117 +67,123 @@ static int obj_cmp(const void *oid1, const void *oid2)
        return 0;
 }
 
-static int recover_object_from_replica(uint64_t oid,
-                                      const struct sd_vnode *vnode,
+/*
+ * A virtual node that does not match any node in current node list
+ * means the node has left the cluster, then it's an invalid virtual node.
+ */
+static bool is_invalid_vnode(const struct sd_vnode *entry,
+                            struct sd_node *nodes, int nr_nodes)
+{
+       if (bsearch(entry, nodes, nr_nodes, sizeof(struct sd_node),
+                   node_id_cmp))
+               return false;
+       return true;
+}
+
+static int recover_object_from_replica(uint64_t oid, struct vnode_info *old,
+                                      struct vnode_info *cur,
                                       uint32_t epoch, uint32_t tgt_epoch)
 {
        struct sd_req hdr;
        struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
        unsigned rlen;
-       int ret = SD_RES_NO_MEM;
+       int nr_copies, ret;
        void *buf = NULL;
        struct siocb iocb = { 0 };
 
-       if (vnode_is_local(vnode) && tgt_epoch < sys_epoch()) {
-               ret = sd_store->link(oid, tgt_epoch);
-               goto out;
-       }
-
        rlen = get_objsize(oid);
        buf = valloc(rlen);
        if (!buf) {
+               ret = SD_RES_NO_MEM;
                sd_eprintf("%m");
                goto out;
        }
 
-       sd_init_req(&hdr, SD_OP_READ_PEER);
-       hdr.epoch = epoch;
-       hdr.flags = SD_FLAG_CMD_RECOVERY;
-       hdr.data_length = rlen;
-       hdr.obj.oid = oid;
-       hdr.obj.tgt_epoch = tgt_epoch;
+       /* Let's do a breadth-first search */
+       nr_copies = get_obj_copy_number(oid, old->nr_zones);
+       for (int i = 0; i < nr_copies; i++) {
+               const struct sd_vnode *vnode;
 
-       ret = sheep_exec_req(&vnode->nid, &hdr, buf);
-       if (ret != SD_RES_SUCCESS)
-               goto out;
-       iocb.epoch = epoch;
-       iocb.length = rsp->data_length;
-       iocb.offset = rsp->obj.offset;
-       iocb.buf = buf;
-       ret = sd_store->create_and_write(oid, &iocb);
-out:
-       if (ret == SD_RES_SUCCESS) {
-               sd_dprintf("recovered oid %"PRIx64" from %d to epoch %d", oid,
-                       tgt_epoch, epoch);
-               objlist_cache_insert(oid);
+               vnode = oid_to_vnode(old->vnodes, old->nr_vnodes, oid, i);
+
+               if (is_invalid_vnode(vnode, cur->nodes, cur->nr_nodes))
+                       continue;
+
+               if (vnode_is_local(vnode) && tgt_epoch < sys_epoch()) {
+                       ret = sd_store->link(oid, tgt_epoch);
+                       goto out;
+               }
+
+               sd_init_req(&hdr, SD_OP_READ_PEER);
+               hdr.epoch = epoch;
+               hdr.flags = SD_FLAG_CMD_RECOVERY;
+               hdr.data_length = rlen;
+               hdr.obj.oid = oid;
+               hdr.obj.tgt_epoch = tgt_epoch;
+
+               ret = sheep_exec_req(&vnode->nid, &hdr, buf);
+               if (ret == SD_RES_SUCCESS) {
+                       iocb.epoch = epoch;
+                       iocb.length = rsp->data_length;
+                       iocb.offset = rsp->obj.offset;
+                       iocb.buf = buf;
+                       ret = sd_store->create_and_write(oid, &iocb);
+               }
+
+               switch (ret) {
+               case SD_RES_SUCCESS:
+                       sd_dprintf("recovered oid %"PRIx64" from %d to epoch %d", oid,
+                                  tgt_epoch, epoch);
+                       objlist_cache_insert(oid);
+                       goto out;
+               case SD_RES_OLD_NODE_VER:
+                       /* move to the next epoch recovery */
+                       goto out;
+               default:
+                       break;
+               }
        }
+out:
        free(buf);
        return ret;
 }
 
 /*
- * A virtual node that does not match any node in current node list
- * means the node has left the cluster, then it's an invalid virtual node.
- */
-static bool is_invalid_vnode(const struct sd_vnode *entry,
-                            struct sd_node *nodes, int nr_nodes)
-{
-       if (bsearch(entry, nodes, nr_nodes, sizeof(struct sd_node),
-                   node_id_cmp))
-               return false;
-       return true;
-}
-
-/*
  * Recover the object from its track in epoch history. That is,
  * the routine will try to recovery it from the nodes it has stayed,
  * at least, *theoretically* on consistent hash ring.
  */
 static int do_recover_object(struct recovery_work *rw)
 {
-       struct vnode_info *old;
+       struct vnode_info *old, *cur;
        uint64_t oid = rw->oids[rw->done];
        uint32_t epoch = rw->epoch, tgt_epoch = rw->epoch;
-       int nr_copies, ret, i;
+       int ret;
+       struct vnode_info *new_old;
 
        old = grab_vnode_info(rw->old_vinfo);
-
+       cur = grab_vnode_info(rw->cur_vinfo);
 again:
        sd_dprintf("try recover object %"PRIx64" from epoch %"PRIu32, oid,
                   tgt_epoch);
 
-       /* Let's do a breadth-first search */
-       nr_copies = get_obj_copy_number(oid, old->nr_zones);
-       for (i = 0; i < nr_copies; i++) {
-               const struct sd_vnode *tgt_vnode;
-
-               tgt_vnode = oid_to_vnode(old->vnodes, old->nr_vnodes, oid, i);
-
-               if (is_invalid_vnode(tgt_vnode, rw->cur_vinfo->nodes,
-                                    rw->cur_vinfo->nr_nodes))
-                       continue;
-               ret = recover_object_from_replica(oid, tgt_vnode,
-                                                 epoch, tgt_epoch);
-               if (ret == SD_RES_SUCCESS) {
-                       /* Succeed */
-                       break;
-               } else if (SD_RES_OLD_NODE_VER == ret) {
-                       rw->stop = true;
-                       goto err;
-               } else
-                       ret = -1;
-       }
-
-       /* No luck, roll back to an older configuration and try again */
-       if (ret < 0) {
-               struct vnode_info *new_old;
+       ret = recover_object_from_replica(oid, old, cur, epoch, tgt_epoch);
 
+       switch (ret) {
+       case SD_RES_SUCCESS:
+               /* Succeed */
+               break;
+       case SD_RES_OLD_NODE_VER:
+               rw->stop = true;
+               break;
+       default:
+               /* No luck, roll back to an older configuration and try again */
 rollback:
                tgt_epoch--;
                if (tgt_epoch < 1) {
                        sd_eprintf("can not recover oid %"PRIx64, oid);
                        ret = -1;
-                       goto err;
+                       break;
                }
 
                new_old = get_vnode_info_epoch(tgt_epoch, rw->cur_vinfo);
@@ -185,12 +191,14 @@ rollback:
                        /* We rollback in case we don't get a valid epoch */
                        goto rollback;
 
-               put_vnode_info(old);
+               put_vnode_info(cur);
+               cur = old;
                old = new_old;
                goto again;
        }
-err:
+
        put_vnode_info(old);
+       put_vnode_info(cur);
        return ret;
 }
 
-- 
1.7.9.5

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to