Currently, Sheepdog blocks I/O operations when the target objects are
not recovered yet.  This patch recovers such objects first, and
reduces the time of blocking.

Signed-off-by: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>
---
 sheep/store.c |   61 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/sheep/store.c b/sheep/store.c
index e93f44d..abc2b30 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1021,7 +1021,8 @@ struct recovery_work {
 };
 
 static LIST_HEAD(recovery_work_list);
-static int recovering;
+static struct recovery_work *recovering_work;
+static uint64_t blocking_oid;
 
 static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int 
old_nr, int old_idx,
                         struct sheepdog_node_list_entry *cur_entry, int 
cur_nr, int cur_idx,
@@ -1238,9 +1239,20 @@ static void recover_one(struct work *work, int idx)
        int old_nr, cur_nr;
        uint32_t epoch = rw->epoch;
        int i, my_idx = -1, copy_idx = 0, cur_idx = -1;
+       int fd;
 
        eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
 
+       if (blocking_oid)
+               oid = blocking_oid; /* recover the blocked object first */
+
+       fd = ob_open(epoch, oid, 0, &ret);
+       if (fd != -1) {
+               /* the object is already recovered */
+               close(fd);
+               goto out;
+       }
+
        if (is_data_obj(oid))
                buf = malloc(SD_DATA_OBJ_SIZE);
        else
@@ -1341,9 +1353,36 @@ int is_recoverying_oid(uint64_t oid)
 {
        uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
        uint64_t recovering_hval = fnv_64a_buf(&recovering_oid, 
sizeof(uint64_t), FNV1A_64_INIT);
+       struct recovery_work *rw = recovering_work;
+       int ret, fd;
+
+       if (oid == 0)
+               return 0;
+
+       if (!rw)
+               return 0; /* there is no thread working for object recovery */
+
+       if (before(rw->epoch, sys->epoch))
+               return 1;
+
+       fd = ob_open(sys->epoch, oid, 0, &ret);
+       if (fd != -1) {
+               dprintf("the object %lx is already recoverd\n", oid);
+               close(fd);
+               return 0;
+       }
+
+       if (recovering_hval <= hval) {
+               if (bsearch(&oid, ((uint64_t *)rw->buf) + rw->done,
+                           rw->count - rw->done, sizeof(oid), obj_cmp)) {
+                       dprintf("recover the object %lx first\n", oid);
+                       blocking_oid = oid;
+                       return 1;
+               }
+       }
 
-       return before(sys->recovered_epoch, sys->epoch - 1) ||
-               (sys->recovered_epoch == sys->epoch - 1 && recovering_hval <= 
hval);
+       dprintf("the object %lx is not found\n", oid);
+       return 0;
 }
 
 static void recover_done(struct work *work, int idx)
@@ -1351,10 +1390,6 @@ static void recover_done(struct work *work, int idx)
        struct recovery_work *rw = container_of(work, struct recovery_work, 
work);
        uint64_t oid = *(((uint64_t *)rw->buf) + rw->done);
 
-       recovering_oid = 0;
-
-       resume_pending_requests();
-
        if (rw->retry && list_empty(&recovery_work_list)) {
                rw->retry = 0;
 
@@ -1364,6 +1399,8 @@ static void recover_done(struct work *work, int idx)
                return;
        }
 
+       blocking_oid = 0;
+
        if (rw->done < rw->count && list_empty(&recovery_work_list)) {
                rw->work.fn = recover_one;
 
@@ -1372,12 +1409,14 @@ static void recover_done(struct work *work, int idx)
                        return;
                }
                recovering_oid = oid;
+               resume_pending_requests();
                queue_work(&rw->work);
                return;
        }
 
        dprintf("recovery done, %"PRIu32"\n", rw->epoch);
-       recovering = 0;
+       recovering_oid = 0;
+       recovering_work = NULL;
 
        sys->recovered_epoch = rw->epoch;
        resume_pending_requests();
@@ -1391,7 +1430,7 @@ static void recover_done(struct work *work, int idx)
 
                list_del(&rw->rw_siblings);
 
-               recovering = 1;
+               recovering_work = rw;
                queue_work(&rw->work);
        }
 }
@@ -1582,10 +1621,10 @@ int start_recovery(uint32_t epoch)
        rw->work.fn = __start_recovery;
        rw->work.done = recover_done;
 
-       if (recovering)
+       if (recovering_work != NULL)
                list_add_tail(&rw->rw_siblings, &recovery_work_list);
        else {
-               recovering = 1;
+               recovering_work = rw;
                queue_work(&rw->work);
        }
 
-- 
1.5.6.5

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to