Currently, Sheepdog blocks I/O operations when the target objects have not been recovered yet. This patch recovers such objects first, which reduces the time requests spend blocked.
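For reviewers, here is a minimal standalone sketch of the idea (not part of the patch; it uses simplified stand-in names, not the real sheep/store.c symbols): when a request hits an object that is still on the recovery list, its oid is remembered so the recovery worker picks it up next instead of letting the request wait for the whole list to finish.

/*
 * Standalone sketch only -- simplified stand-ins, not the actual
 * sheep/store.c code.
 */
#include <stdint.h>
#include <stddef.h>

static uint64_t blocking_oid;   /* oid that a pending I/O request waits on */

struct recovery_list {
	uint64_t *oids;         /* objects scheduled for recovery */
	size_t done, count;     /* oids[done..count) are not recovered yet */
};

/* I/O path: returns 1 if the request has to wait for recovery. */
static int must_block(struct recovery_list *rl, uint64_t oid)
{
	for (size_t i = rl->done; i < rl->count; i++) {
		if (rl->oids[i] == oid) {
			blocking_oid = oid; /* recover this one next */
			return 1;
		}
	}
	return 0;               /* already recovered, no need to block */
}

/* Recovery worker: a blocked object jumps ahead of the normal order. */
static uint64_t next_oid_to_recover(struct recovery_list *rl)
{
	return blocking_oid ? blocking_oid : rl->oids[rl->done];
}

Apart from this special case, recovery proceeds in its usual order; only an object that a request is actually waiting on jumps the queue, and blocking_oid is cleared once that object has been recovered.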
Signed-off-by: MORITA Kazutaka <morita.kazut...@lab.ntt.co.jp>
---
 sheep/store.c |   61 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/sheep/store.c b/sheep/store.c
index e93f44d..abc2b30 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1021,7 +1021,8 @@ struct recovery_work {
 };
 
 static LIST_HEAD(recovery_work_list);
-static int recovering;
+static struct recovery_work *recovering_work;
+static uint64_t blocking_oid;
 
 static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int old_nr, int old_idx,
 			 struct sheepdog_node_list_entry *cur_entry, int cur_nr, int cur_idx,
@@ -1238,9 +1239,20 @@ static void recover_one(struct work *work, int idx)
 	int old_nr, cur_nr;
 	uint32_t epoch = rw->epoch;
 	int i, my_idx = -1, copy_idx = 0, cur_idx = -1;
+	int fd;
 
 	eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
 
+	if (blocking_oid)
+		oid = blocking_oid; /* recover the blocked object first */
+
+	fd = ob_open(epoch, oid, 0, &ret);
+	if (fd != -1) {
+		/* the object is already recovered */
+		close(fd);
+		goto out;
+	}
+
 	if (is_data_obj(oid))
 		buf = malloc(SD_DATA_OBJ_SIZE);
 	else
@@ -1341,9 +1353,36 @@ int is_recoverying_oid(uint64_t oid)
 {
 	uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
 	uint64_t recovering_hval = fnv_64a_buf(&recovering_oid, sizeof(uint64_t), FNV1A_64_INIT);
+	struct recovery_work *rw = recovering_work;
+	int ret, fd;
+
+	if (oid == 0)
+		return 0;
+
+	if (!rw)
+		return 0; /* there is no thread working for object recovery */
+
+	if (before(rw->epoch, sys->epoch))
+		return 1;
+
+	fd = ob_open(sys->epoch, oid, 0, &ret);
+	if (fd != -1) {
+		dprintf("the object %lx is already recoverd\n", oid);
+		close(fd);
+		return 0;
+	}
+
+	if (recovering_hval <= hval) {
+		if (bsearch(&oid, ((uint64_t *)rw->buf) + rw->done,
+			    rw->count - rw->done, sizeof(oid), obj_cmp)) {
+			dprintf("recover the object %lx first\n", oid);
+			blocking_oid = oid;
+			return 1;
+		}
+	}
 
-	return before(sys->recovered_epoch, sys->epoch - 1) ||
-		(sys->recovered_epoch == sys->epoch - 1 && recovering_hval <= hval);
+	dprintf("the object %lx is not found\n", oid);
+	return 0;
 }
 
 static void recover_done(struct work *work, int idx)
@@ -1351,10 +1390,6 @@ static void recover_done(struct work *work, int idx)
 	struct recovery_work *rw = container_of(work, struct recovery_work, work);
 	uint64_t oid = *(((uint64_t *)rw->buf) + rw->done);
 
-	recovering_oid = 0;
-
-	resume_pending_requests();
-
 	if (rw->retry && list_empty(&recovery_work_list)) {
 		rw->retry = 0;
 
@@ -1364,6 +1399,8 @@ static void recover_done(struct work *work, int idx)
 		return;
 	}
 
+	blocking_oid = 0;
+
 	if (rw->done < rw->count && list_empty(&recovery_work_list)) {
 		rw->work.fn = recover_one;
 
@@ -1372,12 +1409,14 @@ static void recover_done(struct work *work, int idx)
 			return;
 		}
 		recovering_oid = oid;
+		resume_pending_requests();
 		queue_work(&rw->work);
 		return;
 	}
 
 	dprintf("recovery done, %"PRIu32"\n", rw->epoch);
-	recovering = 0;
+	recovering_oid = 0;
+	recovering_work = NULL;
 	sys->recovered_epoch = rw->epoch;
 
 	resume_pending_requests();
@@ -1391,7 +1430,7 @@ static void recover_done(struct work *work, int idx)
 
 		list_del(&rw->rw_siblings);
 
-		recovering = 1;
+		recovering_work = rw;
 		queue_work(&rw->work);
 	}
 }
@@ -1582,10 +1621,10 @@ int start_recovery(uint32_t epoch)
 	rw->work.fn = __start_recovery;
 	rw->work.done = recover_done;
 
-	if (recovering)
+	if (recovering_work != NULL)
 		list_add_tail(&rw->rw_siblings, &recovery_work_list);
 	else {
-		recovering = 1;
+		recovering_work = rw;
 		queue_work(&rw->work);
 	}
 
-- 
1.5.6.5

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog