From: Liu Yuan <tailai...@taobao.com> We try to delay reclaim objects to avoid object ping-pong because the pulled object is clean and likely to be reclaimed in a cache over high watermark. We can't simply pass without waking up reclaimer because the cache is easy to be filled full with a read storm.
Signed-off-by: Liu Yuan <tailai...@taobao.com> --- sheep/object_cache.c | 39 ++++++++++++++++++++++++++++----------- sheep/ops.c | 2 +- sheep/sheep_priv.h | 2 +- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/sheep/object_cache.c b/sheep/object_cache.c index 3383f13..a4b06e2 100644 --- a/sheep/object_cache.c +++ b/sheep/object_cache.c @@ -450,12 +450,21 @@ static void do_reclaim_object(struct object_cache *oc) pthread_yield(); } +struct reclaim_work { + struct work work; + int delay; +}; + static void do_reclaim(struct work *work) { + struct reclaim_work *rw = container_of(work, struct reclaim_work, + work); struct object_cache *cache; struct hlist_node *node; int i, j; + if (rw->delay) + sleep(rw->delay); /* We choose a random victim to avoid reclaim the same one every time */ j = random(); for (i = 0; i < HASH_SIZE; i++) { @@ -542,9 +551,9 @@ out: return cache; } -void object_cache_try_to_reclaim(void) +void object_cache_try_to_reclaim(int delay) { - struct work *work; + struct reclaim_work *rw; if (!sys->object_cache_size) return; @@ -556,10 +565,11 @@ void object_cache_try_to_reclaim(void) /* the cache is already in reclaim, */ return; - work = xzalloc(sizeof(struct work)); - work->fn = do_reclaim; - work->done = reclaim_done; - queue_work(sys->reclaim_wqueue, work); + rw = xzalloc(sizeof(struct reclaim_work)); + rw->delay = delay; + rw->work.fn = do_reclaim; + rw->work.done = reclaim_done; + queue_work(sys->reclaim_wqueue, &rw->work); } static inline struct object_cache_entry * @@ -634,7 +644,7 @@ static int object_cache_lookup(struct object_cache *oc, uint32_t idx, ret = SD_RES_EIO; } else { add_to_lru_cache(oc, idx, writeback); - object_cache_try_to_reclaim(); + object_cache_try_to_reclaim(0); } close(fd); @@ -737,14 +747,21 @@ static int object_cache_pull(struct object_cache *oc, uint32_t idx) ret = create_cache_object(oc, idx, buf, rsp->data_length, rsp->obj.offset, data_length); /* - * We don't try to reclaim objects to avoid object ping-pong + * We try to delay reclaim objects to avoid object ping-pong * because the pulled object is clean and likely to be reclaimed - * in a full cache. + * in a cache over high watermark. We can't simply pass without + * waking up reclaimer because the cache is easy to be filled + * full with a read storm. */ - if (ret == SD_RES_SUCCESS) + switch (ret) { + case SD_RES_SUCCESS: add_to_lru_cache(oc, idx, false); - else if (ret == SD_RES_OID_EXIST) + object_cache_try_to_reclaim(1); + break; + case SD_RES_OID_EXIST: ret = SD_RES_SUCCESS; + break; + } } free(buf); out: diff --git a/sheep/ops.c b/sheep/ops.c index f3e68a9..3cdadfb 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -642,7 +642,7 @@ static int local_set_cache_size(const struct sd_req *req, struct sd_rsp *rsp, uatomic_set(&sys->object_cache_size, cache_size); dprintf("Max cache size set to %dM\n", cache_size); - object_cache_try_to_reclaim(); + object_cache_try_to_reclaim(0); return SD_RES_SUCCESS; } diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 44380fc..b6a703f 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -383,7 +383,7 @@ void object_cache_format(void); bool bypass_object_cache(const struct request *req); bool object_is_cached(uint64_t oid); -void object_cache_try_to_reclaim(void); +void object_cache_try_to_reclaim(int); int object_cache_handle_request(struct request *req); int object_cache_write(uint64_t oid, char *data, unsigned int datalen, uint64_t offset, uint16_t flags, bool create); -- 1.7.9.5 -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog