From: levin li <xingke....@taobao.com> In gateway_{read,write,create_and_write}_obj and {read,write}_object, we should use the copy number that belongs to each request or object, instead of the global copy number sys->nr_copies or a value calculated from the vnodes.
Signed-off-by: levin li <xingke....@taobao.com> --- sheep/farm/trunk.c | 2 +- sheep/gateway.c | 4 +- sheep/ops.c | 2 +- sheep/recovery.c | 4 +- sheep/request.c | 2 +- sheep/sheep_priv.h | 6 ++-- sheep/store.c | 9 +++++-- sheep/vdi.c | 56 +++++++++++++++++++++++++++++++++------------------ 8 files changed, 52 insertions(+), 33 deletions(-) diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c index cd1fd20..1bfb6b3 100644 --- a/sheep/farm/trunk.c +++ b/sheep/farm/trunk.c @@ -245,7 +245,7 @@ static int oid_stale(uint64_t oid) struct sd_vnode *obj_vnodes[SD_MAX_COPIES]; vinfo = get_vnode_info(); - nr_copies = get_nr_copies(vinfo); + nr_copies = get_obj_copy_number(oid); oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid, nr_copies, obj_vnodes); diff --git a/sheep/gateway.c b/sheep/gateway.c index bdbd08c..41d712b 100644 --- a/sheep/gateway.c +++ b/sheep/gateway.c @@ -35,7 +35,7 @@ int gateway_read_obj(struct request *req) if (sys->enable_write_cache && !req->local && !bypass_object_cache(req)) return object_cache_handle_request(req); - nr_copies = get_nr_copies(req->vinfo); + nr_copies = get_req_copy_number(req); oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid, nr_copies, obj_vnodes); for (i = 0; i < nr_copies; i++) { @@ -245,7 +245,7 @@ static int gateway_forward_request(struct request *req) write_info_init(&wi); wlen = hdr.data_length; - nr_copies = get_nr_copies(req->vinfo); + nr_copies = get_req_copy_number(req); oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid, nr_copies, obj_vnodes); diff --git a/sheep/ops.c b/sheep/ops.c index ce0f8a4..faa50b5 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -664,7 +664,7 @@ static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch, char name[128]; int rounded_rand, local = -1; - nr_copies = get_nr_copies(vnodes); + nr_copies = get_obj_copy_number(oid); oid_to_vnodes(vnodes->vnodes, vnodes->nr_vnodes, oid, nr_copies, obj_vnodes); diff --git a/sheep/recovery.c b/sheep/recovery.c index 
3fdcad2..060730b 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -172,7 +172,7 @@ again: oid, tgt_epoch); /* Let's do a breadth-first search */ - nr_copies = get_nr_copies(old); + nr_copies = get_obj_copy_number(oid); for (i = 0; i < nr_copies; i++) { struct sd_vnode *tgt_vnode = oid_to_vnode(old->vnodes, old->nr_vnodes, @@ -515,8 +515,8 @@ static void screen_object_list(struct recovery_work *rw, int nr_objs; int i, j; - nr_objs = get_nr_copies(rw->cur_vinfo); for (i = 0; i < nr_oids; i++) { + nr_objs = get_obj_copy_number(oids[i]); oid_to_vnodes(rw->cur_vinfo->vnodes, rw->cur_vinfo->nr_vnodes, oids[i], nr_objs, vnodes); for (j = 0; j < nr_objs; j++) { diff --git a/sheep/request.c b/sheep/request.c index ab7c63a..ce4315b 100644 --- a/sheep/request.c +++ b/sheep/request.c @@ -30,7 +30,7 @@ static int is_access_local(struct request *req, uint64_t oid) int nr_copies; int i; - nr_copies = get_nr_copies(req->vinfo); + nr_copies = get_req_copy_number(req); oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid, nr_copies, obj_vnodes); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 335e337..ec2b4dc 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -282,10 +282,10 @@ int is_recovery_init(void); int node_in_recovery(void); int write_object(uint64_t oid, char *data, unsigned int datalen, - uint64_t offset, uint16_t flags, int create); + uint64_t offset, uint16_t flags, int create, int nr_copies); int read_object(uint64_t oid, char *data, unsigned int datalen, - uint64_t offset); -int remove_object(uint64_t oid); + uint64_t offset, int nr_copies); +int remove_object(uint64_t oid, int nr_copies); int exec_local_req(struct sd_req *rq, void *data); void local_req_init(void); diff --git a/sheep/store.c b/sheep/store.c index bebb2c0..4839d13 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -548,7 +548,7 @@ int init_store(const char *d, int enable_write_cache) * Write data to both local object cache (if enabled) and backends */ int 
write_object(uint64_t oid, char *data, unsigned int datalen, - uint64_t offset, uint16_t flags, int create) + uint64_t offset, uint16_t flags, int create, int nr_copies) { struct sd_req hdr; int ret; @@ -576,6 +576,7 @@ forward_write: hdr.obj.oid = oid; hdr.obj.offset = offset; + hdr.obj.copies = nr_copies; ret = exec_local_req(&hdr, data); if (ret != SD_RES_SUCCESS) @@ -589,7 +590,7 @@ forward_write: * try read backends */ int read_object(uint64_t oid, char *data, unsigned int datalen, - uint64_t offset) + uint64_t offset, int nr_copies) { struct sd_req hdr; int ret; @@ -609,6 +610,7 @@ forward_read: hdr.data_length = datalen; hdr.obj.oid = oid; hdr.obj.offset = offset; + hdr.obj.copies = nr_copies; ret = exec_local_req(&hdr, data); if (ret != SD_RES_SUCCESS) @@ -617,13 +619,14 @@ forward_read: return ret; } -int remove_object(uint64_t oid) +int remove_object(uint64_t oid, int copies) { struct sd_req hdr; int ret; sd_init_req(&hdr, SD_OP_REMOVE_OBJ); hdr.obj.oid = oid; + hdr.obj.copies = copies; ret = exec_local_req(&hdr, NULL); if (ret != SD_RES_SUCCESS) diff --git a/sheep/vdi.c b/sheep/vdi.c index 72fbd7b..c39074f 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -300,6 +300,7 @@ int vdi_exist(uint32_t vid) { struct sheepdog_inode *inode; int ret = 1; + int nr_copies; inode = zalloc(sizeof(*inode)); if (!inode) { @@ -307,8 +308,10 @@ int vdi_exist(uint32_t vid) goto out; } + nr_copies = get_vdi_copy_number(vid); + ret = read_object(vid_to_vdi_oid(vid), (char *)inode, - sizeof(*inode), 0); + sizeof(*inode), 0, nr_copies); if (ret != SD_RES_SUCCESS) { eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid); ret = 0; @@ -359,7 +362,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid, if (iocb->base_vid) { ret = read_object(vid_to_vdi_oid(iocb->base_vid), (char *)base, - sizeof(*base), 0); + sizeof(*base), 0, iocb->nr_copies); if (ret != SD_RES_SUCCESS) { ret = SD_RES_BASE_VDI_READ; goto out; @@ -374,7 +377,7 @@ static int create_vdi_obj(struct 
vdi_iocb *iocb, uint32_t new_vid, name, cur_vid, iocb->base_vid); ret = read_object(vid_to_vdi_oid(cur_vid), (char *)cur, - SD_INODE_HEADER_SIZE, 0); + SD_INODE_HEADER_SIZE, 0, iocb->nr_copies); if (ret != SD_RES_SUCCESS) { vprintf(SDOG_ERR, "failed\n"); ret = SD_RES_BASE_VDI_READ; @@ -416,7 +419,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid, if (iocb->is_snapshot && cur_vid != iocb->base_vid) { ret = write_object(vid_to_vdi_oid(cur_vid), (char *)cur, - SD_INODE_HEADER_SIZE, 0, 0, 0); + SD_INODE_HEADER_SIZE, 0, 0, 0, + iocb->nr_copies); if (ret != 0) { vprintf(SDOG_ERR, "failed\n"); ret = SD_RES_BASE_VDI_READ; @@ -426,7 +430,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid, if (iocb->base_vid) { ret = write_object(vid_to_vdi_oid(iocb->base_vid), (char *)base, - SD_INODE_HEADER_SIZE, 0, 0, 0); + SD_INODE_HEADER_SIZE, 0, 0, 0, + iocb->nr_copies); if (ret != 0) { vprintf(SDOG_ERR, "failed\n"); ret = SD_RES_BASE_VDI_WRITE; @@ -435,7 +440,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid, } ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new), - 0, 0, 1); + 0, 0, 1, iocb->nr_copies); if (ret != 0) ret = SD_RES_VDI_WRITE; @@ -455,6 +460,7 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name, unsigned long i; int ret = SD_RES_NO_MEM; int vdi_found = 0; + int nr_copies; inode = malloc(SD_INODE_HEADER_SIZE); if (!inode) { @@ -463,8 +469,9 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name, } for (i = start; i >= end; i--) { + nr_copies = get_vdi_copy_number(i); ret = read_object(vid_to_vdi_oid(i), (char *)inode, - SD_INODE_HEADER_SIZE, 0); + SD_INODE_HEADER_SIZE, 0, nr_copies); if (ret != SD_RES_SUCCESS) { ret = SD_RES_EIO; goto out_free_inode; @@ -662,6 +669,7 @@ struct deletion_work { struct request *req; uint32_t vid; + int nr_copies; int count; uint32_t *buf; @@ -681,7 +689,7 @@ static int delete_inode(struct deletion_work *dw) } 
ret = read_object(vid_to_vdi_oid(dw->vid), (char *)inode, - SD_INODE_HEADER_SIZE, 0); + SD_INODE_HEADER_SIZE, 0, dw->nr_copies); if (ret != SD_RES_SUCCESS) { ret = SD_RES_EIO; goto out; @@ -690,7 +698,7 @@ static int delete_inode(struct deletion_work *dw) memset(inode->name, 0, sizeof(inode->name)); ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode, - SD_INODE_HEADER_SIZE, 0, 0, 0); + SD_INODE_HEADER_SIZE, 0, 0, 0, dw->nr_copies); if (ret != 0) { ret = SD_RES_EIO; goto out; @@ -723,6 +731,7 @@ static void delete_one(struct work *work) uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1); int ret, i, nr_deleted; struct sheepdog_inode *inode = NULL; + int nr_copies; eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id); @@ -732,8 +741,9 @@ static void delete_one(struct work *work) goto out; } + nr_copies = get_vdi_copy_number(vdi_id); ret = read_object(vid_to_vdi_oid(vdi_id), - (void *)inode, sizeof(*inode), 0); + (void *)inode, sizeof(*inode), 0, nr_copies); if (ret != SD_RES_SUCCESS) { eprintf("cannot find VDI object\n"); @@ -757,7 +767,7 @@ static void delete_one(struct work *work) continue; } - ret = remove_object(oid); + ret = remove_object(oid, nr_copies); if (ret != SD_RES_SUCCESS) eprintf("remove object %" PRIx64 " fail, %d\n", oid, ret); @@ -774,7 +784,7 @@ static void delete_one(struct work *work) memset(inode->name, 0, sizeof(inode->name)); write_object(vid_to_vdi_oid(vdi_id), (void *)inode, - sizeof(*inode), 0, 0, 0); + sizeof(*inode), 0, 0, 0, nr_copies); out: free(inode); } @@ -811,6 +821,7 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid) struct sheepdog_inode *inode = NULL; int done = dw->count; uint32_t vid; + int nr_copies; inode = malloc(SD_INODE_HEADER_SIZE); if (!inode) { @@ -821,8 +832,9 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid) dw->buf[dw->count++] = root_vid; again: vid = dw->buf[done++]; + nr_copies = get_vdi_copy_number(vid); ret = read_object(vid_to_vdi_oid(vid), (char 
*)inode, - SD_INODE_HEADER_SIZE, 0); + SD_INODE_HEADER_SIZE, 0, nr_copies); if (ret != SD_RES_SUCCESS) { eprintf("cannot find VDI object\n"); @@ -851,7 +863,7 @@ out: static uint64_t get_vdi_root(uint32_t vid, int *cloned) { - int ret; + int ret, nr_copies; struct sheepdog_inode *inode = NULL; *cloned = 0; @@ -863,8 +875,9 @@ static uint64_t get_vdi_root(uint32_t vid, int *cloned) goto out; } next: + nr_copies = get_vdi_copy_number(vid); ret = read_object(vid_to_vdi_oid(vid), (char *)inode, - SD_INODE_HEADER_SIZE, 0); + SD_INODE_HEADER_SIZE, 0, nr_copies); if (vid == inode->vdi_id && inode->snap_id == 1 && inode->parent_vdi_id != 0 @@ -910,6 +923,7 @@ static int start_deletion(struct request *req, uint32_t vid) dw->count = 0; dw->vid = vid; dw->req = req; + dw->nr_copies = get_vdi_copy_number(vid); dw->work.fn = delete_one; dw->work.done = delete_one_done; @@ -965,7 +979,7 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len, struct sheepdog_vdi_attr tmp_attr; uint64_t oid, hval; uint32_t end; - int ret; + int ret, nr_copies; vattr->ctime = create_time; @@ -979,12 +993,13 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len, end = *attrid - 1; while (*attrid != end) { oid = vid_to_attr_oid(vid, *attrid); + nr_copies = get_vdi_copy_number(vid); ret = read_object(oid, (char *)&tmp_attr, - sizeof(tmp_attr), 0); + sizeof(tmp_attr), 0, nr_copies); if (ret == SD_RES_NO_OBJ && wr) { ret = write_object(oid, (char *)vattr, - data_len, 0, 0, 1); + data_len, 0, 0, 1, nr_copies); if (ret) ret = SD_RES_EIO; else @@ -1005,14 +1020,15 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len, else if (delete) { ret = write_object(oid, (char *)"", 1, offsetof(struct sheepdog_vdi_attr, name), - 0, 0); + 0, 0, nr_copies); if (ret) ret = SD_RES_EIO; else ret = SD_RES_SUCCESS; } else if (wr) { ret = write_object(oid, (char *)vattr, - SD_ATTR_OBJ_SIZE, 0, 0, 0); + SD_ATTR_OBJ_SIZE, 0, 0, 0, + nr_copies); if (ret) ret = SD_RES_EIO; -- 1.7.1 -- sheepdog 
mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog