From: levin li <xingke....@taobao.com>

In gateway_{read,write,create_and_write}_obj and {read,write}_object,
we should use the copy number that belongs to each request or object,
instead of using the global copy number sys->nr_copies or the one
calculated from vnodes.

Signed-off-by: levin li <xingke....@taobao.com>
---
 sheep/farm/trunk.c |    2 +-
 sheep/gateway.c    |    4 +-
 sheep/ops.c        |    2 +-
 sheep/recovery.c   |    4 +-
 sheep/request.c    |    2 +-
 sheep/sheep_priv.h |    6 ++--
 sheep/store.c      |    9 +++++--
 sheep/vdi.c        |   56 +++++++++++++++++++++++++++++++++------------------
 8 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index cd1fd20..1bfb6b3 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -245,7 +245,7 @@ static int oid_stale(uint64_t oid)
        struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 
        vinfo = get_vnode_info();
-       nr_copies = get_nr_copies(vinfo);
+       nr_copies = get_obj_copy_number(oid);
 
        oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
                      nr_copies, obj_vnodes);
diff --git a/sheep/gateway.c b/sheep/gateway.c
index bdbd08c..41d712b 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -35,7 +35,7 @@ int gateway_read_obj(struct request *req)
        if (sys->enable_write_cache && !req->local && !bypass_object_cache(req))
                return object_cache_handle_request(req);
 
-       nr_copies = get_nr_copies(req->vinfo);
+       nr_copies = get_req_copy_number(req);
        oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
                      nr_copies, obj_vnodes);
        for (i = 0; i < nr_copies; i++) {
@@ -245,7 +245,7 @@ static int gateway_forward_request(struct request *req)
 
        write_info_init(&wi);
        wlen = hdr.data_length;
-       nr_copies = get_nr_copies(req->vinfo);
+       nr_copies = get_req_copy_number(req);
        oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
                      nr_copies, obj_vnodes);
 
diff --git a/sheep/ops.c b/sheep/ops.c
index ce0f8a4..faa50b5 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -664,7 +664,7 @@ static int read_copy_from_replica(struct vnode_info 
*vnodes, uint32_t epoch,
        char name[128];
        int rounded_rand, local = -1;
 
-       nr_copies = get_nr_copies(vnodes);
+       nr_copies = get_obj_copy_number(oid);
        oid_to_vnodes(vnodes->vnodes, vnodes->nr_vnodes, oid,
                      nr_copies, obj_vnodes);
 
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 3fdcad2..060730b 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -172,7 +172,7 @@ again:
                oid, tgt_epoch);
 
        /* Let's do a breadth-first search */
-       nr_copies = get_nr_copies(old);
+       nr_copies = get_obj_copy_number(oid);
        for (i = 0; i < nr_copies; i++) {
                struct sd_vnode *tgt_vnode = oid_to_vnode(old->vnodes,
                                                          old->nr_vnodes,
@@ -515,8 +515,8 @@ static void screen_object_list(struct recovery_work *rw,
        int nr_objs;
        int i, j;
 
-       nr_objs = get_nr_copies(rw->cur_vinfo);
        for (i = 0; i < nr_oids; i++) {
+               nr_objs = get_obj_copy_number(oids[i]);
                oid_to_vnodes(rw->cur_vinfo->vnodes, rw->cur_vinfo->nr_vnodes,
                              oids[i], nr_objs, vnodes);
                for (j = 0; j < nr_objs; j++) {
diff --git a/sheep/request.c b/sheep/request.c
index ab7c63a..ce4315b 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -30,7 +30,7 @@ static int is_access_local(struct request *req, uint64_t oid)
        int nr_copies;
        int i;
 
-       nr_copies = get_nr_copies(req->vinfo);
+       nr_copies = get_req_copy_number(req);
        oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
                      nr_copies, obj_vnodes);
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 335e337..ec2b4dc 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -282,10 +282,10 @@ int is_recovery_init(void);
 int node_in_recovery(void);
 
 int write_object(uint64_t oid, char *data, unsigned int datalen,
-                uint64_t offset, uint16_t flags, int create);
+                uint64_t offset, uint16_t flags, int create, int nr_copies);
 int read_object(uint64_t oid, char *data, unsigned int datalen,
-               uint64_t offset);
-int remove_object(uint64_t oid);
+               uint64_t offset, int nr_copies);
+int remove_object(uint64_t oid, int nr_copies);
 
 int exec_local_req(struct sd_req *rq, void *data);
 void local_req_init(void);
diff --git a/sheep/store.c b/sheep/store.c
index bebb2c0..4839d13 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -548,7 +548,7 @@ int init_store(const char *d, int enable_write_cache)
  * Write data to both local object cache (if enabled) and backends
  */
 int write_object(uint64_t oid, char *data, unsigned int datalen,
-                uint64_t offset, uint16_t flags, int create)
+                uint64_t offset, uint16_t flags, int create, int nr_copies)
 {
        struct sd_req hdr;
        int ret;
@@ -576,6 +576,7 @@ forward_write:
 
        hdr.obj.oid = oid;
        hdr.obj.offset = offset;
+       hdr.obj.copies = nr_copies;
 
        ret = exec_local_req(&hdr, data);
        if (ret != SD_RES_SUCCESS)
@@ -589,7 +590,7 @@ forward_write:
  * try read backends
  */
 int read_object(uint64_t oid, char *data, unsigned int datalen,
-               uint64_t offset)
+               uint64_t offset, int nr_copies)
 {
        struct sd_req hdr;
        int ret;
@@ -609,6 +610,7 @@ forward_read:
        hdr.data_length = datalen;
        hdr.obj.oid = oid;
        hdr.obj.offset = offset;
+       hdr.obj.copies = nr_copies;
 
        ret = exec_local_req(&hdr, data);
        if (ret != SD_RES_SUCCESS)
@@ -617,13 +619,14 @@ forward_read:
        return ret;
 }
 
-int remove_object(uint64_t oid)
+int remove_object(uint64_t oid, int copies)
 {
        struct sd_req hdr;
        int ret;
 
        sd_init_req(&hdr, SD_OP_REMOVE_OBJ);
        hdr.obj.oid = oid;
+       hdr.obj.copies = copies;
 
        ret = exec_local_req(&hdr, NULL);
        if (ret != SD_RES_SUCCESS)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 72fbd7b..c39074f 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -300,6 +300,7 @@ int vdi_exist(uint32_t vid)
 {
        struct sheepdog_inode *inode;
        int ret = 1;
+       int nr_copies;
 
        inode = zalloc(sizeof(*inode));
        if (!inode) {
@@ -307,8 +308,10 @@ int vdi_exist(uint32_t vid)
                goto out;
        }
 
+       nr_copies = get_vdi_copy_number(vid);
+
        ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
-                         sizeof(*inode), 0);
+                         sizeof(*inode), 0, nr_copies);
        if (ret != SD_RES_SUCCESS) {
                eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid);
                ret = 0;
@@ -359,7 +362,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t 
new_vid,
 
        if (iocb->base_vid) {
                ret = read_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
-                                 sizeof(*base), 0);
+                                 sizeof(*base), 0, iocb->nr_copies);
                if (ret != SD_RES_SUCCESS) {
                        ret = SD_RES_BASE_VDI_READ;
                        goto out;
@@ -374,7 +377,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t 
new_vid,
                                name, cur_vid, iocb->base_vid);
 
                        ret = read_object(vid_to_vdi_oid(cur_vid), (char *)cur,
-                                         SD_INODE_HEADER_SIZE, 0);
+                                         SD_INODE_HEADER_SIZE, 0, 
iocb->nr_copies);
                        if (ret != SD_RES_SUCCESS) {
                                vprintf(SDOG_ERR, "failed\n");
                                ret = SD_RES_BASE_VDI_READ;
@@ -416,7 +419,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t 
new_vid,
 
        if (iocb->is_snapshot && cur_vid != iocb->base_vid) {
                ret = write_object(vid_to_vdi_oid(cur_vid), (char *)cur,
-                                  SD_INODE_HEADER_SIZE, 0, 0, 0);
+                                  SD_INODE_HEADER_SIZE, 0, 0, 0,
+                                  iocb->nr_copies);
                if (ret != 0) {
                        vprintf(SDOG_ERR, "failed\n");
                        ret = SD_RES_BASE_VDI_READ;
@@ -426,7 +430,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t 
new_vid,
 
        if (iocb->base_vid) {
                ret = write_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
-                                  SD_INODE_HEADER_SIZE, 0, 0, 0);
+                                  SD_INODE_HEADER_SIZE, 0, 0, 0,
+                                  iocb->nr_copies);
                if (ret != 0) {
                        vprintf(SDOG_ERR, "failed\n");
                        ret = SD_RES_BASE_VDI_WRITE;
@@ -435,7 +440,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t 
new_vid,
        }
 
        ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new),
-                          0, 0, 1);
+                          0, 0, 1, iocb->nr_copies);
        if (ret != 0)
                ret = SD_RES_VDI_WRITE;
 
@@ -455,6 +460,7 @@ static int find_first_vdi(unsigned long start, unsigned 
long end, char *name,
        unsigned long i;
        int ret = SD_RES_NO_MEM;
        int vdi_found = 0;
+       int nr_copies;
 
        inode = malloc(SD_INODE_HEADER_SIZE);
        if (!inode) {
@@ -463,8 +469,9 @@ static int find_first_vdi(unsigned long start, unsigned 
long end, char *name,
        }
 
        for (i = start; i >= end; i--) {
+               nr_copies = get_vdi_copy_number(i);
                ret = read_object(vid_to_vdi_oid(i), (char *)inode,
-                                 SD_INODE_HEADER_SIZE, 0);
+                                 SD_INODE_HEADER_SIZE, 0, nr_copies);
                if (ret != SD_RES_SUCCESS) {
                        ret = SD_RES_EIO;
                        goto out_free_inode;
@@ -662,6 +669,7 @@ struct deletion_work {
        struct request *req;
 
        uint32_t vid;
+       int nr_copies;
 
        int count;
        uint32_t *buf;
@@ -681,7 +689,7 @@ static int delete_inode(struct deletion_work *dw)
        }
 
        ret = read_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-                         SD_INODE_HEADER_SIZE, 0);
+                         SD_INODE_HEADER_SIZE, 0, dw->nr_copies);
        if (ret != SD_RES_SUCCESS) {
                ret = SD_RES_EIO;
                goto out;
@@ -690,7 +698,7 @@ static int delete_inode(struct deletion_work *dw)
        memset(inode->name, 0, sizeof(inode->name));
 
        ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-                          SD_INODE_HEADER_SIZE, 0, 0, 0);
+                          SD_INODE_HEADER_SIZE, 0, 0, 0, dw->nr_copies);
        if (ret != 0) {
                ret = SD_RES_EIO;
                goto out;
@@ -723,6 +731,7 @@ static void delete_one(struct work *work)
        uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
        int ret, i, nr_deleted;
        struct sheepdog_inode *inode = NULL;
+       int nr_copies;
 
        eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
 
@@ -732,8 +741,9 @@ static void delete_one(struct work *work)
                goto out;
        }
 
+       nr_copies = get_vdi_copy_number(vdi_id);
        ret = read_object(vid_to_vdi_oid(vdi_id),
-                         (void *)inode, sizeof(*inode), 0);
+                         (void *)inode, sizeof(*inode), 0, nr_copies);
 
        if (ret != SD_RES_SUCCESS) {
                eprintf("cannot find VDI object\n");
@@ -757,7 +767,7 @@ static void delete_one(struct work *work)
                        continue;
                }
 
-               ret = remove_object(oid);
+               ret = remove_object(oid, nr_copies);
                if (ret != SD_RES_SUCCESS)
                        eprintf("remove object %" PRIx64 " fail, %d\n", oid, 
ret);
 
@@ -774,7 +784,7 @@ static void delete_one(struct work *work)
        memset(inode->name, 0, sizeof(inode->name));
 
        write_object(vid_to_vdi_oid(vdi_id), (void *)inode,
-                    sizeof(*inode), 0, 0, 0);
+                    sizeof(*inode), 0, 0, 0, nr_copies);
 out:
        free(inode);
 }
@@ -811,6 +821,7 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t 
root_vid)
        struct sheepdog_inode *inode = NULL;
        int done = dw->count;
        uint32_t vid;
+       int nr_copies;
 
        inode = malloc(SD_INODE_HEADER_SIZE);
        if (!inode) {
@@ -821,8 +832,9 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t 
root_vid)
        dw->buf[dw->count++] = root_vid;
 again:
        vid = dw->buf[done++];
+       nr_copies = get_vdi_copy_number(vid);
        ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
-                         SD_INODE_HEADER_SIZE, 0);
+                         SD_INODE_HEADER_SIZE, 0, nr_copies);
 
        if (ret != SD_RES_SUCCESS) {
                eprintf("cannot find VDI object\n");
@@ -851,7 +863,7 @@ out:
 
 static uint64_t get_vdi_root(uint32_t vid, int *cloned)
 {
-       int ret;
+       int ret, nr_copies;
        struct sheepdog_inode *inode = NULL;
 
        *cloned = 0;
@@ -863,8 +875,9 @@ static uint64_t get_vdi_root(uint32_t vid, int *cloned)
                goto out;
        }
 next:
+       nr_copies = get_vdi_copy_number(vid);
        ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
-                         SD_INODE_HEADER_SIZE, 0);
+                         SD_INODE_HEADER_SIZE, 0, nr_copies);
 
        if (vid == inode->vdi_id && inode->snap_id == 1
                        && inode->parent_vdi_id != 0
@@ -910,6 +923,7 @@ static int start_deletion(struct request *req, uint32_t vid)
        dw->count = 0;
        dw->vid = vid;
        dw->req = req;
+       dw->nr_copies = get_vdi_copy_number(vid);
 
        dw->work.fn = delete_one;
        dw->work.done = delete_one_done;
@@ -965,7 +979,7 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int 
data_len,
        struct sheepdog_vdi_attr tmp_attr;
        uint64_t oid, hval;
        uint32_t end;
-       int ret;
+       int ret, nr_copies;
 
        vattr->ctime = create_time;
 
@@ -979,12 +993,13 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int 
data_len,
        end = *attrid - 1;
        while (*attrid != end) {
                oid = vid_to_attr_oid(vid, *attrid);
+               nr_copies = get_vdi_copy_number(vid);
                ret = read_object(oid, (char *)&tmp_attr,
-                                 sizeof(tmp_attr), 0);
+                                 sizeof(tmp_attr), 0, nr_copies);
 
                if (ret == SD_RES_NO_OBJ && wr) {
                        ret = write_object(oid, (char *)vattr,
-                                          data_len, 0, 0, 1);
+                                          data_len, 0, 0, 1, nr_copies);
                        if (ret)
                                ret = SD_RES_EIO;
                        else
@@ -1005,14 +1020,15 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int 
data_len,
                        else if (delete) {
                                ret = write_object(oid, (char *)"", 1,
                                                   offsetof(struct 
sheepdog_vdi_attr, name),
-                                                  0, 0);
+                                                  0, 0, nr_copies);
                                if (ret)
                                        ret = SD_RES_EIO;
                                else
                                        ret = SD_RES_SUCCESS;
                        } else if (wr) {
                                ret = write_object(oid, (char *)vattr,
-                                                  SD_ATTR_OBJ_SIZE, 0, 0, 0);
+                                                  SD_ATTR_OBJ_SIZE, 0, 0, 0,
+                                                  nr_copies);
 
                                if (ret)
                                        ret = SD_RES_EIO;
-- 
1.7.1

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to