From: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp> To: Teruaki Ishizaki <ishizaki.teruaki@lab.ntt.co.jp> Cc: sheepdog@lists.wpkg.org Subject: Re: [sheepdog] [PATCH 3/3] add selectable object_size support of VDI operation In-Reply-To: <1418129364-29585-4-git-send-email-ishizaki.teruaki@lab.ntt.co.jp> References: <1418129364-29585-1-git-send-email-ishizaki.teruaki@lab.ntt.co.jp> <1418129364-29585-4-git-send-email-ishizaki.teruaki@lab.ntt.co.jp> User-Agent: Wanderlust/2.15.9 (Almost Unreal) SEMI/1.14.6 (Maruoka) FLIM/1.14.9 (=?ISO-2022-JP-2?B?R29qGyQoRCtXGyhC?=) APEL/10.8 Emacs/23.4 (x86_64-pc-linux-gnu) MULE/6.0 (HANACHIRUSATO) MIME-Version: 1.0 (generated by SEMI 1.14.6 - "Maruoka") Content-Type: text/plain; charset=US-ASCII
At Tue, 9 Dec 2014 21:49:24 +0900, Teruaki Ishizaki wrote: > > Data object size was fix to 4MB and not selectable. > This patch add feature to select data object size of VDI. > > If you want to use 8MB data object_size, specify the shift bit num. > ex) dog vdi create -z 23 hogehoge 100M > > Signed-off-by: Teruaki Ishizaki <ishizaki.teruaki@lab.ntt.co.jp> > --- > dog/common.c | 7 +- > dog/dog.h | 6 +- > dog/farm/farm.c | 17 ++- > dog/vdi.c | 254 ++++++++++++++++++++++++++++++------------- > include/fec.h | 12 +- > include/sheepdog_proto.h | 7 +- > lib/fec.c | 9 +- > sheep/gateway.c | 2 +- > sheep/group.c | 3 +- > sheep/journal.c | 5 +- > sheep/object_cache.c | 27 +++-- > sheep/ops.c | 14 ++- > sheep/plain_store.c | 17 ++- > sheep/recovery.c | 3 +- > sheep/sheep_priv.h | 6 +- > sheep/vdi.c | 82 +++++++++++--- > tests/unit/sheep/test_vdi.c | 6 +- > 17 files changed, 336 insertions(+), 141 deletions(-) > > diff --git a/dog/common.c b/dog/common.c > index 2d8a173..11011a7 100644 > --- a/dog/common.c > +++ b/dog/common.c > @@ -365,7 +365,8 @@ void show_progress(uint64_t done, uint64_t total, bool raw) > free(buf); > } > > -size_t get_store_objsize(uint8_t copy_policy, uint64_t oid) > +size_t get_store_objsize(uint8_t copy_policy, uint32_t object_size, > + uint64_t oid) > { > if (is_vdi_obj(oid)) > return SD_INODE_SIZE; > @@ -375,9 +376,9 @@ size_t get_store_objsize(uint8_t copy_policy, uint64_t oid) > int d; > > ec_policy_to_dp(copy_policy, &d, NULL); > - return SD_DATA_OBJ_SIZE / d; > + return object_size / d; > } > - return get_objsize(oid); > + return get_objsize(oid, object_size); > } > > bool is_erasure_oid(uint64_t oid, uint8_t policy) > diff --git a/dog/dog.h b/dog/dog.h > index 80becc6..d460a0b 100644 > --- a/dog/dog.h > +++ b/dog/dog.h > @@ -87,10 +87,12 @@ void confirm(const char *message); > void work_queue_wait(struct work_queue *q); > int do_vdi_create(const char *vdiname, int64_t vdi_size, > uint32_t base_vid, uint32_t *vdi_id, bool snapshot, > - uint8_t 
nr_copies, uint8_t copy_policy, uint8_t store_policy); > + uint8_t nr_copies, uint8_t copy_policy, > + uint8_t store_policy, uint32_t object_size); > int do_vdi_check(const struct sd_inode *inode); > void show_progress(uint64_t done, uint64_t total, bool raw); > -size_t get_store_objsize(uint8_t copy_policy, uint64_t oid); > +size_t get_store_objsize(uint8_t copy_policy, uint32_t object_size, > + uint64_t oid); > bool is_erasure_oid(uint64_t oid, uint8_t policy); > uint8_t parse_copy(const char *str, uint8_t *copy_policy); > > diff --git a/dog/farm/farm.c b/dog/farm/farm.c > index 9414d42..c5fa40e 100644 > --- a/dog/farm/farm.c > +++ b/dog/farm/farm.c > @@ -38,6 +38,7 @@ struct active_vdi_entry { > uint8_t nr_copies; > uint8_t copy_policy; > uint8_t store_policy; > + uint32_t object_size; > }; > > struct registered_obj_entry { > @@ -77,6 +78,7 @@ static void update_active_vdi_entry(struct active_vdi_entry *vdi, > vdi->nr_copies = new->nr_copies; > vdi->copy_policy = new->copy_policy; > vdi->store_policy = new->store_policy; > + vdi->object_size = (UINT32_C(1) << new->block_size_shift); > } > > static void add_active_vdi(struct sd_inode *new) > @@ -131,7 +133,8 @@ static int create_active_vdis(void) > vdi->vdi_id, &new_vid, > false, vdi->nr_copies, > vdi->copy_policy, > - vdi->store_policy) < 0) > + vdi->store_policy, > + vdi->object_size) < 0) > return -1; > } > return 0; > @@ -202,7 +205,7 @@ out: > } > > static int notify_vdi_add(uint32_t vdi_id, uint8_t nr_copies, > - uint8_t copy_policy) > + uint8_t copy_policy, uint32_t object_size) > { > int ret; > struct sd_req hdr; > @@ -213,13 +216,14 @@ static int notify_vdi_add(uint32_t vdi_id, uint8_t nr_copies, > hdr.vdi_state.new_vid = vdi_id; > hdr.vdi_state.copies = nr_copies; > hdr.vdi_state.copy_policy = copy_policy; > + hdr.vdi_state.object_size = object_size; > hdr.vdi_state.set_bitmap = true; > > ret = dog_exec_req(&sd_nid, &hdr, buf); > > if (ret < 0) > - sd_err("Fail to notify vdi add event(%"PRIx32", %d)", 
vdi_id, > - nr_copies); > + sd_err("Fail to notify vdi add event(%"PRIx32", %d" > + ", %"PRIu32")", vdi_id, nr_copies, object_size); > if (rsp->result != SD_RES_SUCCESS) { > sd_err("%s", sd_strerror(rsp->result)); > ret = -1; > @@ -261,7 +265,7 @@ static void do_save_object(struct work *work) > > sw = container_of(work, struct snapshot_work, work); > > - size = get_objsize(sw->entry.oid); > + size = get_objsize(sw->entry.oid, sw->entry.object_size); > buf = xmalloc(size); > > if (dog_read_object(sw->entry.oid, buf, size, 0, true) < 0) > @@ -413,7 +417,8 @@ static void do_load_object(struct work *work) > vid = oid_to_vid(sw->entry.oid); > if (register_vdi(vid)) { > if (notify_vdi_add(vid, sw->entry.nr_copies, > - sw->entry.copy_policy) < 0) > + sw->entry.copy_policy, > + sw->entry.object_size) < 0) > goto error; > } > > diff --git a/dog/vdi.c b/dog/vdi.c > index 5353062..3b0c408 100644 > --- a/dog/vdi.c > +++ b/dog/vdi.c > @@ -38,6 +38,8 @@ static struct sd_option vdi_options[] = { > {'o', "oid", true, "specify the object id of the tracking object"}, > {'e', "exist", false, "only check objects exist or not,\n" > " neither comparing nor repairing"}, > + {'z', "objsize", true, "specify the bit shift num for" > + " data object size"}, > { 0, NULL, false, NULL }, > }; > > @@ -49,6 +51,7 @@ static struct vdi_cmd_data { > bool delete; > bool prealloc; > int nr_copies; > + uint32_t object_size; > bool writeback; > int from_snapshot_id; > char from_snapshot_tag[SD_MAX_VDI_TAG_LEN]; > @@ -67,6 +70,7 @@ struct get_vdi_info { > uint32_t snapid; > uint8_t nr_copies; > uint8_t copy_policy; > + uint32_t object_size; > }; > > int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len, uint64_t offset, > @@ -118,6 +122,7 @@ static void print_vdi_list(uint32_t vid, const char *name, const char *tag, > struct tm tm; > char dbuf[128]; > struct get_vdi_info *info = data; > + uint32_t object_size = (UINT32_C(1) << i->block_size_shift); > > if (info && strcmp(name, info->name) != 0) 
> return; > @@ -143,23 +148,24 @@ static void print_vdi_list(uint32_t vid, const char *name, const char *tag, > putchar('\\'); > putchar(*name++); > } > - printf(" %d %s %s %s %s %" PRIx32 " %s %s\n", snapid, > - strnumber(i->vdi_size), > - strnumber(my_objs * SD_DATA_OBJ_SIZE), > - strnumber(cow_objs * SD_DATA_OBJ_SIZE), > + printf(" %d %s %s %s %s %" PRIx32 " %s %s %" PRIu32 "\n", > + snapid, strnumber(i->vdi_size), > + strnumber(my_objs * object_size), > + strnumber(cow_objs * object_size), > dbuf, vid, > redundancy_scheme(i->nr_copies, i->copy_policy), > - i->tag); > + i->tag, object_size); > } else { > - printf("%c %-8s %5d %7s %7s %7s %s %7" PRIx32 " %6s %13s\n", > + printf("%c %-8s %5d %7s %7s %7s %s %7" PRIx32 > + " %6s %13s %7" PRIu32 "\n", > vdi_is_snapshot(i) ? 's' : (is_clone ? 'c' : ' '), > name, snapid, > strnumber(i->vdi_size), > - strnumber(my_objs * SD_DATA_OBJ_SIZE), > - strnumber(cow_objs * SD_DATA_OBJ_SIZE), > + strnumber(my_objs * object_size), > + strnumber(cow_objs * object_size), > dbuf, vid, > redundancy_scheme(i->nr_copies, i->copy_policy), > - i->tag); > + i->tag, object_size); > } > } > > @@ -282,7 +288,8 @@ static int vdi_list(int argc, char **argv) > const char *vdiname = argv[optind]; > > if (!raw_output) > - printf(" Name Id Size Used Shared Creation time VDI id Copies Tag\n"); > + printf(" Name Id Size Used Shared" > + " Creation time VDI id Copies Tag Obj Size\n"); > > if (vdiname) { > struct get_vdi_info info; > @@ -396,7 +403,8 @@ int read_vdi_obj(const char *vdiname, int snapid, const char *tag, > > int do_vdi_create(const char *vdiname, int64_t vdi_size, > uint32_t base_vid, uint32_t *vdi_id, bool snapshot, > - uint8_t nr_copies, uint8_t copy_policy, uint8_t store_policy) > + uint8_t nr_copies, uint8_t copy_policy, > + uint8_t store_policy, uint32_t object_size) > { > struct sd_req hdr; > struct sd_rsp *rsp = (struct sd_rsp *)&hdr; > @@ -416,6 +424,7 @@ int do_vdi_create(const char *vdiname, int64_t vdi_size, > hdr.vdi.copies = 
nr_copies; > hdr.vdi.copy_policy = copy_policy; > hdr.vdi.store_policy = store_policy; > + hdr.vdi.object_size = object_size; > > ret = dog_exec_req(&sd_nid, &hdr, buf); > if (ret < 0) > @@ -440,6 +449,8 @@ static int vdi_create(int argc, char **argv) > uint32_t vid; > uint64_t oid; > uint32_t idx, max_idx; > + uint32_t object_size; > + uint64_t old_max_total_size = 0; > struct sd_inode *inode = NULL; > int ret; > > @@ -451,10 +462,34 @@ static int vdi_create(int argc, char **argv) > if (ret < 0) > return EXIT_USAGE; > > - if (size > SD_OLD_MAX_VDI_SIZE && 0 == vdi_cmd_data.store_policy) { > + if (vdi_cmd_data.object_size) > + old_max_total_size = > + vdi_cmd_data.object_size * OLD_MAX_DATA_OBJS; > + else{ > + struct sd_req hdr; > + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; > + struct cluster_info cinfo; > + sd_init_req(&hdr, SD_OP_CLUSTER_INFO); > + hdr.data_length = sizeof(cinfo); > + ret = dog_exec_req(&sd_nid, &hdr, &cinfo); > + if (ret < 0) { > + sd_err("Fail to execute request: SD_OP_CLUSTER_INFO"); > + ret = EXIT_FAILURE; > + goto out; > + } > + if (rsp->result != SD_RES_SUCCESS) { > + sd_err("%s", sd_strerror(rsp->result)); > + ret = EXIT_FAILURE; > + goto out; > + } > + old_max_total_size = cinfo.object_size * OLD_MAX_DATA_OBJS; > + } I do not understand why the block size has to be read before creating the VDI. If sd_req->vdi.object_size is equal to 0, sheep can use its default value from cinfo->object_size. In addition, sd_inode already has a member, block_size_shift, for representing the object size. The newly added members of cluster_info and sd_req should therefore hold the block size shift (a number of bits), not a size in bytes. 
Thanks, Hitoshi > + > + if (size > old_max_total_size && 0 == vdi_cmd_data.store_policy) { > sd_err("VDI size is larger than %s bytes, please use '-y' to " > - "create a hyper volume with size up to %s bytes", > - strnumber(SD_OLD_MAX_VDI_SIZE), > + "create a hyper volume with size up to %s bytes" > + " or use '-z' to create larger object size volume", > + strnumber(old_max_total_size), > strnumber(SD_MAX_VDI_SIZE)); > return EXIT_USAGE; > } > @@ -466,7 +501,8 @@ static int vdi_create(int argc, char **argv) > > ret = do_vdi_create(vdiname, size, 0, &vid, false, > vdi_cmd_data.nr_copies, vdi_cmd_data.copy_policy, > - vdi_cmd_data.store_policy); > + vdi_cmd_data.store_policy, > + vdi_cmd_data.object_size); > if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc) > goto out; > > @@ -479,10 +515,11 @@ static int vdi_create(int argc, char **argv) > ret = EXIT_FAILURE; > goto out; > } > - max_idx = DIV_ROUND_UP(size, SD_DATA_OBJ_SIZE); > + object_size = (UINT32_C(1) << inode->block_size_shift); > + max_idx = DIV_ROUND_UP(size, object_size); > > for (idx = 0; idx < max_idx; idx++) { > - vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size); > + vdi_show_progress(idx * object_size, inode->vdi_size); > oid = vid_to_data_oid(vid, idx); > > ret = dog_write_object(oid, 0, NULL, 0, 0, 0, inode->nr_copies, > @@ -499,7 +536,7 @@ static int vdi_create(int argc, char **argv) > goto out; > } > } > - vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size); > + vdi_show_progress(idx * object_size, inode->vdi_size); > ret = EXIT_SUCCESS; > > out: > @@ -559,6 +596,7 @@ static int vdi_snapshot(int argc, char **argv) > { > const char *vdiname = argv[optind++]; > uint32_t vid, new_vid; > + uint32_t object_size; > int ret; > char buf[SD_INODE_HEADER_SIZE]; > struct sd_inode *inode = (struct sd_inode *)buf; > @@ -662,9 +700,10 @@ static int vdi_snapshot(int argc, char **argv) > if (ret != SD_RES_SUCCESS) > goto out; > > + object_size = (UINT32_C(1) << inode->block_size_shift); > ret = 
do_vdi_create(vdiname, inode->vdi_size, vid, &new_vid, true, > inode->nr_copies, inode->copy_policy, > - inode->store_policy); > + inode->store_policy, object_size); > > if (ret == EXIT_SUCCESS && verbose) { > if (raw_output) > @@ -691,6 +730,7 @@ static int vdi_clone(int argc, char **argv) > uint32_t base_vid, new_vid, vdi_id; > uint64_t oid; > uint32_t idx, max_idx, ret; > + uint32_t object_size; > struct sd_inode *inode = NULL, *new_inode = NULL; > char *buf = NULL; > > @@ -719,9 +759,10 @@ static int vdi_clone(int argc, char **argv) > if (vdi_cmd_data.no_share == true) > base_vid = 0; > > + object_size = (UINT32_C(1) << inode->block_size_shift); > ret = do_vdi_create(dst_vdi, inode->vdi_size, base_vid, &new_vid, false, > inode->nr_copies, inode->copy_policy, > - inode->store_policy); > + inode->store_policy, object_size); > if (ret != EXIT_SUCCESS || > (!vdi_cmd_data.prealloc && !vdi_cmd_data.no_share)) > goto out; > @@ -732,23 +773,23 @@ static int vdi_clone(int argc, char **argv) > if (ret != EXIT_SUCCESS) > goto out; > > - buf = xzalloc(SD_DATA_OBJ_SIZE); > + buf = xzalloc(object_size); > max_idx = count_data_objs(inode); > > for (idx = 0; idx < max_idx; idx++) { > size_t size; > > - vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size); > + vdi_show_progress(idx * object_size, inode->vdi_size); > vdi_id = sd_inode_get_vid(inode, idx); > if (vdi_id) { > oid = vid_to_data_oid(vdi_id, idx); > - ret = dog_read_object(oid, buf, SD_DATA_OBJ_SIZE, 0, > + ret = dog_read_object(oid, buf, object_size, 0, > true); > if (ret) { > ret = EXIT_FAILURE; > goto out; > } > - size = SD_DATA_OBJ_SIZE; > + size = object_size; > } else { > if (vdi_cmd_data.no_share && !vdi_cmd_data.prealloc) > continue; > @@ -772,7 +813,7 @@ static int vdi_clone(int argc, char **argv) > goto out; > } > } > - vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size); > + vdi_show_progress(idx * object_size, inode->vdi_size); > ret = EXIT_SUCCESS; > > out: > @@ -952,6 +993,7 @@ static int 
vdi_rollback(int argc, char **argv) > { > const char *vdiname = argv[optind++]; > uint32_t base_vid, new_vid; > + uint32_t object_size; > int ret; > char buf[SD_INODE_HEADER_SIZE]; > struct sd_inode *inode = (struct sd_inode *)buf; > @@ -977,9 +1019,10 @@ static int vdi_rollback(int argc, char **argv) > return EXIT_FAILURE; > } > > + object_size = (UINT32_C(1) << inode->block_size_shift); > ret = do_vdi_create(vdiname, inode->vdi_size, base_vid, &new_vid, > false, vdi_cmd_data.nr_copies, inode->copy_policy, > - inode->store_policy); > + inode->store_policy, object_size); > > if (ret == EXIT_SUCCESS && verbose) { > if (raw_output) > @@ -1494,6 +1537,7 @@ static int vdi_read(int argc, char **argv) > struct sd_inode *inode = NULL; > uint64_t offset = 0, oid, done = 0, total = (uint64_t) -1; > uint32_t vdi_id, idx; > + uint32_t object_size; > unsigned int len; > char *buf = NULL; > > @@ -1509,25 +1553,27 @@ static int vdi_read(int argc, char **argv) > } > > inode = malloc(sizeof(*inode)); > - buf = xmalloc(SD_DATA_OBJ_SIZE); > > ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id, > vdi_cmd_data.snapshot_tag, NULL, inode, > SD_INODE_SIZE); > if (ret != EXIT_SUCCESS) > - goto out; > + goto load_inode_err; > > if (inode->vdi_size < offset) { > sd_err("Read offset is beyond the end of the VDI"); > ret = EXIT_FAILURE; > - goto out; > + goto load_inode_err; > } > > + object_size = (UINT32_C(1) << inode->block_size_shift); > + buf = xmalloc(object_size); > + > total = min(total, inode->vdi_size - offset); > - idx = offset / SD_DATA_OBJ_SIZE; > - offset %= SD_DATA_OBJ_SIZE; > + idx = offset / object_size; > + offset %= object_size; > while (done < total) { > - len = min(total - done, SD_DATA_OBJ_SIZE - offset); > + len = min(total - done, object_size - offset); > vdi_id = sd_inode_get_vid(inode, idx); > if (vdi_id) { > oid = vid_to_data_oid(vdi_id, idx); > @@ -1554,8 +1600,9 @@ static int vdi_read(int argc, char **argv) > fsync(STDOUT_FILENO); > ret = EXIT_SUCCESS; > out: > 
- free(inode); > free(buf); > +load_inode_err: > + free(inode); > > return ret; > } > @@ -1564,6 +1611,7 @@ static int vdi_write(int argc, char **argv) > { > const char *vdiname = argv[optind++]; > uint32_t vid, flags, vdi_id, idx; > + uint32_t object_size; > int ret; > struct sd_inode *inode = NULL; > uint64_t offset = 0, oid, old_oid, done = 0, total = (uint64_t) -1; > @@ -1583,26 +1631,28 @@ static int vdi_write(int argc, char **argv) > } > > inode = xmalloc(sizeof(*inode)); > - buf = xmalloc(SD_DATA_OBJ_SIZE); > > ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_SIZE); > if (ret != EXIT_SUCCESS) > - goto out; > + goto load_inode_err; > > if (inode->vdi_size < offset) { > sd_err("Write offset is beyond the end of the VDI"); > ret = EXIT_FAILURE; > - goto out; > + goto load_inode_err; > } > > + object_size = (UINT32_C(1) << inode->block_size_shift); > + buf = xmalloc(object_size); > + > total = min(total, inode->vdi_size - offset); > - idx = offset / SD_DATA_OBJ_SIZE; > - offset %= SD_DATA_OBJ_SIZE; > + idx = offset / object_size; > + offset %= object_size; > while (done < total) { > create = false; > old_oid = 0; > flags = 0; > - len = min(total - done, SD_DATA_OBJ_SIZE - offset); > + len = min(total - done, object_size - offset); > > vdi_id = sd_inode_get_vid(inode, idx); > if (!vdi_id) > @@ -1647,7 +1697,7 @@ static int vdi_write(int argc, char **argv) > } > > offset += len; > - if (offset == SD_DATA_OBJ_SIZE) { > + if (offset == object_size) { > offset = 0; > idx++; > } > @@ -1655,8 +1705,9 @@ static int vdi_write(int argc, char **argv) > } > ret = EXIT_SUCCESS; > out: > - free(inode); > free(buf); > +load_inode_err: > + free(inode); > > return ret; > } > @@ -1709,6 +1760,7 @@ struct vdi_check_info { > uint64_t oid; > uint8_t nr_copies; > uint8_t copy_policy; > + uint32_t object_size; > uint64_t total; > uint64_t *done; > int refcnt; > @@ -1721,7 +1773,7 @@ struct vdi_check_info { > static void free_vdi_check_info(struct vdi_check_info *info) > { > if 
(info->done) { > - *info->done += SD_DATA_OBJ_SIZE; > + *info->done += info->object_size; > vdi_show_progress(*info->done, info->total); > } > free(info); > @@ -1783,6 +1835,7 @@ static void vdi_check_object_work(struct work *work) > if (is_erasure_oid(info->oid, info->copy_policy)) { > sd_init_req(&hdr, SD_OP_READ_PEER); > hdr.data_length = get_store_objsize(info->copy_policy, > + info->object_size, > info->oid); > hdr.obj.ec_index = vcw->ec_index; > hdr.epoch = sd_epoch; > @@ -1856,7 +1909,8 @@ static void check_erasure_object(struct vdi_check_info *info) > struct fec *ctx = ec_init(d, dp); > int miss_idx[dp], input_idx[dp]; > uint64_t oid = info->oid; > - size_t len = get_store_objsize(info->copy_policy, oid); > + size_t len = get_store_objsize(info->copy_policy, > + info->object_size, oid); > char *obj = xmalloc(len); > uint8_t *input[dp]; > > @@ -1882,7 +1936,8 @@ static void check_erasure_object(struct vdi_check_info *info) > uint8_t *ds[d]; > for (j = 0; j < d; j++) > ds[j] = info->vcw[j].buf; > - ec_decode_buffer(ctx, ds, idx, obj, d + k); > + ec_decode_buffer(ctx, ds, idx, obj, d + k, > + info->object_size); > if (memcmp(obj, info->vcw[d + k].buf, len) != 0) { > /* TODO repair the inconsistency */ > sd_err("object %"PRIx64" is inconsistent", oid); > @@ -1900,7 +1955,8 @@ static void check_erasure_object(struct vdi_check_info *info) > > for (i = 0; i < d; i++) > ds[i] = input[i]; > - ec_decode_buffer(ctx, ds, input_idx, obj, m); > + ec_decode_buffer(ctx, ds, input_idx, obj, m, > + info->object_size); > write_object_to(info->vcw[m].vnode, oid, obj, > len, true, info->vcw[m].ec_index); > fprintf(stdout, "fixed missing %"PRIx64", " > @@ -2023,6 +2079,7 @@ struct check_arg { > uint64_t *done; > struct work_queue *wq; > int nr_copies; > + uint32_t object_size; > }; > > static void check_cb(struct sd_index *idx, void *arg, int ignore) > @@ -2032,7 +2089,7 @@ static void check_cb(struct sd_index *idx, void *arg, int ignore) > > if (idx->vdi_id) { > oid = 
vid_to_data_oid(idx->vdi_id, idx->idx); > - *(carg->done) = (uint64_t)idx->idx * SD_DATA_OBJ_SIZE; > + *(carg->done) = (uint64_t)idx->idx * carg->object_size; > vdi_show_progress(*(carg->done), carg->inode->vdi_size); > queue_vdi_check_work(carg->inode, oid, NULL, carg->wq, > carg->nr_copies); > @@ -2046,6 +2103,7 @@ int do_vdi_check(const struct sd_inode *inode) > uint32_t vid; > struct work_queue *wq; > int nr_copies = min((int)inode->nr_copies, sd_zones_nr); > + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); > > if (0 < inode->copy_policy && sd_zones_nr < (int)inode->nr_copies) { > sd_err("ABORT: Not enough active zones for consistency-checking" > @@ -2070,12 +2128,13 @@ int do_vdi_check(const struct sd_inode *inode) > queue_vdi_check_work(inode, oid, &done, wq, > nr_copies); > } else { > - done += SD_DATA_OBJ_SIZE; > + done += object_size; > vdi_show_progress(done, inode->vdi_size); > } > } > } else { > - struct check_arg arg = {inode, &done, wq, nr_copies}; > + struct check_arg arg = {inode, &done, wq, nr_copies, > + object_size}; > sd_inode_index_walk(inode, check_cb, &arg); > vdi_show_progress(inode->vdi_size, inode->vdi_size); > } > @@ -2125,11 +2184,12 @@ struct obj_backup { > uint32_t offset; > uint32_t length; > uint32_t reserved; > - uint8_t data[SD_DATA_OBJ_SIZE]; > + uint8_t *data; > }; > > /* discards redundant area from backup data */ > -static void compact_obj_backup(struct obj_backup *backup, uint8_t *from_data) > +static void compact_obj_backup(struct obj_backup *backup, uint8_t *from_data, > + uint32_t object_size) > { > uint8_t *p1, *p2; > > @@ -2142,8 +2202,8 @@ static void compact_obj_backup(struct obj_backup *backup, uint8_t *from_data) > backup->length -= SECTOR_SIZE; > } > > - p1 = backup->data + SD_DATA_OBJ_SIZE - SECTOR_SIZE; > - p2 = from_data + SD_DATA_OBJ_SIZE - SECTOR_SIZE; > + p1 = backup->data + object_size - SECTOR_SIZE; > + p2 = from_data + object_size - SECTOR_SIZE; > while (backup->length > 0 && memcmp(p1, p2, 
SECTOR_SIZE) == 0) { > p1 -= SECTOR_SIZE; > p2 -= SECTOR_SIZE; > @@ -2152,29 +2212,29 @@ static void compact_obj_backup(struct obj_backup *backup, uint8_t *from_data) > } > > static int get_obj_backup(uint32_t idx, uint32_t from_vid, uint32_t to_vid, > - struct obj_backup *backup) > + struct obj_backup *backup, uint32_t object_size) > { > int ret; > - uint8_t *from_data = xzalloc(SD_DATA_OBJ_SIZE); > + uint8_t *from_data = xzalloc(object_size); > > backup->idx = idx; > backup->offset = 0; > - backup->length = SD_DATA_OBJ_SIZE; > + backup->length = object_size; > > if (to_vid) { > ret = dog_read_object(vid_to_data_oid(to_vid, idx), > - backup->data, SD_DATA_OBJ_SIZE, 0, true); > + backup->data, object_size, 0, true); > if (ret != SD_RES_SUCCESS) { > sd_err("Failed to read object %" PRIx32 ", %d", to_vid, > idx); > return EXIT_FAILURE; > } > } else > - memset(backup->data, 0, SD_DATA_OBJ_SIZE); > + memset(backup->data, 0, object_size); > > if (from_vid) { > ret = dog_read_object(vid_to_data_oid(from_vid, idx), from_data, > - SD_DATA_OBJ_SIZE, 0, true); > + object_size, 0, true); > if (ret != SD_RES_SUCCESS) { > sd_err("Failed to read object %" PRIx32 ", %d", > from_vid, idx); > @@ -2182,7 +2242,7 @@ static int get_obj_backup(uint32_t idx, uint32_t from_vid, uint32_t to_vid, > } > } > > - compact_obj_backup(backup, from_data); > + compact_obj_backup(backup, from_data, object_size); > > free(from_data); > > @@ -2194,13 +2254,13 @@ static int vdi_backup(int argc, char **argv) > const char *vdiname = argv[optind++]; > int ret = EXIT_SUCCESS; > uint32_t idx, nr_objs; > + uint32_t object_size; > struct sd_inode *from_inode = xzalloc(sizeof(*from_inode)); > struct sd_inode *to_inode = xzalloc(sizeof(*to_inode)); > struct backup_hdr hdr = { > .version = VDI_BACKUP_FORMAT_VERSION, > .magic = VDI_BACKUP_MAGIC, > }; > - struct obj_backup *backup = xzalloc(sizeof(*backup)); > > if ((!vdi_cmd_data.snapshot_id && !vdi_cmd_data.snapshot_tag[0]) || > (!vdi_cmd_data.from_snapshot_id 
&& > @@ -2214,21 +2274,25 @@ static int vdi_backup(int argc, char **argv) > vdi_cmd_data.from_snapshot_tag, NULL, > from_inode, SD_INODE_SIZE); > if (ret != EXIT_SUCCESS) > - goto out; > + goto load_inode_err; > > ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id, > vdi_cmd_data.snapshot_tag, NULL, to_inode, > SD_INODE_SIZE); > if (ret != EXIT_SUCCESS) > - goto out; > + goto load_inode_err; > > nr_objs = count_data_objs(to_inode); > > + struct obj_backup *backup = xzalloc(sizeof(*backup)); > + object_size = (UINT32_C(1) << from_inode->block_size_shift); > + backup->data = xzalloc(sizeof(uint8_t) * object_size); > + > ret = xwrite(STDOUT_FILENO, &hdr, sizeof(hdr)); > if (ret < 0) { > sd_err("failed to write backup header, %m"); > ret = EXIT_SYSFAIL; > - goto out; > + goto error; > } > > for (idx = 0; idx < nr_objs; idx++) { > @@ -2238,9 +2302,10 @@ static int vdi_backup(int argc, char **argv) > if (to_vid == 0 && from_vid == 0) > continue; > > - ret = get_obj_backup(idx, from_vid, to_vid, backup); > + ret = get_obj_backup(idx, from_vid, to_vid, > + backup, object_size); > if (ret != EXIT_SUCCESS) > - goto out; > + goto error; > > if (backup->length == 0) > continue; > @@ -2250,14 +2315,14 @@ static int vdi_backup(int argc, char **argv) > if (ret < 0) { > sd_err("failed to write backup data, %m"); > ret = EXIT_SYSFAIL; > - goto out; > + goto error; > } > ret = xwrite(STDOUT_FILENO, backup->data + backup->offset, > backup->length); > if (ret < 0) { > sd_err("failed to write backup data, %m"); > ret = EXIT_SYSFAIL; > - goto out; > + goto error; > } > } > > @@ -2269,15 +2334,18 @@ static int vdi_backup(int argc, char **argv) > if (ret < 0) { > sd_err("failed to write end marker, %m"); > ret = EXIT_SYSFAIL; > - goto out; > + goto error; > } > > fsync(STDOUT_FILENO); > ret = EXIT_SUCCESS; > -out: > +error: > + free(backup->data); > + free(backup); > +load_inode_err: > free(from_inode); > free(to_inode); > - free(backup); > +out: > return ret; > } > > @@ -2310,6 +2378,7 
@@ static uint32_t do_restore(const char *vdiname, int snapid, const char *tag) > { > int ret; > uint32_t vid; > + uint32_t object_size; > struct backup_hdr hdr; > struct obj_backup *backup = xzalloc(sizeof(*backup)); > struct sd_inode *inode = xzalloc(sizeof(*inode)); > @@ -2329,9 +2398,10 @@ static uint32_t do_restore(const char *vdiname, int snapid, const char *tag) > if (ret != EXIT_SUCCESS) > goto out; > > + object_size = (UINT32_C(1) << inode->block_size_shift); > ret = do_vdi_create(vdiname, inode->vdi_size, inode->vdi_id, &vid, > false, inode->nr_copies, inode->copy_policy, > - inode->store_policy); > + inode->store_policy, object_size); > if (ret != EXIT_SUCCESS) { > sd_err("Failed to read VDI"); > goto out; > @@ -2435,12 +2505,15 @@ static int vdi_restore(int argc, char **argv) > out: > if (need_current_recovery) { > int recovery_ret; > + uint32_t object_size = > + (UINT32_C(1) << current_inode->block_size_shift); > /* recreate the current vdi object */ > recovery_ret = do_vdi_create(vdiname, current_inode->vdi_size, > current_inode->parent_vdi_id, NULL, > true, current_inode->nr_copies, > current_inode->copy_policy, > - current_inode->store_policy); > + current_inode->store_policy, > + object_size); > if (recovery_ret != EXIT_SUCCESS) { > sd_err("failed to resume the current vdi"); > ret = recovery_ret; > @@ -2563,9 +2636,25 @@ static int vdi_cache_info(int argc, char **argv) > > fprintf(stdout, "Name\tTag\tTotal\tDirty\tClean\n"); > for (i = 0; i < info.count; i++) { > - uint64_t total = info.caches[i].total * SD_DATA_OBJ_SIZE, > - dirty = info.caches[i].dirty * SD_DATA_OBJ_SIZE, > + uint32_t object_size; > + uint32_t vid = info.caches[i].vid; > + struct sd_inode *inode = NULL; > + int r; > + > + r = dog_read_object(vid_to_vdi_oid(vid), inode, > + SD_INODE_HEADER_SIZE, 0, true); > + if (r != EXIT_SUCCESS) > + return r; > + > + if (!inode->block_size_shift) > + return EXIT_FAILURE; > + > + object_size = (UINT32_C(1) << inode->block_size_shift); > + > + 
uint64_t total = info.caches[i].total * object_size, > + dirty = info.caches[i].dirty * object_size, > clean = total - dirty; > + > char name[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; > > ret = vid_to_name_tag(info.caches[i].vid, name, tag); > @@ -2955,7 +3044,7 @@ static struct subcommand vdi_cmd[] = { > {"check", "<vdiname>", "seaphT", "check and repair image's consistency", > NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, > vdi_check, vdi_options}, > - {"create", "<vdiname> <size>", "PycaphrvT", "create an image", > + {"create", "<vdiname> <size>", "PycaphrvzT", "create an image", > NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, > vdi_create, vdi_options}, > {"snapshot", "<vdiname>", "saphrvT", "create a snapshot", > @@ -3023,6 +3112,7 @@ static struct subcommand vdi_cmd[] = { > static int vdi_parser(int ch, const char *opt) > { > char *p; > + uint32_t object_size_shift_bit; > > switch (ch) { > case 'P': > @@ -3101,6 +3191,20 @@ static int vdi_parser(int ch, const char *opt) > case 'e': > vdi_cmd_data.exist = true; > break; > + case 'z': > + object_size_shift_bit = (uint32_t)atoi(opt); > + if (object_size_shift_bit > 31) { > + sd_err("Object Size is limited to 2^31." 
> + " Please set shift bit lower than 31"); > + exit(EXIT_FAILURE); > + } > + vdi_cmd_data.object_size = > + (UINT32_C(1) << object_size_shift_bit); > + if (!vdi_cmd_data.object_size) { > + sd_err("Invalid parameter %s", opt); > + exit(EXIT_FAILURE); > + } > + break; > } > > return 0; > diff --git a/include/fec.h b/include/fec.h > index 1ae32e4..b3ef8d8 100644 > --- a/include/fec.h > +++ b/include/fec.h > @@ -96,12 +96,12 @@ void fec_encode(const struct fec *code, > size_t num_block_nums, size_t sz); > > void fec_decode_buffer(struct fec *ctx, uint8_t *input[], const int in_idx[], > - char *buf, int idx); > + char *buf, int idx, uint32_t object_size); > > /* for isa-l */ > > void isa_decode_buffer(struct fec *ctx, uint8_t *input[], const int in_idx[], > - char *buf, int idx); > + char *buf, int idx, uint32_t object_size); > > /* > * @param inpkts an array of packets (size k); If a primary block, i, is present > @@ -119,7 +119,6 @@ void fec_decode(const struct fec *code, > > /* Set data stripe as sector size to make VM happy */ > #define SD_EC_DATA_STRIPE_SIZE (512) /* 512 Byte */ > -#define SD_EC_NR_STRIPE_PER_OBJECT (SD_DATA_OBJ_SIZE / SD_EC_DATA_STRIPE_SIZE) > #define SD_EC_MAX_STRIP (16) > > static inline int ec_policy_to_dp(uint8_t policy, int *d, int *p) > @@ -205,11 +204,12 @@ static inline void ec_destroy(struct fec *ctx) > } > > static inline void ec_decode_buffer(struct fec *ctx, uint8_t *input[], > - const int in_idx[], char *buf, int idx) > + const int in_idx[], char *buf, > + int idx, uint32_t object_size) > { > if (cpu_has_ssse3) > - isa_decode_buffer(ctx, input, in_idx, buf, idx); > + isa_decode_buffer(ctx, input, in_idx, buf, idx, object_size); > else > - fec_decode_buffer(ctx, input, in_idx, buf, idx); > + fec_decode_buffer(ctx, input, in_idx, buf, idx, object_size); > } > #endif > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h > index cbb65b6..5cdedf5 100644 > --- a/include/sheepdog_proto.h > +++ b/include/sheepdog_proto.h > @@ 
-477,10 +477,11 @@ static inline bool is_data_obj(uint64_t oid) > > static inline size_t count_data_objs(const struct sd_inode *inode) > { > - return DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE); > + return DIV_ROUND_UP(inode->vdi_size, > + (UINT32_C(1) << inode->block_size_shift)); > } > > -static inline size_t get_objsize(uint64_t oid) > +static inline size_t get_objsize(uint64_t oid, uint32_t object_size) > { > if (is_vdi_obj(oid)) > return SD_INODE_SIZE; > @@ -494,7 +495,7 @@ static inline size_t get_objsize(uint64_t oid) > if (is_ledger_object(oid)) > return SD_LEDGER_OBJ_SIZE; > > - return SD_DATA_OBJ_SIZE;
-- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog