[Qemu-devel] [PATCH v5] sheepdog: selectable object size support
Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify object_size option. If you want to create a VDI of 8MB object size, you need to specify following command option. # qemu-img create -o object_size=8M sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V5: - Change option from block_size_shift to object_size. - Change parse type to QEMU_OPT_SIZE. - Add operation to verify max VDI size for resizing. - Change to use 4MB object size with using old Sheepdog. V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 155 ++--- include/block/block_int.h |1 + 2 files changed, 134 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..f6fe97e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -91,6 +92,7 @@ #define SD_NR_VDIS (1U 24) #define SD_DATA_OBJ_SIZE (UINT64_C(1) 22) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) +#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22 /* * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and * (SD_EC_MAX_STRIP - 1) for parity strips @@ -167,7 +169,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +189,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1562,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1588,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; +unsigned long buf_size; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1607,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); +buf = g_malloc0(buf_size); + +max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } @@ -1669,6 +1697,27 @@ static int
Re: [Qemu-devel] [sheepdog] [PATCH v4] sheepdog: selectable object size support
(2015/02/12 16:42), Liu Yuan wrote: On Thu, Feb 12, 2015 at 04:28:01PM +0900, Hitoshi Mitake wrote: At Thu, 12 Feb 2015 15:00:49 +0800, Liu Yuan wrote: On Thu, Feb 12, 2015 at 03:19:21PM +0900, Hitoshi Mitake wrote: At Tue, 10 Feb 2015 18:35:58 +0800, Liu Yuan wrote: On Tue, Feb 10, 2015 at 06:56:33PM +0900, Teruaki Ishizaki wrote: (2015/02/10 17:58), Liu Yuan wrote: On Tue, Feb 10, 2015 at 05:22:02PM +0900, Teruaki Ishizaki wrote: (2015/02/10 12:10), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; +unsigned long buf_size; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); +buf = g_malloc0(buf_size); + +max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/12 11:55), Liu Yuan wrote: On Thu, Feb 12, 2015 at 11:33:16AM +0900, Teruaki Ishizaki wrote: (2015/02/12 11:19), Liu Yuan wrote: On Thu, Feb 12, 2015 at 10:51:25AM +0900, Teruaki Ishizaki wrote: (2015/02/10 20:12), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 This might not be necessary. For old qemu or the qemu-img without setting option, the block_size_shift will be 0. If we make 0 to represent 4MB object, then we don't need to get the default cluster object size. We migth even get rid of the idea of cluster default size. The downsize is that, if we want to create a vdi with different size not the default 4MB, we have to write it every time for qemu-img or dog. If we choose to keep the idea of cluster default size, I think we'd also try to avoid call this request from QEMU to make backward compatibility easier. In this scenario, 0 might be used to ask new sheep to decide to use cluster default size. Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can handle 0 though it has different meanings. Table for this bit as 0: Qe: qemu SD: Sheep daemon CDS: Cluster Default Size Ign: Ignored by the sheep daemon Qe/sd newold new CDSIgn old CDSNULL Does Ign mean that VDI is handled as 4MB object size? Yes, old sheep can only handle 4MB object and doesn't check this field at all. I think this approach is acceptable. The difference to your patch is that we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and SD_OP_GET_CLUSTER_DEFAULT can be removed. When users create a new VDI with qemu-img, qemu's Sheepdog backend driver calculates max limit VDI size. But if block_size_shift option is not specified, qemu's Sheepdog backend driver can't calculate max limit VDI size. If block_size_shift not specified, this means 1 for old sheep, use 4MB size 2 for new sheep, use cluster wide default value. And sheep then can calculate it on its own, no? Dog command(client) calculate max size, so I think that qemu's Sheepdog backend driver should calculate it like dog command. Is that policy changeable? I checked the QEMU code and got your idea. In the past it was fixed size so very easy to hardcode the check in the client, no communication with sheep needed. Yes, if it is reasonable, we can change it. I think we can push the size calculation logic into sheep, if not the right size return INVALID_PARAMETER to clients. Clients just check this and report error back to users. There is no backward compability for this approach, since 4MB is the smallest size. OLD QEMU will limit the max_size as 4TB, which is no problem for new sheep. I have checked the Qemu and sheepdog code. When we resize VDI, sd_truncate() is called and resize value is handled by Qemu. (Sorry I haven't noticed this operation) Then, sd_truncate() writes Sheepdog inode object directly. So Sheepdog server can't handle maximum VDI size. As I thought, should we use SD_OP_GET_CLUSTER_DEFAULT? Should maxmimum VDI size be calculated on client program? Thanks, Teruaki
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/13 11:01), Liu Yuan wrote: On Fri, Feb 13, 2015 at 10:33:04AM +0900, Teruaki Ishizaki wrote: (2015/02/12 11:55), Liu Yuan wrote: On Thu, Feb 12, 2015 at 11:33:16AM +0900, Teruaki Ishizaki wrote: (2015/02/12 11:19), Liu Yuan wrote: On Thu, Feb 12, 2015 at 10:51:25AM +0900, Teruaki Ishizaki wrote: (2015/02/10 20:12), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 This might not be necessary. For old qemu or the qemu-img without setting option, the block_size_shift will be 0. If we make 0 to represent 4MB object, then we don't need to get the default cluster object size. We migth even get rid of the idea of cluster default size. The downsize is that, if we want to create a vdi with different size not the default 4MB, we have to write it every time for qemu-img or dog. If we choose to keep the idea of cluster default size, I think we'd also try to avoid call this request from QEMU to make backward compatibility easier. In this scenario, 0 might be used to ask new sheep to decide to use cluster default size. Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can handle 0 though it has different meanings. Table for this bit as 0: Qe: qemu SD: Sheep daemon CDS: Cluster Default Size Ign: Ignored by the sheep daemon Qe/sd newold new CDSIgn old CDSNULL Does Ign mean that VDI is handled as 4MB object size? Yes, old sheep can only handle 4MB object and doesn't check this field at all. I think this approach is acceptable. The difference to your patch is that we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and SD_OP_GET_CLUSTER_DEFAULT can be removed. When users create a new VDI with qemu-img, qemu's Sheepdog backend driver calculates max limit VDI size. But if block_size_shift option is not specified, qemu's Sheepdog backend driver can't calculate max limit VDI size. If block_size_shift not specified, this means 1 for old sheep, use 4MB size 2 for new sheep, use cluster wide default value. And sheep then can calculate it on its own, no? Dog command(client) calculate max size, so I think that qemu's Sheepdog backend driver should calculate it like dog command. Is that policy changeable? I checked the QEMU code and got your idea. In the past it was fixed size so very easy to hardcode the check in the client, no communication with sheep needed. Yes, if it is reasonable, we can change it. I think we can push the size calculation logic into sheep, if not the right size return INVALID_PARAMETER to clients. Clients just check this and report error back to users. There is no backward compability for this approach, since 4MB is the smallest size. OLD QEMU will limit the max_size as 4TB, which is no problem for new sheep. I have checked the Qemu and sheepdog code. When we resize VDI, sd_truncate() is called and resize value is handled by Qemu. (Sorry I haven't noticed this operation) Then, sd_truncate() writes Sheepdog inode object directly. So Sheepdog server can't handle maximum VDI size. As I thought, should we use SD_OP_GET_CLUSTER_DEFAULT? Should maxmimum VDI size be calculated on client program? Based on your description, yes, we have to use
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/10 20:12), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 This might not be necessary. For old qemu or the qemu-img without setting option, the block_size_shift will be 0. If we make 0 to represent 4MB object, then we don't need to get the default cluster object size. We migth even get rid of the idea of cluster default size. The downsize is that, if we want to create a vdi with different size not the default 4MB, we have to write it every time for qemu-img or dog. If we choose to keep the idea of cluster default size, I think we'd also try to avoid call this request from QEMU to make backward compatibility easier. In this scenario, 0 might be used to ask new sheep to decide to use cluster default size. Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can handle 0 though it has different meanings. Table for this bit as 0: Qe: qemu SD: Sheep daemon CDS: Cluster Default Size Ign: Ignored by the sheep daemon Qe/sd newold new CDSIgn old CDSNULL Does Ign mean that VDI is handled as 4MB object size? I think this approach is acceptable. The difference to your patch is that we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and SD_OP_GET_CLUSTER_DEFAULT can be removed. When users create a new VDI with qemu-img, qemu's Sheepdog backend driver calculates max limit VDI size. But if block_size_shift option is not specified, qemu's Sheepdog backend driver can't calculate max limit VDI size. So, I think that qemu's Sheepdog backend driver must get cluster default value from sheep daemon. Thanks, Teruaki
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/12 11:19), Liu Yuan wrote: On Thu, Feb 12, 2015 at 10:51:25AM +0900, Teruaki Ishizaki wrote: (2015/02/10 20:12), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 This might not be necessary. For old qemu or the qemu-img without setting option, the block_size_shift will be 0. If we make 0 to represent 4MB object, then we don't need to get the default cluster object size. We migth even get rid of the idea of cluster default size. The downsize is that, if we want to create a vdi with different size not the default 4MB, we have to write it every time for qemu-img or dog. If we choose to keep the idea of cluster default size, I think we'd also try to avoid call this request from QEMU to make backward compatibility easier. In this scenario, 0 might be used to ask new sheep to decide to use cluster default size. Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can handle 0 though it has different meanings. Table for this bit as 0: Qe: qemu SD: Sheep daemon CDS: Cluster Default Size Ign: Ignored by the sheep daemon Qe/sd newold new CDSIgn old CDSNULL Does Ign mean that VDI is handled as 4MB object size? Yes, old sheep can only handle 4MB object and doesn't check this field at all. I think this approach is acceptable. The difference to your patch is that we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and SD_OP_GET_CLUSTER_DEFAULT can be removed. When users create a new VDI with qemu-img, qemu's Sheepdog backend driver calculates max limit VDI size. But if block_size_shift option is not specified, qemu's Sheepdog backend driver can't calculate max limit VDI size. If block_size_shift not specified, this means 1 for old sheep, use 4MB size 2 for new sheep, use cluster wide default value. And sheep then can calculate it on its own, no? Dog command(client) calculate max size, so I think that qemu's Sheepdog backend driver should calculate it like dog command. Is that policy changeable? Is there no policy? Thanks, Teruaki
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/10 12:10), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; +unsigned long buf_size; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); +buf = g_malloc0(buf_size); + +max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } @@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) +{ +struct SheepdogInode *inode = s-inode; +inode-block_size_shift = +(uint8_t)qemu_opt_get_number_del(opt, block_size_shift, 0); +if (inode
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/10 17:58), Liu Yuan wrote: On Tue, Feb 10, 2015 at 05:22:02PM +0900, Teruaki Ishizaki wrote: (2015/02/10 12:10), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; +unsigned long buf_size; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); +buf = g_malloc0(buf_size); + +max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } @@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) +{ +struct SheepdogInode *inode = s-inode; +inode
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/06 11:18), Liu Yuan wrote: On Wed, Feb 04, 2015 at 01:54:19PM +0900, Teruaki Ishizaki wrote: (2015/02/02 15:52), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M Is it possible to make this option more user friendly? such as $ qemu-img create -o object_size=8M sheepdog:test 1G At first, I thought that the object_size was user friendly. But, Sheepdog has already the value of block_size_shift in the inode layout that means like object_size. 'object_size' doesn't always fit right in 'block_size_shift'. On the other hands, 'block_size_shift' always fit right in 'object_size'. I think that existing layout shouldn't be changed easily and it seems that it is difficult for users to specify the object_size value that fit right in 'block_size_shift'. I don't think we need to change the layout. block_size_shift is what QEMU talks to sheep, and QEMU options is what users talks to QEMU. We can convert the user friendly object size into block_size_shift internally in the driver before sending it tosheep daemon, no? For example, users specify 12MB for object size, block_size_shift doesn't fit exactly to an integer. I suppose that normally an administrator do format sheepdog cluster with specifying block_size_shift and users usually do qemu-img command without a block_size_shift option. I think that the block_size_shift option of qemu-img command is used for a experimental purpose. Thanks, Teruaki Ishizaki
Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support
(2015/02/02 15:52), Liu Yuan wrote: On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M Is it possible to make this option more user friendly? such as $ qemu-img create -o object_size=8M sheepdog:test 1G At first, I thought that the object_size was user friendly. But, Sheepdog has already the value of block_size_shift in the inode layout that means like object_size. 'object_size' doesn't always fit right in 'block_size_shift'. On the other hands, 'block_size_shift' always fit right in 'object_size'. I think that existing layout shouldn't be changed easily and it seems that it is difficult for users to specify the object_size value that fit right in 'block_size_shift'. Thanks, Teruaki Ishizaki
[Qemu-devel] [PATCH v4] sheepdog: selectable object size support
Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V4: - Limit a read/write buffer size for creating a preallocated VDI. - Replace a parse function for the block_size_shift option. - Fix an error message. V3: - Delete the needless operation of buffer. - Delete the needless operations of request header. for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq. - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 138 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..a43b947 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; +unsigned long buf_size; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); +buf = g_malloc0(buf_size); + +max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); if (ret 0) { goto out; } @@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) +{ +struct SheepdogInode *inode = s-inode; +inode-block_size_shift = +(uint8_t)qemu_opt_get_number_del(opt, block_size_shift, 0); +if (inode-block_size_shift == 0) { +/* block_size_shift is set for cluster default value by sheepdog */ +return 0; +} else if (inode-block_size_shift 20
Re: [Qemu-devel] [PATCH v3] sheepdog: selectable object size support
(2015/01/26 19:33), Kevin Wolf wrote: Am 26.01.2015 um 10:52 hat Teruaki Ishizaki geschrieben: Hi, Kevin Thanks for your review! (2015/01/23 22:53), Kevin Wolf wrote: Am 23.01.2015 um 09:24 hat Teruaki Ishizaki geschrieben: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V3: - Delete the needless operation of buffer. - Delete the needless operations of request header for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 140 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..c9f06db 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf = g_malloc0(object_size); If I understand correctly, block_size_shift can be up to 31, i.e. this is a 2 GB allocation. Do you really think this is a good idea? I think that it is not best idea. Sheepdog internal limit of block_size_shift is 31, so I set logical limit of block_size_shift for 31. I think there is no real requirement to use the object size here, this is only the buffer size for some bdrv_read/write calls, which works fine in smaller chunks. So you could still allow block_size_shift up to 31, but use a different number here. Perhaps always leaving it at 4 MB is good enough, otherwise you could use something like MIN(24, base-inode.block_size_shift). At least use g_try_malloc0() here, so that a memory allocation failure doesn't crash qemu. (Same goes for all potentially huge allocations that you make in the whole codebase.) As you pointed out, memory allocation was needed for that value. I suppose that block_size_shift should be up to 26(64MB) or 27(128M) actually. Should it be checked actual limits in that source code? I suggest limiting the maximum buffer size as above so that a failure becomes
Re: [Qemu-devel] [PATCH v3] sheepdog: selectable object size support
Hi, Kevin Thanks for your review! (2015/01/23 22:53), Kevin Wolf wrote: Am 23.01.2015 um 09:24 hat Teruaki Ishizaki geschrieben: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V3: - Delete the needless operation of buffer. - Delete the needless operations of request header for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 140 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..c9f06db 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf = g_malloc0(object_size); If I understand correctly, block_size_shift can be up to 31, i.e. this is a 2 GB allocation. Do you really think this is a good idea? I think that it is not best idea. Sheepdog internal limit of block_size_shift is 31, so I set logical limit of block_size_shift for 31. At least use g_try_malloc0() here, so that a memory allocation failure doesn't crash qemu. (Same goes for all potentially huge allocations that you make in the whole codebase.) As you pointed out, memory allocation was needed for that value. I suppose that block_size_shift should be up to 26(64MB) or 27(128M) actually. Should it be checked actual limits in that source code? +max_idx = DIV_ROUND_UP(vdi_size, object_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret
[Qemu-devel] [PATCH v3] sheepdog: selectable object size support
Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V3: - Delete the needless operation of buffer. - Delete the needless operations of request header for SD_OP_GET_CLUSTER_DEFAULT. - Fix coding style problems. V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 140 ++--- include/block/block_int.h |1 + 2 files changed, 119 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..c9f06db 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf = g_malloc0(object_size); + +max_idx = DIV_ROUND_UP(vdi_size, object_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } @@ -1610,7 +1635,9 @@ out_with_err_set: if (bs) { bdrv_unref(bs); } -g_free(buf); +if (buf) { +g_free(buf); +} return ret; } @@ -1669,6 +1696,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt) +{ +struct SheepdogInode *inode = s-inode; +inode-block_size_shift = (uint8_t)atoi(opt); +if (inode-block_size_shift 20 || inode-block_size_shift 31) { +return -EINVAL; +} + +return 0; +} + static int sd_create(const char *filename, QemuOpts *opts, Error **errp) { @@ -1679,6 +1717,7 @@ static int sd_create(const char
Re: [Qemu-devel] [sheepdog] [PATCH v2] sheepdog: selectable object size support
Hi, Hitoshi Thanks for your review! (2015/01/22 14:22), Hitoshi Mitake wrote: At Tue, 20 Jan 2015 16:14:28 +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 142 ++--- include/block/block_int.h |1 + 2 files changed, 121 insertions(+), 22 deletions(-) @@ -1757,6 +1800,48 @@ static int sd_create(const char *filename, QemuOpts *opts, } s-aio_context = qemu_get_aio_context(); + +/* if block_size_shift is not specified, get cluster default value */ +if (s-inode.block_size_shift == 0) { +SheepdogVdiReq hdr; +SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)hdr; +Error *local_err = NULL; +int fd; +unsigned int wlen = 0, rlen = 0; + +fd = connect_to_sdog(s, local_err); +if (fd 0) { +error_report(%s, error_get_pretty(local_err)); +error_free(local_err); +ret = -EIO; +goto out; +} + +memset(hdr, 0, sizeof(hdr)); SD_OP_GET_CLUSTER_DEFAULT doesn't require succeeding data, so the below memset() for buf and +memset(buf, 0, sizeof(buf)); +hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; +hdr.proto_ver = SD_PROTO_VER; the below two statements aren't required. +hdr.data_length = wlen; +hdr.flags = SD_FLAG_CMD_WRITE; I comprehend above things you pointed out. I'll fix the patch! Thanks, Teruaki Ishizaki
[Qemu-devel] [PATCH v2] sheepdog: selectable object size support
Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- V2: - Fix coding style problem (white space). - Add members, store_policy and block_size_shift to struct SheepdogVdiReq - Initialize request header to use block_size_shift specified by user. --- block/sheepdog.c | 142 ++--- include/block/block_int.h |1 + 2 files changed, 121 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..ac18966 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); @@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf = g_malloc0(object_size); + +max_idx = DIV_ROUND_UP(vdi_size, object_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } @@ -1610,7 +1635,8 @@ out_with_err_set: if (bs) { bdrv_unref(bs); } -g_free(buf); +if (buf) + g_free(buf); return ret; } @@ -1669,6 +1695,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt) +{ +struct SheepdogInode *inode = s-inode; +inode-block_size_shift = (uint8_t)atoi(opt); +if(inode-block_size_shift 20 || inode-block_size_shift 31){ +return -EINVAL; +} + +return 0; +} + static int sd_create(const char *filename, QemuOpts *opts, Error **errp) { @@ -1679,6 +1716,7 @@ static int sd_create(const char *filename, QemuOpts *opts, BDRVSheepdogState *s; char tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; +uint64_t max_vdi_size; bool prealloc = false; s
Re: [Qemu-devel] [sheepdog] [PATCH] sheepdog: selectable object size support
Hi Hitoshi, Thanks for your check. (2015/01/15 15:05), Hitoshi Mitake wrote: At Tue, 13 Jan 2015 17:41:12 +0900, Teruaki Ishizaki wrote: Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- block/sheepdog.c | 138 +--- include/block/block_int.h |1 + 2 files changed, 117 insertions(+), 22 deletions(-) @@ -1610,7 +1633,8 @@ out_with_err_set: if (bs) { bdrv_unref(bs); } -g_free(buf); +if (buf) The above line has a style problem (white space). I'll change it. @@ -1669,6 +1693,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt) +{ +struct SheepdogInode *inode = s-inode; +inode-block_size_shift = (uint8_t)atoi(opt); This patch cannot create VDI with specified block size shift. Below is the reason: Initializing block_size_shift of the inode object here (in parse_block_size_shift()) doesn't make sense. Because the block_size_shift of newly created VDI is specified by block_size_shift of SheepdogVdiReq (in sheepdog source code, sd_req.vdi). You need to synchronize the struct and initialize the parameter. Below is a slack solution: diff --git a/block/sheepdog.c b/block/sheepdog.c index 525254e..3dc3359 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -168,7 +168,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; -uint8_t reserved[2]; +uint8_t store_policy; +uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -1560,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s-inode.vdi_size; hdr.copy_policy = s-inode.copy_policy; hdr.copies = s-inode.nr_copies; +hdr.block_size_shift = s-inode.block_size_shift; ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen); Oh, sorry for my miss-edit for the patch I had tested the source code as you sugested fixing. I'll fix the patch like that! # This is a very slack solution. You need to avoid using inode as a # temporal space of the parameter. Originally, do_sd_create() had argument struct BDRVSheepdogState which included inode, and parse_redundancy() substitute copy_policy and copies substituted for inode. So, I created parse_block_size_shift() like parse_redundancy() to handle block_size_shift like copy_policy and copies options. Which point is temporal? Thanks, Teruaki Ishizaki
[Qemu-devel] [PATCH] sheepdog: selectable object size support
Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle block_size_shift value for calculating VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify block_size_shift option. If you want to create a VDI of 8MB(1 23) object size, you need to specify following command option. # qemu-img create -o block_size_shift=23 sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp --- block/sheepdog.c | 138 +--- include/block/block_int.h |1 + 2 files changed, 117 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176f..525254e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE0x01 #define SD_FLAG_CMD_COW 0x02 @@ -186,6 +187,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { +uint8_t proto_ver; +uint8_t opcode; +uint16_t flags; +uint32_t epoch; +uint32_t id; +uint32_t data_length; +uint32_t result; +uint8_t nr_copies; +uint8_t copy_policy; +uint8_t block_size_shift; +uint8_t __pad1; +uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1569,9 +1585,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; +BDRVSheepdogState *base = NULL; uint32_t idx, max_idx; +uint32_t object_size; int64_t vdi_size; -void *buf = g_malloc0(SD_DATA_OBJ_SIZE); +void *buf = NULL; int ret; ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1603,23 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } -max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + +base = bs-opaque; +object_size = (UINT32_C(1) base-inode.block_size_shift); +buf = g_malloc0(object_size); + +max_idx = DIV_ROUND_UP(vdi_size, object_size); for (idx = 0; idx max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ -ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pread(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } -ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); +ret = bdrv_pwrite(bs, idx * object_size, buf, object_size); if (ret 0) { goto out; } @@ -1610,7 +1633,8 @@ out_with_err_set: if (bs) { bdrv_unref(bs); } -g_free(buf); +if (buf) + g_free(buf); return ret; } @@ -1669,6 +1693,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt) +{ +struct SheepdogInode *inode = s-inode; +inode-block_size_shift = (uint8_t)atoi(opt); +if(inode-block_size_shift 20 || inode-block_size_shift 31){ +return -EINVAL; +} + +return 0; +} + static int sd_create(const char *filename, QemuOpts *opts, Error **errp) { @@ -1679,6 +1714,7 @@ static int sd_create(const char *filename, QemuOpts *opts, BDRVSheepdogState *s; char tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; +uint64_t max_vdi_size; bool prealloc = false; s = g_new0(BDRVSheepdogState, 1); @@ -1717,11 +1753,14 @@ static int sd_create(const char *filename, QemuOpts *opts, goto out; } } - -if (s-inode.vdi_size SD_MAX_VDI_SIZE) { -error_setg(errp, too big image size); -ret = -EINVAL; -goto out; +buf = qemu_opt_get_del(opts, BLOCK_OPT_BLOCK_SIZE_SHIFT); +if (buf) { +ret = parse_block_size_shift(s, buf); +if (ret 0) { +error_setg(errp, Invalid block_size_shift: + '%s'. please use 20-31, buf); +goto out; +} } if (backing_file) { @@ -1757,6 +1796,48 @@ static int sd_create(const char *filename, QemuOpts *opts, } s-aio_context = qemu_get_aio_context(); + +/* if block_size_shift is not specified, get cluster