[Qemu-devel] [PATCH v5] sheepdog: selectable object size support

2015-02-13 Thread Teruaki Ishizaki
Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify object_size option.

If you want to create a VDI of 8MB object size,
you need to specify following command option.

 # qemu-img create -o object_size=8M sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

 # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V5:
 - Change option from block_size_shift to object_size.
 - Change parse type to QEMU_OPT_SIZE.
 - Add operation to verify max VDI size for resizing.
 - Change to use 4MB object size with using old Sheepdog.

V4:
 - Limit a read/write buffer size for creating a preallocated VDI.
 - Replace a parse function for the block_size_shift option.
 - Fix an error message.

V3:
 - Delete the needless operation of buffer.
 - Delete the needless operations of request header.
   for SD_OP_GET_CLUSTER_DEFAULT.
 - Fix coding style problems.

V2:
 - Fix coding style problem (white space).
 - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
 - Initialize request header to use block_size_shift specified by user.
---
 block/sheepdog.c  |  155 ++---
 include/block/block_int.h |1 +
 2 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..f6fe97e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
 #define SD_OP_READ_VDIS  0x15
 #define SD_OP_FLUSH_VDI  0x16
 #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18
 
 #define SD_FLAG_CMD_WRITE0x01
 #define SD_FLAG_CMD_COW  0x02
@@ -91,6 +92,7 @@
 #define SD_NR_VDIS   (1U  24)
 #define SD_DATA_OBJ_SIZE (UINT64_C(1)  22)
 #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
+#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22
 /*
  * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
  * (SD_EC_MAX_STRIP - 1) for parity strips
@@ -167,7 +169,8 @@ typedef struct SheepdogVdiReq {
 uint32_t base_vdi_id;
 uint8_t copies;
 uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
 uint32_t snapid;
 uint32_t type;
 uint32_t pad[2];
@@ -186,6 +189,21 @@ typedef struct SheepdogVdiRsp {
 uint32_t pad[5];
 } SheepdogVdiRsp;
 
+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
 typedef struct SheepdogInode {
 char name[SD_MAX_VDI_LEN];
 char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1562,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 hdr.vdi_size = s-inode.vdi_size;
 hdr.copy_policy = s-inode.copy_policy;
 hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;
 
 ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);
 
@@ -1569,9 +1588,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 static int sd_prealloc(const char *filename, Error **errp)
 {
 BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
+unsigned long buf_size;
 uint32_t idx, max_idx;
+uint32_t object_size;
 int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
 int ret;
 
 ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1607,24 @@ static int sd_prealloc(const char *filename, Error 
**errp)
 ret = vdi_size;
 goto out;
 }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
+buf = g_malloc0(buf_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, buf_size);
 
 for (idx = 0; idx  max_idx; idx++) {
 /*
  * The created image can be a cloned image, so we need to read
  * a data from the source image.
  */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
 if (ret  0) {
 goto out;
 }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
 if (ret  0) {
 goto out;
 }
@@ -1669,6 +1697,27 @@ static int

Re: [Qemu-devel] [sheepdog] [PATCH v4] sheepdog: selectable object size support

2015-02-12 Thread Teruaki Ishizaki

(2015/02/12 16:42), Liu Yuan wrote:

On Thu, Feb 12, 2015 at 04:28:01PM +0900, Hitoshi Mitake wrote:

At Thu, 12 Feb 2015 15:00:49 +0800,
Liu Yuan wrote:


On Thu, Feb 12, 2015 at 03:19:21PM +0900, Hitoshi Mitake wrote:

At Tue, 10 Feb 2015 18:35:58 +0800,
Liu Yuan wrote:


On Tue, Feb 10, 2015 at 06:56:33PM +0900, Teruaki Ishizaki wrote:

(2015/02/10 17:58), Liu Yuan wrote:

On Tue, Feb 10, 2015 at 05:22:02PM +0900, Teruaki Ishizaki wrote:

(2015/02/10 12:10), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18

  #define SD_FLAG_CMD_WRITE0x01
  #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
  uint32_t base_vdi_id;
  uint8_t copies;
  uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
  uint32_t snapid;
  uint32_t type;
  uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
  uint32_t pad[5];
  } SheepdogVdiRsp;

+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
  typedef struct SheepdogInode {
  char name[SD_MAX_VDI_LEN];
  char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  hdr.vdi_size = s-inode.vdi_size;
  hdr.copy_policy = s-inode.copy_policy;
  hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;

  ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);

@@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  static int sd_prealloc(const char *filename, Error **errp)
  {
  BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
+unsigned long buf_size;
  uint32_t idx, max_idx;
+uint32_t object_size;
  int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
  int ret;

  ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error 
**errp)
  ret = vdi_size;
  goto out;
  }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
+buf = g_malloc0(buf_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, buf_size);

  for (idx = 0; idx  max_idx; idx++) {
  /*
   * The created image can be a cloned image, so we need to read
   * a data from the source image.
   */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
  if (ret  0) {
  goto out;
  }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx

Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-12 Thread Teruaki Ishizaki

(2015/02/12 11:55), Liu Yuan wrote:

On Thu, Feb 12, 2015 at 11:33:16AM +0900, Teruaki Ishizaki wrote:

(2015/02/12 11:19), Liu Yuan wrote:

On Thu, Feb 12, 2015 at 10:51:25AM +0900, Teruaki Ishizaki wrote:

(2015/02/10 20:12), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18


This might not be necessary. For old qemu or the qemu-img without setting
option, the block_size_shift will be 0.

If we make 0 to represent 4MB object, then we don't need to get the default
cluster object size.

We migth even get rid of the idea of cluster default size. The downsize is that,
if we want to create a vdi with different size not the default 4MB,
we have to write it every time for qemu-img or dog.

If we choose to keep the idea of cluster default size, I think we'd also try to
avoid call this request from QEMU to make backward compatibility easier. In this
scenario, 0 might be used to ask new sheep to decide to use cluster default 
size.

Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can
handle 0 though it has different meanings.

Table for this bit as 0:
Qe: qemu
SD: Sheep daemon
CDS: Cluster Default Size
Ign: Ignored by the sheep daemon

Qe/sd   newold
new CDSIgn
old CDSNULL

Does Ign mean that VDI is handled as 4MB object size?


Yes, old sheep can only handle 4MB object and doesn't check this field at all.





I think this approach is acceptable. The difference to your patch is that
we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and
SD_OP_GET_CLUSTER_DEFAULT can be removed.

When users create a new VDI with qemu-img, qemu's Sheepdog backend
driver calculates max limit VDI size.



But if block_size_shift option is not specified, qemu's Sheepdog backend
driver can't calculate max limit VDI size.


If block_size_shift not specified, this means

1 for old sheep, use 4MB size
2 for new sheep, use cluster wide default value.

And sheep then can calculate it on its own, no?


Dog command(client) calculate max size, so I think
that qemu's Sheepdog backend driver should calculate it
like dog command.

Is that policy changeable?


I checked the QEMU code and got your idea. In the past it was fixed size so very
easy to hardcode the check in the client, no communication with sheep needed.

Yes, if it is reasonable, we can change it.

I think we can push the size calculation logic into sheep, if not the right size
return INVALID_PARAMETER to clients. Clients just check this and report error
back to users.

There is no backward compability for this approach, since 4MB is the smallest
size.

OLD QEMU will limit the max_size as 4TB, which is no problem for new sheep.


I have checked the Qemu and sheepdog code.
When we resize VDI, sd_truncate() is called and
resize value is handled by Qemu.
(Sorry I haven't noticed this operation)

Then, sd_truncate() writes Sheepdog inode object directly.
So Sheepdog server can't handle maximum VDI size.

As I thought, should we use SD_OP_GET_CLUSTER_DEFAULT?
Should maxmimum VDI size be calculated on client program?

Thanks,
Teruaki



Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-12 Thread Teruaki Ishizaki

(2015/02/13 11:01), Liu Yuan wrote:

On Fri, Feb 13, 2015 at 10:33:04AM +0900, Teruaki Ishizaki wrote:

(2015/02/12 11:55), Liu Yuan wrote:

On Thu, Feb 12, 2015 at 11:33:16AM +0900, Teruaki Ishizaki wrote:

(2015/02/12 11:19), Liu Yuan wrote:

On Thu, Feb 12, 2015 at 10:51:25AM +0900, Teruaki Ishizaki wrote:

(2015/02/10 20:12), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18


This might not be necessary. For old qemu or the qemu-img without setting
option, the block_size_shift will be 0.

If we make 0 to represent 4MB object, then we don't need to get the default
cluster object size.

We migth even get rid of the idea of cluster default size. The downsize is that,
if we want to create a vdi with different size not the default 4MB,
we have to write it every time for qemu-img or dog.

If we choose to keep the idea of cluster default size, I think we'd also try to
avoid call this request from QEMU to make backward compatibility easier. In this
scenario, 0 might be used to ask new sheep to decide to use cluster default 
size.

Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can
handle 0 though it has different meanings.

Table for this bit as 0:
Qe: qemu
SD: Sheep daemon
CDS: Cluster Default Size
Ign: Ignored by the sheep daemon

Qe/sd   newold
new CDSIgn
old CDSNULL

Does Ign mean that VDI is handled as 4MB object size?


Yes, old sheep can only handle 4MB object and doesn't check this field at all.





I think this approach is acceptable. The difference to your patch is that
we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and
SD_OP_GET_CLUSTER_DEFAULT can be removed.

When users create a new VDI with qemu-img, qemu's Sheepdog backend
driver calculates max limit VDI size.



But if block_size_shift option is not specified, qemu's Sheepdog backend
driver can't calculate max limit VDI size.


If block_size_shift not specified, this means

1 for old sheep, use 4MB size
2 for new sheep, use cluster wide default value.

And sheep then can calculate it on its own, no?


Dog command(client) calculate max size, so I think
that qemu's Sheepdog backend driver should calculate it
like dog command.

Is that policy changeable?


I checked the QEMU code and got your idea. In the past it was fixed size so very
easy to hardcode the check in the client, no communication with sheep needed.

Yes, if it is reasonable, we can change it.

I think we can push the size calculation logic into sheep, if not the right size
return INVALID_PARAMETER to clients. Clients just check this and report error
back to users.

There is no backward compability for this approach, since 4MB is the smallest
size.

OLD QEMU will limit the max_size as 4TB, which is no problem for new sheep.


I have checked the Qemu and sheepdog code.
When we resize VDI, sd_truncate() is called and
resize value is handled by Qemu.
(Sorry I haven't noticed this operation)

Then, sd_truncate() writes Sheepdog inode object directly.
So Sheepdog server can't handle maximum VDI size.

As I thought, should we use SD_OP_GET_CLUSTER_DEFAULT?
Should maxmimum VDI size be calculated on client program?


Based on your description, yes, we have to use

Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-11 Thread Teruaki Ishizaki

(2015/02/10 20:12), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18


This might not be necessary. For old qemu or the qemu-img without setting
option, the block_size_shift will be 0.

If we make 0 to represent 4MB object, then we don't need to get the default
cluster object size.

We migth even get rid of the idea of cluster default size. The downsize is that,
if we want to create a vdi with different size not the default 4MB,
we have to write it every time for qemu-img or dog.

If we choose to keep the idea of cluster default size, I think we'd also try to
avoid call this request from QEMU to make backward compatibility easier. In this
scenario, 0 might be used to ask new sheep to decide to use cluster default 
size.

Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can
handle 0 though it has different meanings.

Table for this bit as 0:
Qe: qemu
SD: Sheep daemon
CDS: Cluster Default Size
Ign: Ignored by the sheep daemon

Qe/sd   newold
new CDSIgn
old CDSNULL

Does Ign mean that VDI is handled as 4MB object size?



I think this approach is acceptable. The difference to your patch is that
we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and
SD_OP_GET_CLUSTER_DEFAULT can be removed.

When users create a new VDI with qemu-img, qemu's Sheepdog backend
driver calculates max limit VDI size.
But if block_size_shift option is not specified, qemu's Sheepdog backend
driver can't calculate max limit VDI size.

So, I think that qemu's Sheepdog backend driver must get cluster default
value from sheep daemon.

Thanks,
Teruaki





Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-11 Thread Teruaki Ishizaki

(2015/02/12 11:19), Liu Yuan wrote:

On Thu, Feb 12, 2015 at 10:51:25AM +0900, Teruaki Ishizaki wrote:

(2015/02/10 20:12), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18


This might not be necessary. For old qemu or the qemu-img without setting
option, the block_size_shift will be 0.

If we make 0 to represent 4MB object, then we don't need to get the default
cluster object size.

We migth even get rid of the idea of cluster default size. The downsize is that,
if we want to create a vdi with different size not the default 4MB,
we have to write it every time for qemu-img or dog.

If we choose to keep the idea of cluster default size, I think we'd also try to
avoid call this request from QEMU to make backward compatibility easier. In this
scenario, 0 might be used to ask new sheep to decide to use cluster default 
size.

Both old qemu and new QEMU will send 0 to sheep and both old and new sheep can
handle 0 though it has different meanings.

Table for this bit as 0:
Qe: qemu
SD: Sheep daemon
CDS: Cluster Default Size
Ign: Ignored by the sheep daemon

Qe/sd   newold
new CDSIgn
old CDSNULL

Does Ign mean that VDI is handled as 4MB object size?


Yes, old sheep can only handle 4MB object and doesn't check this field at all.





I think this approach is acceptable. The difference to your patch is that
we don't send SD_OP_GET_CLUSTER_DEFAULT to sheep daemon and
SD_OP_GET_CLUSTER_DEFAULT can be removed.

When users create a new VDI with qemu-img, qemu's Sheepdog backend
driver calculates max limit VDI size.



But if block_size_shift option is not specified, qemu's Sheepdog backend
driver can't calculate max limit VDI size.


If block_size_shift not specified, this means

1 for old sheep, use 4MB size
2 for new sheep, use cluster wide default value.

And sheep then can calculate it on its own, no?


Dog command(client) calculate max size, so I think
that qemu's Sheepdog backend driver should calculate it
like dog command.

Is that policy changeable?
Is there no policy?

Thanks,
Teruaki



Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-10 Thread Teruaki Ishizaki

(2015/02/10 12:10), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18

  #define SD_FLAG_CMD_WRITE0x01
  #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
  uint32_t base_vdi_id;
  uint8_t copies;
  uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
  uint32_t snapid;
  uint32_t type;
  uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
  uint32_t pad[5];
  } SheepdogVdiRsp;

+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
  typedef struct SheepdogInode {
  char name[SD_MAX_VDI_LEN];
  char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  hdr.vdi_size = s-inode.vdi_size;
  hdr.copy_policy = s-inode.copy_policy;
  hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;

  ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);

@@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  static int sd_prealloc(const char *filename, Error **errp)
  {
  BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
+unsigned long buf_size;
  uint32_t idx, max_idx;
+uint32_t object_size;
  int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
  int ret;

  ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error 
**errp)
  ret = vdi_size;
  goto out;
  }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
+buf = g_malloc0(buf_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, buf_size);

  for (idx = 0; idx  max_idx; idx++) {
  /*
   * The created image can be a cloned image, so we need to read
   * a data from the source image.
   */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
  if (ret  0) {
  goto out;
  }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
  if (ret  0) {
  goto out;
  }
@@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
  return 0;
  }

+static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode-block_size_shift =
+(uint8_t)qemu_opt_get_number_del(opt, block_size_shift, 0);
+if (inode

Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-10 Thread Teruaki Ishizaki

(2015/02/10 17:58), Liu Yuan wrote:

On Tue, Feb 10, 2015 at 05:22:02PM +0900, Teruaki Ishizaki wrote:

(2015/02/10 12:10), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
  - Limit a read/write buffer size for creating a preallocated VDI.
  - Replace a parse function for the block_size_shift option.
  - Fix an error message.

V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header.
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  138 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18

  #define SD_FLAG_CMD_WRITE0x01
  #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
  uint32_t base_vdi_id;
  uint8_t copies;
  uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
  uint32_t snapid;
  uint32_t type;
  uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
  uint32_t pad[5];
  } SheepdogVdiRsp;

+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
  typedef struct SheepdogInode {
  char name[SD_MAX_VDI_LEN];
  char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  hdr.vdi_size = s-inode.vdi_size;
  hdr.copy_policy = s-inode.copy_policy;
  hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;

  ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);

@@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  static int sd_prealloc(const char *filename, Error **errp)
  {
  BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
+unsigned long buf_size;
  uint32_t idx, max_idx;
+uint32_t object_size;
  int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
  int ret;

  ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error 
**errp)
  ret = vdi_size;
  goto out;
  }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
+buf = g_malloc0(buf_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, buf_size);

  for (idx = 0; idx  max_idx; idx++) {
  /*
   * The created image can be a cloned image, so we need to read
   * a data from the source image.
   */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
  if (ret  0) {
  goto out;
  }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
  if (ret  0) {
  goto out;
  }
@@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
  return 0;
  }

+static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode

Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-06 Thread Teruaki Ishizaki

(2015/02/06 11:18), Liu Yuan wrote:

On Wed, Feb 04, 2015 at 01:54:19PM +0900, Teruaki Ishizaki wrote:

(2015/02/02 15:52), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M


Is it possible to make this option more user friendly? such as

  $ qemu-img create -o object_size=8M sheepdog:test 1G


At first, I thought that the object_size was user friendly.
But, Sheepdog has already the value of block_size_shift
in the inode layout that means like object_size.

'object_size' doesn't always fit right in 'block_size_shift'.
On the other hands, 'block_size_shift' always fit right in
'object_size'.

I think that existing layout shouldn't be changed easily and
it seems that it is difficult for users to specify
the object_size value that fit right in 'block_size_shift'.


I don't think we need to change the layout. block_size_shift is what QEMU talks
to sheep, and QEMU options is what users talks to QEMU. We can convert the user
friendly object size into block_size_shift internally in the driver before
sending it tosheep daemon, no?


For example, users specify 12MB for object size, block_size_shift
doesn't fit exactly to an integer.

I suppose that normally an administrator do format sheepdog cluster
with specifying block_size_shift and users usually do qemu-img command
without a block_size_shift option.

I think that the block_size_shift option of qemu-img command is used
for a experimental purpose.

Thanks,
Teruaki Ishizaki




Re: [Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-02-03 Thread Teruaki Ishizaki

(2015/02/02 15:52), Liu Yuan wrote:

On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M


Is it possible to make this option more user friendly? such as

  $ qemu-img create -o object_size=8M sheepdog:test 1G


At first, I thought that the object_size was user friendly.
But, Sheepdog has already the value of block_size_shift
in the inode layout that means like object_size.

'object_size' doesn't always fit right in 'block_size_shift'.
On the other hands, 'block_size_shift' always fit right in
'object_size'.

I think that existing layout shouldn't be changed easily and
it seems that it is difficult for users to specify
the object_size value that fit right in 'block_size_shift'.

Thanks,
Teruaki Ishizaki




[Qemu-devel] [PATCH v4] sheepdog: selectable object size support

2015-01-27 Thread Teruaki Ishizaki
Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

 # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

 # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V4:
 - Limit a read/write buffer size for creating a preallocated VDI.
 - Replace a parse function for the block_size_shift option.
 - Fix an error message.

V3:
 - Delete the needless operation of buffer.
 - Delete the needless operations of request header.
   for SD_OP_GET_CLUSTER_DEFAULT.
 - Fix coding style problems.

V2:
 - Fix coding style problem (white space).
 - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
 - Initialize request header to use block_size_shift specified by user.
---
 block/sheepdog.c  |  138 ++---
 include/block/block_int.h |1 +
 2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..a43b947 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
 #define SD_OP_READ_VDIS  0x15
 #define SD_OP_FLUSH_VDI  0x16
 #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18
 
 #define SD_FLAG_CMD_WRITE0x01
 #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
 uint32_t base_vdi_id;
 uint8_t copies;
 uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
 uint32_t snapid;
 uint32_t type;
 uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
 uint32_t pad[5];
 } SheepdogVdiRsp;
 
+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
 typedef struct SheepdogInode {
 char name[SD_MAX_VDI_LEN];
 char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 hdr.vdi_size = s-inode.vdi_size;
 hdr.copy_policy = s-inode.copy_policy;
 hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;
 
 ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);
 
@@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 static int sd_prealloc(const char *filename, Error **errp)
 {
 BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
+unsigned long buf_size;
 uint32_t idx, max_idx;
+uint32_t object_size;
 int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
 int ret;
 
 ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error 
**errp)
 ret = vdi_size;
 goto out;
 }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
+buf = g_malloc0(buf_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, buf_size);
 
 for (idx = 0; idx  max_idx; idx++) {
 /*
  * The created image can be a cloned image, so we need to read
  * a data from the source image.
  */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
 if (ret  0) {
 goto out;
 }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
 if (ret  0) {
 goto out;
 }
@@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
 return 0;
 }
 
+static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode-block_size_shift =
+(uint8_t)qemu_opt_get_number_del(opt, block_size_shift, 0);
+if (inode-block_size_shift == 0) {
+/* block_size_shift is set for cluster default value by sheepdog */
+return 0;
+} else if (inode-block_size_shift  20

Re: [Qemu-devel] [PATCH v3] sheepdog: selectable object size support

2015-01-26 Thread Teruaki Ishizaki

(2015/01/26 19:33), Kevin Wolf wrote:

Am 26.01.2015 um 10:52 hat Teruaki Ishizaki geschrieben:

Hi, Kevin

Thanks for your review!

(2015/01/23 22:53), Kevin Wolf wrote:

Am 23.01.2015 um 09:24 hat Teruaki Ishizaki geschrieben:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  140 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..c9f06db 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18

  #define SD_FLAG_CMD_WRITE0x01
  #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
  uint32_t base_vdi_id;
  uint8_t copies;
  uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
  uint32_t snapid;
  uint32_t type;
  uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
  uint32_t pad[5];
  } SheepdogVdiRsp;

+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
  typedef struct SheepdogInode {
  char name[SD_MAX_VDI_LEN];
  char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  hdr.vdi_size = s-inode.vdi_size;
  hdr.copy_policy = s-inode.copy_policy;
  hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;

  ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);

@@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  static int sd_prealloc(const char *filename, Error **errp)
  {
  BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
  uint32_t idx, max_idx;
+uint32_t object_size;
  int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
  int ret;

  ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error 
**errp)
  ret = vdi_size;
  goto out;
  }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf = g_malloc0(object_size);


If I understand correctly, block_size_shift can be up to 31, i.e. this
is a 2 GB allocation. Do you really think this is a good idea?

I think that it is not best idea.
Sheepdog internal limit of block_size_shift is 31, so I set logical
limit of block_size_shift for 31.


I think there is no real requirement to use the object size here, this
is only the buffer size for some bdrv_read/write calls, which works fine
in smaller chunks.

So you could still allow block_size_shift up to 31, but use a different
number here. Perhaps always leaving it at 4 MB is good enough, otherwise
you could use something like MIN(24, base-inode.block_size_shift).


At least use g_try_malloc0() here, so that a memory allocation failure
doesn't crash qemu. (Same goes for all potentially huge allocations that
you make in the whole codebase.)

As you pointed out, memory allocation was needed for that value.
I suppose that block_size_shift should be up to 26(64MB) or 27(128M)
actually.

Should it be checked actual limits in that source code?


I suggest limiting the maximum buffer size as above so that a failure
becomes

Re: [Qemu-devel] [PATCH v3] sheepdog: selectable object size support

2015-01-26 Thread Teruaki Ishizaki

Hi, Kevin

Thanks for your review!

(2015/01/23 22:53), Kevin Wolf wrote:

Am 23.01.2015 um 09:24 hat Teruaki Ishizaki geschrieben:

Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V3:
  - Delete the needless operation of buffer.
  - Delete the needless operations of request header
for SD_OP_GET_CLUSTER_DEFAULT.
  - Fix coding style problems.

V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  140 ++---
  include/block/block_int.h |1 +
  2 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..c9f06db 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
  #define SD_OP_READ_VDIS  0x15
  #define SD_OP_FLUSH_VDI  0x16
  #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18

  #define SD_FLAG_CMD_WRITE0x01
  #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
  uint32_t base_vdi_id;
  uint8_t copies;
  uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
  uint32_t snapid;
  uint32_t type;
  uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
  uint32_t pad[5];
  } SheepdogVdiRsp;

+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
  typedef struct SheepdogInode {
  char name[SD_MAX_VDI_LEN];
  char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  hdr.vdi_size = s-inode.vdi_size;
  hdr.copy_policy = s-inode.copy_policy;
  hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;

  ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);

@@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
  static int sd_prealloc(const char *filename, Error **errp)
  {
  BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
  uint32_t idx, max_idx;
+uint32_t object_size;
  int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
  int ret;

  ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error 
**errp)
  ret = vdi_size;
  goto out;
  }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf = g_malloc0(object_size);


If I understand correctly, block_size_shift can be up to 31, i.e. this
is a 2 GB allocation. Do you really think this is a good idea?

I think that it is not best idea.
Sheepdog internal limit of block_size_shift is 31, so I set logical 
limit of block_size_shift for 31.




At least use g_try_malloc0() here, so that a memory allocation failure
doesn't crash qemu. (Same goes for all potentially huge allocations that
you make in the whole codebase.)

As you pointed out, memory allocation was needed for that value.
I suppose that block_size_shift should be up to 26(64MB) or 27(128M)
actually.

Should it be checked actual limits in that source code?




+max_idx = DIV_ROUND_UP(vdi_size, object_size);

  for (idx = 0; idx  max_idx; idx++) {
  /*
   * The created image can be a cloned image, so we need to read
   * a data from the source image.
   */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * object_size, buf, object_size);
  if (ret  0) {
  goto out;
  }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret

[Qemu-devel] [PATCH v3] sheepdog: selectable object size support

2015-01-23 Thread Teruaki Ishizaki
Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

 # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

 # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V3:
 - Delete the needless operation of buffer.
 - Delete the needless operations of request header
   for SD_OP_GET_CLUSTER_DEFAULT.
 - Fix coding style problems.

V2:
 - Fix coding style problem (white space).
 - Add members, store_policy and block_size_shift to struct SheepdogVdiReq
 - Initialize request header to use block_size_shift specified by user.
---
 block/sheepdog.c  |  140 ++---
 include/block/block_int.h |1 +
 2 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..c9f06db 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
 #define SD_OP_READ_VDIS  0x15
 #define SD_OP_FLUSH_VDI  0x16
 #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18
 
 #define SD_FLAG_CMD_WRITE0x01
 #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
 uint32_t base_vdi_id;
 uint8_t copies;
 uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
 uint32_t snapid;
 uint32_t type;
 uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
 uint32_t pad[5];
 } SheepdogVdiRsp;
 
+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
 typedef struct SheepdogInode {
 char name[SD_MAX_VDI_LEN];
 char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 hdr.vdi_size = s-inode.vdi_size;
 hdr.copy_policy = s-inode.copy_policy;
 hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;
 
 ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);
 
@@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 static int sd_prealloc(const char *filename, Error **errp)
 {
 BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
 uint32_t idx, max_idx;
+uint32_t object_size;
 int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
 int ret;
 
 ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error 
**errp)
 ret = vdi_size;
 goto out;
 }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf = g_malloc0(object_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, object_size);
 
 for (idx = 0; idx  max_idx; idx++) {
 /*
  * The created image can be a cloned image, so we need to read
  * a data from the source image.
  */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * object_size, buf, object_size);
 if (ret  0) {
 goto out;
 }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * object_size, buf, object_size);
 if (ret  0) {
 goto out;
 }
@@ -1610,7 +1635,9 @@ out_with_err_set:
 if (bs) {
 bdrv_unref(bs);
 }
-g_free(buf);
+if (buf) {
+g_free(buf);
+}
 
 return ret;
 }
@@ -1669,6 +1696,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
 return 0;
 }
 
+static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode-block_size_shift = (uint8_t)atoi(opt);
+if (inode-block_size_shift  20 || inode-block_size_shift  31) {
+return -EINVAL;
+}
+
+return 0;
+}
+
 static int sd_create(const char *filename, QemuOpts *opts,
  Error **errp)
 {
@@ -1679,6 +1717,7 @@ static int sd_create(const char

Re: [Qemu-devel] [sheepdog] [PATCH v2] sheepdog: selectable object size support

2015-01-22 Thread Teruaki Ishizaki

Hi, Hitoshi

Thanks for your review!

(2015/01/22 14:22), Hitoshi Mitake wrote:

At Tue, 20 Jan 2015 16:14:28 +0900,
Teruaki Ishizaki wrote:


Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V2:
  - Fix coding style problem (white space).
  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq
  - Initialize request header to use block_size_shift specified by user.
---
  block/sheepdog.c  |  142 ++---
  include/block/block_int.h |1 +
  2 files changed, 121 insertions(+), 22 deletions(-)




@@ -1757,6 +1800,48 @@ static int sd_create(const char *filename, QemuOpts 
*opts,
  }

  s-aio_context = qemu_get_aio_context();
+
+/* if block_size_shift is not specified, get cluster default value */
+if (s-inode.block_size_shift == 0) {
+SheepdogVdiReq hdr;
+SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)hdr;
+Error *local_err = NULL;
+int fd;
+unsigned int wlen = 0, rlen = 0;
+
+fd = connect_to_sdog(s, local_err);
+if (fd  0) {
+error_report(%s, error_get_pretty(local_err));
+error_free(local_err);
+ret = -EIO;
+goto out;
+}
+
+memset(hdr, 0, sizeof(hdr));


SD_OP_GET_CLUSTER_DEFAULT doesn't require succeeding data, so the below 
memset() for buf and


+memset(buf, 0, sizeof(buf));
+hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
+hdr.proto_ver = SD_PROTO_VER;


the below two statements aren't required.


+hdr.data_length = wlen;
+hdr.flags = SD_FLAG_CMD_WRITE;


I comprehend above things you pointed out.
I'll fix the patch!

Thanks,
Teruaki Ishizaki




[Qemu-devel] [PATCH v2] sheepdog: selectable object size support

2015-01-19 Thread Teruaki Ishizaki
Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

 # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

 # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
V2:
 - Fix coding style problem (white space).
 - Add members, store_policy and block_size_shift to struct SheepdogVdiReq
 - Initialize request header to use block_size_shift specified by user.
---
 block/sheepdog.c  |  142 ++---
 include/block/block_int.h |1 +
 2 files changed, 121 insertions(+), 22 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..ac18966 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
 #define SD_OP_READ_VDIS  0x15
 #define SD_OP_FLUSH_VDI  0x16
 #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18
 
 #define SD_FLAG_CMD_WRITE0x01
 #define SD_FLAG_CMD_COW  0x02
@@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
 uint32_t base_vdi_id;
 uint8_t copies;
 uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
 uint32_t snapid;
 uint32_t type;
 uint32_t pad[2];
@@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
 uint32_t pad[5];
 } SheepdogVdiRsp;
 
+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
 typedef struct SheepdogInode {
 char name[SD_MAX_VDI_LEN];
 char tag[SD_MAX_VDI_TAG_LEN];
@@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 hdr.vdi_size = s-inode.vdi_size;
 hdr.copy_policy = s-inode.copy_policy;
 hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;
 
 ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen, rlen);
 
@@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 static int sd_prealloc(const char *filename, Error **errp)
 {
 BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
 uint32_t idx, max_idx;
+uint32_t object_size;
 int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
 int ret;
 
 ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error 
**errp)
 ret = vdi_size;
 goto out;
 }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf = g_malloc0(object_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, object_size);
 
 for (idx = 0; idx  max_idx; idx++) {
 /*
  * The created image can be a cloned image, so we need to read
  * a data from the source image.
  */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * object_size, buf, object_size);
 if (ret  0) {
 goto out;
 }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * object_size, buf, object_size);
 if (ret  0) {
 goto out;
 }
@@ -1610,7 +1635,8 @@ out_with_err_set:
 if (bs) {
 bdrv_unref(bs);
 }
-g_free(buf);
+if (buf)
+   g_free(buf);
 
 return ret;
 }
@@ -1669,6 +1695,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
 return 0;
 }
 
+static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode-block_size_shift = (uint8_t)atoi(opt);
+if(inode-block_size_shift  20 || inode-block_size_shift  31){
+return -EINVAL;
+}
+
+return 0;
+}
+
 static int sd_create(const char *filename, QemuOpts *opts,
  Error **errp)
 {
@@ -1679,6 +1716,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
 BDRVSheepdogState *s;
 char tag[SD_MAX_VDI_TAG_LEN];
 uint32_t snapid;
+uint64_t max_vdi_size;
 bool prealloc = false;
 
 s

Re: [Qemu-devel] [sheepdog] [PATCH] sheepdog: selectable object size support

2015-01-16 Thread Teruaki Ishizaki

Hi Hitoshi,

Thanks for your check.

(2015/01/15 15:05), Hitoshi Mitake wrote:

At Tue, 13 Jan 2015 17:41:12 +0900,
Teruaki Ishizaki wrote:


Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

  # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
  block/sheepdog.c  |  138 +---
  include/block/block_int.h |1 +
  2 files changed, 117 insertions(+), 22 deletions(-)





@@ -1610,7 +1633,8 @@ out_with_err_set:
  if (bs) {
  bdrv_unref(bs);
  }
-g_free(buf);
+if (buf)


The above line has a style problem (white space).


I'll change it.




@@ -1669,6 +1693,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
  return 0;
  }

+static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode-block_size_shift = (uint8_t)atoi(opt);


This patch cannot create VDI with specified block size shift. Below is
the reason:

Initializing block_size_shift of the inode object here (in
parse_block_size_shift()) doesn't make sense. Because the
block_size_shift of newly created VDI is specified by block_size_shift
of SheepdogVdiReq (in sheepdog source code, sd_req.vdi). You need to
synchronize the struct and initialize the parameter. Below is a slack
solution:

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 525254e..3dc3359 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -168,7 +168,8 @@ typedef struct SheepdogVdiReq {
  uint32_t base_vdi_id;
  uint8_t copies;
  uint8_t copy_policy;
-uint8_t reserved[2];
+uint8_t store_policy;
+uint8_t block_size_shift;
  uint32_t snapid;
  uint32_t type;
  uint32_t pad[2];
@@ -1560,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s,
uint32_t *vdi_id, int snapshot,
  hdr.vdi_size = s-inode.vdi_size;
  hdr.copy_policy = s-inode.copy_policy;
  hdr.copies = s-inode.nr_copies;
+hdr.block_size_shift = s-inode.block_size_shift;

  ret = do_req(fd, s-aio_context, (SheepdogReq *)hdr, buf, wlen,
  rlen);



Oh, sorry for my miss-edit for the patch
I had tested the source code as you sugested fixing.

I'll fix the patch like that!

 # This is a very slack solution. You need to avoid using inode as a
 # temporal space of the parameter.
Originally, do_sd_create() had argument struct BDRVSheepdogState
which included inode, and parse_redundancy() substitute copy_policy
and copies substituted for inode.

So, I created parse_block_size_shift() like parse_redundancy()
to handle block_size_shift like copy_policy and copies options.

Which point is temporal?


Thanks,
Teruaki Ishizaki




[Qemu-devel] [PATCH] sheepdog: selectable object size support

2015-01-13 Thread Teruaki Ishizaki
Previously, qemu block driver of sheepdog used hard-coded VDI object size.
This patch enables users to handle block_size_shift value for
calculating VDI object size.

When you start qemu, you don't need to specify additional command option.

But when you create the VDI which doesn't have default object size
with qemu-img command, you specify block_size_shift option.

If you want to create a VDI of 8MB(1  23) object size,
you need to specify following command option.

 # qemu-img create -o block_size_shift=23 sheepdog:test1 100M

In addition, when you don't specify qemu-img command option,
a default value of sheepdog cluster is used for creating VDI.

 # qemu-img create sheepdog:test2 100M

Signed-off-by: Teruaki Ishizaki ishizaki.teru...@lab.ntt.co.jp
---
 block/sheepdog.c  |  138 +---
 include/block/block_int.h |1 +
 2 files changed, 117 insertions(+), 22 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..525254e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,6 +37,7 @@
 #define SD_OP_READ_VDIS  0x15
 #define SD_OP_FLUSH_VDI  0x16
 #define SD_OP_DEL_VDI0x17
+#define SD_OP_GET_CLUSTER_DEFAULT   0x18
 
 #define SD_FLAG_CMD_WRITE0x01
 #define SD_FLAG_CMD_COW  0x02
@@ -186,6 +187,21 @@ typedef struct SheepdogVdiRsp {
 uint32_t pad[5];
 } SheepdogVdiRsp;
 
+typedef struct SheepdogClusterRsp {
+uint8_t proto_ver;
+uint8_t opcode;
+uint16_t flags;
+uint32_t epoch;
+uint32_t id;
+uint32_t data_length;
+uint32_t result;
+uint8_t nr_copies;
+uint8_t copy_policy;
+uint8_t block_size_shift;
+uint8_t __pad1;
+uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
 typedef struct SheepdogInode {
 char name[SD_MAX_VDI_LEN];
 char tag[SD_MAX_VDI_TAG_LEN];
@@ -1569,9 +1585,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t 
*vdi_id, int snapshot,
 static int sd_prealloc(const char *filename, Error **errp)
 {
 BlockDriverState *bs = NULL;
+BDRVSheepdogState *base = NULL;
 uint32_t idx, max_idx;
+uint32_t object_size;
 int64_t vdi_size;
-void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+void *buf = NULL;
 int ret;
 
 ret = bdrv_open(bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1585,18 +1603,23 @@ static int sd_prealloc(const char *filename, Error 
**errp)
 ret = vdi_size;
 goto out;
 }
-max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+base = bs-opaque;
+object_size = (UINT32_C(1)  base-inode.block_size_shift);
+buf = g_malloc0(object_size);
+
+max_idx = DIV_ROUND_UP(vdi_size, object_size);
 
 for (idx = 0; idx  max_idx; idx++) {
 /*
  * The created image can be a cloned image, so we need to read
  * a data from the source image.
  */
-ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pread(bs, idx * object_size, buf, object_size);
 if (ret  0) {
 goto out;
 }
-ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ret = bdrv_pwrite(bs, idx * object_size, buf, object_size);
 if (ret  0) {
 goto out;
 }
@@ -1610,7 +1633,8 @@ out_with_err_set:
 if (bs) {
 bdrv_unref(bs);
 }
-g_free(buf);
+if (buf) 
+   g_free(buf);
 
 return ret;
 }
@@ -1669,6 +1693,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const 
char *opt)
 return 0;
 }
 
+static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt)
+{
+struct SheepdogInode *inode = s-inode;
+inode-block_size_shift = (uint8_t)atoi(opt);
+if(inode-block_size_shift  20 || inode-block_size_shift  31){
+return -EINVAL;
+}
+
+return 0;
+}
+
 static int sd_create(const char *filename, QemuOpts *opts,
  Error **errp)
 {
@@ -1679,6 +1714,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
 BDRVSheepdogState *s;
 char tag[SD_MAX_VDI_TAG_LEN];
 uint32_t snapid;
+uint64_t max_vdi_size;
 bool prealloc = false;
 
 s = g_new0(BDRVSheepdogState, 1);
@@ -1717,11 +1753,14 @@ static int sd_create(const char *filename, QemuOpts 
*opts,
 goto out;
 }
 }
-
-if (s-inode.vdi_size  SD_MAX_VDI_SIZE) {
-error_setg(errp, too big image size);
-ret = -EINVAL;
-goto out;
+buf = qemu_opt_get_del(opts, BLOCK_OPT_BLOCK_SIZE_SHIFT);
+if (buf) {
+ret = parse_block_size_shift(s, buf);
+if (ret  0) {
+error_setg(errp, Invalid block_size_shift:
+ '%s'. please use 20-31, buf);
+goto out;
+}
 }
 
 if (backing_file) {
@@ -1757,6 +1796,48 @@ static int sd_create(const char *filename, QemuOpts 
*opts,
 }
 
 s-aio_context = qemu_get_aio_context();
+
+/* if block_size_shift is not specified, get cluster