Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-26 Thread Sam Li
Damien Le Moal  于2022年8月17日周三 01:50写道:
>
> On 2022/08/15 23:25, Sam Li wrote:
> > By adding zone management operations in BlockDriver, storage controller
> > emulation can use the new block layer APIs including Report Zone and
> > four zone management operations (open, close, finish, reset).
> >
> > Add zoned storage commands of the device: zone_report(zrp), zone_open(zo),
> > zone_close(zc), zone_reset(zrs), zone_finish(zf).
> >
> > For example, to test zone_report, use following command:
> > $ ./build/qemu-io --image-opts driver=zoned_host_device, 
> > filename=/dev/nullb0
> > -c "zrp offset nr_zones"
> >
> > Signed-off-by: Sam Li 
> > Reviewed-by: Hannes Reinecke 
> > ---
> >  block/block-backend.c |  50 +
> >  block/file-posix.c| 341 +-
> >  block/io.c|  41 
> >  include/block/block-common.h  |   1 -
> >  include/block/block-io.h  |  13 ++
> >  include/block/block_int-common.h  |  22 +-
> >  include/block/raw-aio.h   |   6 +-
> >  include/sysemu/block-backend-io.h |   6 +
> >  meson.build   |   1 +
> >  qapi/block-core.json  |   8 +-
> >  qemu-io-cmds.c| 143 +
> >  11 files changed, 625 insertions(+), 7 deletions(-)
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index d4a5df2ac2..fc639b0cd7 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1775,6 +1775,56 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
> > +/*
> > + * Send a zone_report command.
> > + * offset is a byte offset from the start of the device. No alignment
> > + * required for offset.
> > + * nr_zones represents IN maximum and OUT actual.
> > + */
> > +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> > +unsigned int *nr_zones,
> > +BlockZoneDescriptor *zones)
> > +{
> > +int ret;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk); /* increase before waiting */
> > +blk_wait_while_drained(blk);
> > +if (!blk_is_available(blk)) {
> > +blk_dec_in_flight(blk);
> > +return -ENOMEDIUM;
> > +}
> > +ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> > +/*
> > + * Send a zone_management command.
> > + * offset is the starting zone specified as a sector offset.
> > + * len is the maximum number of sectors the command should operate on.
>
> You should mention that len should be zone size aligned. Also, for 
> completness,
> add a short description of the op argument too ?
>
> > + */
> > +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> > +int64_t offset, int64_t len)
> > +{
> > +int ret;
> > +IO_CODE();
> > +
> > +ret = blk_check_byte_request(blk, offset, len);
> > +if (ret < 0) {
> > +return ret;
> > +}
> > +blk_inc_in_flight(blk);
> > +blk_wait_while_drained(blk);
> > +if (!blk_is_available(blk)) {
> > +blk_dec_in_flight(blk);
> > +return -ENOMEDIUM;
> > +}
> > +ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> >  void blk_drain(BlockBackend *blk)
> >  {
> >  BlockDriverState *bs = blk_bs(blk);
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 727389488c..29f67082d9 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -67,6 +67,9 @@
> >  #include 
> >  #include 
> >  #include 
> > +#if defined(CONFIG_BLKZONED)
> > +#include 
> > +#endif
> >  #include 
> >  #include 
> >  #include 
> > @@ -216,6 +219,13 @@ typedef struct RawPosixAIOData {
> >  PreallocMode prealloc;
> >  Error **errp;
> >  } truncate;
> > +struct {
> > +unsigned int *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +struct {
> > +unsigned long ioctl_op;
>
> May be clarify this field usage by calling it zone_op ?
>
> > +} zone_mgmt;
> >  };
> >  } RawPosixAIOData;
> >
> > @@ -1328,7 +1338,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> > Error **errp)
> >  #endif
> >
> >  if (bs->sg || S_ISBLK(st.st_mode)) {
> > -int ret = hdev_get_max_hw_transfer(s->fd, &st);
> > +ret = hdev_get_max_hw_transfer(s->fd, &st);
> >
> >  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> >  bs->bl.max_hw_transfer = ret;
> > @@ -1340,11 +1350,32 @@ static void raw_refresh_limits(BlockDriverState 
> > *bs, Error **errp)
> >  }
> >  }
> >
> > -ret = get_sysfs_zoned_model(s->fd, &st, &zoned);
> > +ret = get_sysfs_zoned_model(&st, &zoned);
> >  if (ret < 0) {
> >  zoned = BLK_Z_NONE;
> >  }
> >  bs->bl.zoned = zoned;
> > 

Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-24 Thread Damien Le Moal
On 2022/08/24 16:46, Damien Le Moal wrote:
> On 2022/08/22 21:12, Sam Li wrote:
>> Stefan Hajnoczi  于2022年8月23日周二 08:49写道:
>>>
>>> On Tue, Aug 16, 2022 at 02:25:18PM +0800, Sam Li wrote:
[...] +blkz = (struct blk_zone *)(rep + 1);
 +while (n < nrz) {
 +memset(rep, 0, rep_size);
 +rep->sector = sector;
 +rep->nr_zones = nrz - n;
 +
 +ret = ioctl(fd, BLKREPORTZONE, rep);
>>>
>>> Does this ioctl() need "do { ... } while (ret == -1 && errno == EINTR)"?
>>
>> No? We discussed this before. I guess even EINTR should be propagated
>> back to the guest. Maybe Damien can talk more about why.
> 
> In the kernel, completion of zone management IO requests are waited for using
> wait_for_completion_io() which uses TASK_UNINTERRUPTIBLE. So a signal will not
> abort anything. So I do not think that the do { } while() loop is necessary.

Note: I do not think the loop to be necessary, but it will not hurt :)

-- 
Damien Le Moal
Western Digital Research



Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-24 Thread Damien Le Moal
On 2022/08/22 21:12, Sam Li wrote:
> Stefan Hajnoczi  于2022年8月23日周二 08:49写道:
>>
>> On Tue, Aug 16, 2022 at 02:25:18PM +0800, Sam Li wrote:
>>> By adding zone management operations in BlockDriver, storage controller
>>> emulation can use the new block layer APIs including Report Zone and
>>> four zone management operations (open, close, finish, reset).
>>>
>>> Add zoned storage commands of the device: zone_report(zrp), zone_open(zo),
>>> zone_close(zc), zone_reset(zrs), zone_finish(zf).
>>>
>>> For example, to test zone_report, use following command:
>>> $ ./build/qemu-io --image-opts driver=zoned_host_device, 
>>> filename=/dev/nullb0
>>> -c "zrp offset nr_zones"
>>>
>>> Signed-off-by: Sam Li 
>>> Reviewed-by: Hannes Reinecke 
>>> ---
>>>  block/block-backend.c |  50 +
>>>  block/file-posix.c| 341 +-
>>>  block/io.c|  41 
>>>  include/block/block-common.h  |   1 -
>>>  include/block/block-io.h  |  13 ++
>>>  include/block/block_int-common.h  |  22 +-
>>>  include/block/raw-aio.h   |   6 +-
>>>  include/sysemu/block-backend-io.h |   6 +
>>>  meson.build   |   1 +
>>>  qapi/block-core.json  |   8 +-
>>>  qemu-io-cmds.c| 143 +
>>>  11 files changed, 625 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/block/block-backend.c b/block/block-backend.c
>>> index d4a5df2ac2..fc639b0cd7 100644
>>> --- a/block/block-backend.c
>>> +++ b/block/block-backend.c
>>> @@ -1775,6 +1775,56 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
>>>  return ret;
>>>  }
>>>
>>> +/*
>>> + * Send a zone_report command.
>>> + * offset is a byte offset from the start of the device. No alignment
>>> + * required for offset.
>>> + * nr_zones represents IN maximum and OUT actual.
>>> + */
>>> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
>>> +unsigned int *nr_zones,
>>> +BlockZoneDescriptor *zones)
>>> +{
>>> +int ret;
>>> +IO_CODE();
>>> +
>>> +blk_inc_in_flight(blk); /* increase before waiting */
>>> +blk_wait_while_drained(blk);
>>> +if (!blk_is_available(blk)) {
>>> +blk_dec_in_flight(blk);
>>> +return -ENOMEDIUM;
>>> +}
>>> +ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
>>> +blk_dec_in_flight(blk);
>>> +return ret;
>>> +}
>>> +
>>> +/*
>>> + * Send a zone_management command.
>>> + * offset is the starting zone specified as a sector offset.
>>> + * len is the maximum number of sectors the command should operate on.
>>> + */
>>> +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
>>> +int64_t offset, int64_t len)
>>> +{
>>> +int ret;
>>> +IO_CODE();
>>> +
>>> +ret = blk_check_byte_request(blk, offset, len);
>>> +if (ret < 0) {
>>> +return ret;
>>> +}
>>
>> blk_check_byte_request() calls blk_is_available() and returns -ENOMEDIUM
>> when it fails. You can therefore move this down and replace "if
>> (!blk_is_available(blk)) {".
>>
>>> +blk_inc_in_flight(blk);
>>> +blk_wait_while_drained(blk);
>>> +if (!blk_is_available(blk)) {
>>> +blk_dec_in_flight(blk);
>>> +return -ENOMEDIUM;
>>> +}
>>> +ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
>>> +blk_dec_in_flight(blk);
>>> +return ret;
>>> +}
>>> +
>>>  void blk_drain(BlockBackend *blk)
>>>  {
>>>  BlockDriverState *bs = blk_bs(blk);
>>> diff --git a/block/file-posix.c b/block/file-posix.c
>>> index 727389488c..29f67082d9 100644
>>> --- a/block/file-posix.c
>>> +++ b/block/file-posix.c
>>> @@ -67,6 +67,9 @@
>>>  #include 
>>>  #include 
>>>  #include 
>>> +#if defined(CONFIG_BLKZONED)
>>> +#include 
>>> +#endif
>>>  #include 
>>>  #include 
>>>  #include 
>>> @@ -216,6 +219,13 @@ typedef struct RawPosixAIOData {
>>>  PreallocMode prealloc;
>>>  Error **errp;
>>>  } truncate;
>>> +struct {
>>> +unsigned int *nr_zones;
>>> +BlockZoneDescriptor *zones;
>>> +} zone_report;
>>> +struct {
>>> +unsigned long ioctl_op;
>>> +} zone_mgmt;
>>>  };
>>>  } RawPosixAIOData;
>>>
>>> @@ -1328,7 +1338,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
>>> Error **errp)
>>>  #endif
>>>
>>>  if (bs->sg || S_ISBLK(st.st_mode)) {
>>> -int ret = hdev_get_max_hw_transfer(s->fd, &st);
>>> +ret = hdev_get_max_hw_transfer(s->fd, &st);
>>>
>>>  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
>>>  bs->bl.max_hw_transfer = ret;
>>> @@ -1340,11 +1350,32 @@ static void raw_refresh_limits(BlockDriverState 
>>> *bs, Error **errp)
>>>  }
>>>  }
>>>
>>> -ret = get_sysfs_zoned_model(s->fd, &st, &zoned);
>>> +ret = get_sysfs_zoned_model(&st, &zoned);
>>>  if (ret < 0) {
>>>  zoned = BLK_Z_NONE;
>>>   

Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-23 Thread Stefan Hajnoczi
On Tue, Aug 23, 2022 at 12:12:44PM +0800, Sam Li wrote:
> Stefan Hajnoczi  于2022年8月23日周二 08:49写道:
> > On Tue, Aug 16, 2022 at 02:25:18PM +0800, Sam Li wrote:
> > > +static int handle_aiocb_zone_report(void *opaque) {
> > > +#if defined(CONFIG_BLKZONED)
> > > +RawPosixAIOData *aiocb = opaque;
> > > +int fd = aiocb->aio_fildes;
> > > +unsigned int *nr_zones = aiocb->zone_report.nr_zones;
> > > +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> > > +int64_t sector = aiocb->aio_offset;
> > > +
> > > +struct blk_zone *blkz;
> > > +int64_t rep_size;
> > > +unsigned int nrz;
> > > +int ret, n = 0, i = 0;
> > > +
> > > +nrz = *nr_zones;
> > > +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> > > blk_zone);
> > > +g_autofree struct blk_zone_report *rep = NULL;
> > > +rep = g_malloc(rep_size);
> > > +
> > > +blkz = (struct blk_zone *)(rep + 1);
> > > +while (n < nrz) {
> > > +memset(rep, 0, rep_size);
> > > +rep->sector = sector;
> > > +rep->nr_zones = nrz - n;
> > > +
> > > +ret = ioctl(fd, BLKREPORTZONE, rep);
> >
> > Does this ioctl() need "do { ... } while (ret == -1 && errno == EINTR)"?
> 
> No? We discussed this before. I guess even EINTR should be propagated
> back to the guest. Maybe Damien can talk more about why.

No, EINTR is an internal error that must be handled by QEMU. It means
the QEMU process' syscall was interrupted by a signal and the syscall
must be retried. The guest shouldn't see EINTR (and there is no
virtio-blk error code defined for it).


signature.asc
Description: PGP signature


Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-22 Thread Sam Li
Stefan Hajnoczi  于2022年8月23日周二 08:49写道:
>
> On Tue, Aug 16, 2022 at 02:25:18PM +0800, Sam Li wrote:
> > By adding zone management operations in BlockDriver, storage controller
> > emulation can use the new block layer APIs including Report Zone and
> > four zone management operations (open, close, finish, reset).
> >
> > Add zoned storage commands of the device: zone_report(zrp), zone_open(zo),
> > zone_close(zc), zone_reset(zrs), zone_finish(zf).
> >
> > For example, to test zone_report, use following command:
> > $ ./build/qemu-io --image-opts driver=zoned_host_device, 
> > filename=/dev/nullb0
> > -c "zrp offset nr_zones"
> >
> > Signed-off-by: Sam Li 
> > Reviewed-by: Hannes Reinecke 
> > ---
> >  block/block-backend.c |  50 +
> >  block/file-posix.c| 341 +-
> >  block/io.c|  41 
> >  include/block/block-common.h  |   1 -
> >  include/block/block-io.h  |  13 ++
> >  include/block/block_int-common.h  |  22 +-
> >  include/block/raw-aio.h   |   6 +-
> >  include/sysemu/block-backend-io.h |   6 +
> >  meson.build   |   1 +
> >  qapi/block-core.json  |   8 +-
> >  qemu-io-cmds.c| 143 +
> >  11 files changed, 625 insertions(+), 7 deletions(-)
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index d4a5df2ac2..fc639b0cd7 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1775,6 +1775,56 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
> > +/*
> > + * Send a zone_report command.
> > + * offset is a byte offset from the start of the device. No alignment
> > + * required for offset.
> > + * nr_zones represents IN maximum and OUT actual.
> > + */
> > +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> > +unsigned int *nr_zones,
> > +BlockZoneDescriptor *zones)
> > +{
> > +int ret;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk); /* increase before waiting */
> > +blk_wait_while_drained(blk);
> > +if (!blk_is_available(blk)) {
> > +blk_dec_in_flight(blk);
> > +return -ENOMEDIUM;
> > +}
> > +ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> > +/*
> > + * Send a zone_management command.
> > + * offset is the starting zone specified as a sector offset.
> > + * len is the maximum number of sectors the command should operate on.
> > + */
> > +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> > +int64_t offset, int64_t len)
> > +{
> > +int ret;
> > +IO_CODE();
> > +
> > +ret = blk_check_byte_request(blk, offset, len);
> > +if (ret < 0) {
> > +return ret;
> > +}
>
> blk_check_byte_request() calls blk_is_available() and returns -ENOMEDIUM
> when it fails. You can therefore move this down and replace "if
> (!blk_is_available(blk)) {".
>
> > +blk_inc_in_flight(blk);
> > +blk_wait_while_drained(blk);
> > +if (!blk_is_available(blk)) {
> > +blk_dec_in_flight(blk);
> > +return -ENOMEDIUM;
> > +}
> > +ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> >  void blk_drain(BlockBackend *blk)
> >  {
> >  BlockDriverState *bs = blk_bs(blk);
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 727389488c..29f67082d9 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -67,6 +67,9 @@
> >  #include 
> >  #include 
> >  #include 
> > +#if defined(CONFIG_BLKZONED)
> > +#include 
> > +#endif
> >  #include 
> >  #include 
> >  #include 
> > @@ -216,6 +219,13 @@ typedef struct RawPosixAIOData {
> >  PreallocMode prealloc;
> >  Error **errp;
> >  } truncate;
> > +struct {
> > +unsigned int *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +struct {
> > +unsigned long ioctl_op;
> > +} zone_mgmt;
> >  };
> >  } RawPosixAIOData;
> >
> > @@ -1328,7 +1338,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> > Error **errp)
> >  #endif
> >
> >  if (bs->sg || S_ISBLK(st.st_mode)) {
> > -int ret = hdev_get_max_hw_transfer(s->fd, &st);
> > +ret = hdev_get_max_hw_transfer(s->fd, &st);
> >
> >  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> >  bs->bl.max_hw_transfer = ret;
> > @@ -1340,11 +1350,32 @@ static void raw_refresh_limits(BlockDriverState 
> > *bs, Error **errp)
> >  }
> >  }
> >
> > -ret = get_sysfs_zoned_model(s->fd, &st, &zoned);
> > +ret = get_sysfs_zoned_model(&st, &zoned);
> >  if (ret < 0) {
> >  zoned = BLK_Z_NONE;
> >  }
> >  bs->bl.zoned = zoned;
> > +   

Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-22 Thread Stefan Hajnoczi
On Tue, Aug 16, 2022 at 02:25:18PM +0800, Sam Li wrote:
> By adding zone management operations in BlockDriver, storage controller
> emulation can use the new block layer APIs including Report Zone and
> four zone management operations (open, close, finish, reset).
> 
> Add zoned storage commands of the device: zone_report(zrp), zone_open(zo),
> zone_close(zc), zone_reset(zrs), zone_finish(zf).
> 
> For example, to test zone_report, use following command:
> $ ./build/qemu-io --image-opts driver=zoned_host_device, filename=/dev/nullb0
> -c "zrp offset nr_zones"
> 
> Signed-off-by: Sam Li 
> Reviewed-by: Hannes Reinecke 
> ---
>  block/block-backend.c |  50 +
>  block/file-posix.c| 341 +-
>  block/io.c|  41 
>  include/block/block-common.h  |   1 -
>  include/block/block-io.h  |  13 ++
>  include/block/block_int-common.h  |  22 +-
>  include/block/raw-aio.h   |   6 +-
>  include/sysemu/block-backend-io.h |   6 +
>  meson.build   |   1 +
>  qapi/block-core.json  |   8 +-
>  qemu-io-cmds.c| 143 +
>  11 files changed, 625 insertions(+), 7 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index d4a5df2ac2..fc639b0cd7 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1775,6 +1775,56 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
>  return ret;
>  }
>  
> +/*
> + * Send a zone_report command.
> + * offset is a byte offset from the start of the device. No alignment
> + * required for offset.
> + * nr_zones represents IN maximum and OUT actual.
> + */
> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> +unsigned int *nr_zones,
> +BlockZoneDescriptor *zones)
> +{
> +int ret;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk); /* increase before waiting */
> +blk_wait_while_drained(blk);
> +if (!blk_is_available(blk)) {
> +blk_dec_in_flight(blk);
> +return -ENOMEDIUM;
> +}
> +ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
> +/*
> + * Send a zone_management command.
> + * offset is the starting zone specified as a sector offset.
> + * len is the maximum number of sectors the command should operate on.
> + */
> +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> +int64_t offset, int64_t len)
> +{
> +int ret;
> +IO_CODE();
> +
> +ret = blk_check_byte_request(blk, offset, len);
> +if (ret < 0) {
> +return ret;
> +}

blk_check_byte_request() calls blk_is_available() and returns -ENOMEDIUM
when it fails. You can therefore move this down and replace "if
(!blk_is_available(blk)) {".

> +blk_inc_in_flight(blk);
> +blk_wait_while_drained(blk);
> +if (!blk_is_available(blk)) {
> +blk_dec_in_flight(blk);
> +return -ENOMEDIUM;
> +}
> +ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
>  void blk_drain(BlockBackend *blk)
>  {
>  BlockDriverState *bs = blk_bs(blk);
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 727389488c..29f67082d9 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -67,6 +67,9 @@
>  #include 
>  #include 
>  #include 
> +#if defined(CONFIG_BLKZONED)
> +#include 
> +#endif
>  #include 
>  #include 
>  #include 
> @@ -216,6 +219,13 @@ typedef struct RawPosixAIOData {
>  PreallocMode prealloc;
>  Error **errp;
>  } truncate;
> +struct {
> +unsigned int *nr_zones;
> +BlockZoneDescriptor *zones;
> +} zone_report;
> +struct {
> +unsigned long ioctl_op;
> +} zone_mgmt;
>  };
>  } RawPosixAIOData;
>  
> @@ -1328,7 +1338,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  #endif
>  
>  if (bs->sg || S_ISBLK(st.st_mode)) {
> -int ret = hdev_get_max_hw_transfer(s->fd, &st);
> +ret = hdev_get_max_hw_transfer(s->fd, &st);
>  
>  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
>  bs->bl.max_hw_transfer = ret;
> @@ -1340,11 +1350,32 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  }
>  }
>  
> -ret = get_sysfs_zoned_model(s->fd, &st, &zoned);
> +ret = get_sysfs_zoned_model(&st, &zoned);
>  if (ret < 0) {
>  zoned = BLK_Z_NONE;
>  }
>  bs->bl.zoned = zoned;
> +if (zoned != BLK_Z_NONE) {
> +ret = get_sysfs_long_val(&st, "chunk_sectors");
> +if (ret > 0) {
> +bs->bl.zone_sectors = ret;
> +}
> +
> +ret = get_sysfs_long_val(&st, "zone_append_max_bytes");
> +if (ret > 0) {
> +bs->bl.zone_append_max_bytes = ret;
> +

Re: [PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-16 Thread Damien Le Moal
On 2022/08/15 23:25, Sam Li wrote:
> By adding zone management operations in BlockDriver, storage controller
> emulation can use the new block layer APIs including Report Zone and
> four zone management operations (open, close, finish, reset).
> 
> Add zoned storage commands of the device: zone_report(zrp), zone_open(zo),
> zone_close(zc), zone_reset(zrs), zone_finish(zf).
> 
> For example, to test zone_report, use following command:
> $ ./build/qemu-io --image-opts driver=zoned_host_device, filename=/dev/nullb0
> -c "zrp offset nr_zones"
> 
> Signed-off-by: Sam Li 
> Reviewed-by: Hannes Reinecke 
> ---
>  block/block-backend.c |  50 +
>  block/file-posix.c| 341 +-
>  block/io.c|  41 
>  include/block/block-common.h  |   1 -
>  include/block/block-io.h  |  13 ++
>  include/block/block_int-common.h  |  22 +-
>  include/block/raw-aio.h   |   6 +-
>  include/sysemu/block-backend-io.h |   6 +
>  meson.build   |   1 +
>  qapi/block-core.json  |   8 +-
>  qemu-io-cmds.c| 143 +
>  11 files changed, 625 insertions(+), 7 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index d4a5df2ac2..fc639b0cd7 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1775,6 +1775,56 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
>  return ret;
>  }
>  
> +/*
> + * Send a zone_report command.
> + * offset is a byte offset from the start of the device. No alignment
> + * required for offset.
> + * nr_zones represents IN maximum and OUT actual.
> + */
> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> +unsigned int *nr_zones,
> +BlockZoneDescriptor *zones)
> +{
> +int ret;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk); /* increase before waiting */
> +blk_wait_while_drained(blk);
> +if (!blk_is_available(blk)) {
> +blk_dec_in_flight(blk);
> +return -ENOMEDIUM;
> +}
> +ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
> +/*
> + * Send a zone_management command.
> + * offset is the starting zone specified as a sector offset.
> + * len is the maximum number of sectors the command should operate on.

You should mention that len should be zone size aligned. Also, for completness,
add a short description of the op argument too ?

> + */
> +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> +int64_t offset, int64_t len)
> +{
> +int ret;
> +IO_CODE();
> +
> +ret = blk_check_byte_request(blk, offset, len);
> +if (ret < 0) {
> +return ret;
> +}
> +blk_inc_in_flight(blk);
> +blk_wait_while_drained(blk);
> +if (!blk_is_available(blk)) {
> +blk_dec_in_flight(blk);
> +return -ENOMEDIUM;
> +}
> +ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
>  void blk_drain(BlockBackend *blk)
>  {
>  BlockDriverState *bs = blk_bs(blk);
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 727389488c..29f67082d9 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -67,6 +67,9 @@
>  #include 
>  #include 
>  #include 
> +#if defined(CONFIG_BLKZONED)
> +#include 
> +#endif
>  #include 
>  #include 
>  #include 
> @@ -216,6 +219,13 @@ typedef struct RawPosixAIOData {
>  PreallocMode prealloc;
>  Error **errp;
>  } truncate;
> +struct {
> +unsigned int *nr_zones;
> +BlockZoneDescriptor *zones;
> +} zone_report;
> +struct {
> +unsigned long ioctl_op;

May be clarify this field usage by calling it zone_op ?

> +} zone_mgmt;
>  };
>  } RawPosixAIOData;
>  
> @@ -1328,7 +1338,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  #endif
>  
>  if (bs->sg || S_ISBLK(st.st_mode)) {
> -int ret = hdev_get_max_hw_transfer(s->fd, &st);
> +ret = hdev_get_max_hw_transfer(s->fd, &st);
>  
>  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
>  bs->bl.max_hw_transfer = ret;
> @@ -1340,11 +1350,32 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  }
>  }
>  
> -ret = get_sysfs_zoned_model(s->fd, &st, &zoned);
> +ret = get_sysfs_zoned_model(&st, &zoned);
>  if (ret < 0) {
>  zoned = BLK_Z_NONE;
>  }
>  bs->bl.zoned = zoned;
> +if (zoned != BLK_Z_NONE) {
> +ret = get_sysfs_long_val(&st, "chunk_sectors");
> +if (ret > 0) {
> +bs->bl.zone_sectors = ret;
> +}
> +
> +ret = get_sysfs_long_val(&st, "zone_append_max_bytes");
> +if (ret > 0) {
> +bs->bl.zone_append_max_bytes = ret;
> +

[PATCH v7 4/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-08-15 Thread Sam Li
By adding zone management operations in BlockDriver, storage controller
emulation can use the new block layer APIs including Report Zone and
four zone management operations (open, close, finish, reset).

Add zoned storage commands of the device: zone_report(zrp), zone_open(zo),
zone_close(zc), zone_reset(zrs), zone_finish(zf).

For example, to test zone_report, use following command:
$ ./build/qemu-io --image-opts driver=zoned_host_device, filename=/dev/nullb0
-c "zrp offset nr_zones"

Signed-off-by: Sam Li 
Reviewed-by: Hannes Reinecke 
---
 block/block-backend.c |  50 +
 block/file-posix.c| 341 +-
 block/io.c|  41 
 include/block/block-common.h  |   1 -
 include/block/block-io.h  |  13 ++
 include/block/block_int-common.h  |  22 +-
 include/block/raw-aio.h   |   6 +-
 include/sysemu/block-backend-io.h |   6 +
 meson.build   |   1 +
 qapi/block-core.json  |   8 +-
 qemu-io-cmds.c| 143 +
 11 files changed, 625 insertions(+), 7 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index d4a5df2ac2..fc639b0cd7 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1775,6 +1775,56 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
 return ret;
 }
 
+/*
+ * Send a zone_report command.
+ * offset is a byte offset from the start of the device. No alignment
+ * required for offset.
+ * nr_zones represents IN maximum and OUT actual.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+unsigned int *nr_zones,
+BlockZoneDescriptor *zones)
+{
+int ret;
+IO_CODE();
+
+blk_inc_in_flight(blk); /* increase before waiting */
+blk_wait_while_drained(blk);
+if (!blk_is_available(blk)) {
+blk_dec_in_flight(blk);
+return -ENOMEDIUM;
+}
+ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
+blk_dec_in_flight(blk);
+return ret;
+}
+
+/*
+ * Send a zone_management command.
+ * offset is the starting zone specified as a sector offset.
+ * len is the maximum number of sectors the command should operate on.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+int64_t offset, int64_t len)
+{
+int ret;
+IO_CODE();
+
+ret = blk_check_byte_request(blk, offset, len);
+if (ret < 0) {
+return ret;
+}
+blk_inc_in_flight(blk);
+blk_wait_while_drained(blk);
+if (!blk_is_available(blk)) {
+blk_dec_in_flight(blk);
+return -ENOMEDIUM;
+}
+ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
+blk_dec_in_flight(blk);
+return ret;
+}
+
 void blk_drain(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
diff --git a/block/file-posix.c b/block/file-posix.c
index 727389488c..29f67082d9 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -67,6 +67,9 @@
 #include 
 #include 
 #include 
+#if defined(CONFIG_BLKZONED)
+#include 
+#endif
 #include 
 #include 
 #include 
@@ -216,6 +219,13 @@ typedef struct RawPosixAIOData {
 PreallocMode prealloc;
 Error **errp;
 } truncate;
+struct {
+unsigned int *nr_zones;
+BlockZoneDescriptor *zones;
+} zone_report;
+struct {
+unsigned long ioctl_op;
+} zone_mgmt;
 };
 } RawPosixAIOData;
 
@@ -1328,7 +1338,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
Error **errp)
 #endif
 
 if (bs->sg || S_ISBLK(st.st_mode)) {
-int ret = hdev_get_max_hw_transfer(s->fd, &st);
+ret = hdev_get_max_hw_transfer(s->fd, &st);
 
 if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
 bs->bl.max_hw_transfer = ret;
@@ -1340,11 +1350,32 @@ static void raw_refresh_limits(BlockDriverState *bs, 
Error **errp)
 }
 }
 
-ret = get_sysfs_zoned_model(s->fd, &st, &zoned);
+ret = get_sysfs_zoned_model(&st, &zoned);
 if (ret < 0) {
 zoned = BLK_Z_NONE;
 }
 bs->bl.zoned = zoned;
+if (zoned != BLK_Z_NONE) {
+ret = get_sysfs_long_val(&st, "chunk_sectors");
+if (ret > 0) {
+bs->bl.zone_sectors = ret;
+}
+
+ret = get_sysfs_long_val(&st, "zone_append_max_bytes");
+if (ret > 0) {
+bs->bl.zone_append_max_bytes = ret;
+}
+
+ret = get_sysfs_long_val(&st, "max_open_zones");
+if (ret > 0) {
+bs->bl.max_open_zones = ret;
+}
+
+ret = get_sysfs_long_val(&st, "max_active_zones");
+if (ret > 0) {
+bs->bl.max_active_zones = ret;
+}
+}
 }
 
 static int check_for_dasd(int fd)
@@ -1839,6 +1870,134 @@ static off_t copy_file_range(int in_fd, off_t *in_off, 
int out_fd,
 }
 #endif
 
+/*
+ * parse_zone - Fill a zone descriptor
+ */
+#if defined(CONFIG_BLKZONED)