Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Sam Li
Damien Le Moal  于2022年6月29日周三 10:32写道:
>
> On 6/29/22 10:50, Sam Li wrote:
> >>> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> >>> blk_zone);
> >>> +g_autofree struct blk_zone_report *rep = g_new(struct 
> >>> blk_zone_report, nrz);
> >>
> >> g_new() looks incorrect. There should be 1 struct blk_zone_report
> >> followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
> >> instead.
> >
> > Yes! However, it still has a memory leak error when using g_autofree
> > && g_malloc.
> 
>  That may be because you are changing the value of the rep pointer while
>  parsing the report ?
> >>>
> >>> I am not sure it is the case. Can you show me some way to find the 
> >>> problem?
> >>
> >> Not sure. I never used this g_malloc()/g_autofree() before so not sure how
> >> it works. It may be that g_autofree() work only with g_new() ?
> >> Could you try separating the declaration and allocation ? e.g.
> >>
> >> Declare at the beginning of the function:
> >> g_autofree struct blk_zone_report *rep = NULL;
> >>
> >> And then when needed do:
> >>
> >> rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
> >> rep = g_malloc(rep_size);
> >
> > Actually, the memory leak occurs in that way. When using zone_mgmt,
> > memory leak still occurs. Asan gives the error information not much so
> > I haven't tracked down the problem yet.
>
> See this:
>
> https://blog.fishsoup.net/2015/11/05/attributecleanup-mixed-declarations-and-code-and-goto/
>
> Maybe you can find some hints.

Thanks!

>
> --
> Damien Le Moal
> Western Digital Research



Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Damien Le Moal
On 6/29/22 10:50, Sam Li wrote:
>>> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
>>> blk_zone);
>>> +g_autofree struct blk_zone_report *rep = g_new(struct 
>>> blk_zone_report, nrz);
>>
>> g_new() looks incorrect. There should be 1 struct blk_zone_report
>> followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
>> instead.
>
> Yes! However, it still has a memory leak error when using g_autofree
> && g_malloc.

 That may be because you are changing the value of the rep pointer while
 parsing the report ?
>>>
>>> I am not sure it is the case. Can you show me some way to find the problem?
>>
>> Not sure. I never used this g_malloc()/g_autofree() before so not sure how
>> it works. It may be that g_autofree() work only with g_new() ?
>> Could you try separating the declaration and allocation ? e.g.
>>
>> Declare at the beginning of the function:
>> g_autofree struct blk_zone_report *rep = NULL;
>>
>> And then when needed do:
>>
>> rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
>> rep = g_malloc(rep_size);
> 
> Actually, the memory leak occurs in that way. When using zone_mgmt,
> memory leak still occurs. Asan gives the error information not much so
> I haven't tracked down the problem yet.

See this:

https://blog.fishsoup.net/2015/11/05/attributecleanup-mixed-declarations-and-code-and-goto/

Maybe you can find some hints.

-- 
Damien Le Moal
Western Digital Research



Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Sam Li
Damien Le Moal  于2022年6月29日周三 09:43写道:
>
> On 6/28/22 19:23, Sam Li wrote:
> > Damien Le Moal  于2022年6月28日周二 17:05写道:
> >>
> >> On 6/28/22 17:05, Sam Li wrote:
> >>> Stefan Hajnoczi  于2022年6月28日周二 14:52写道:
> 
>  On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index e0e1aff4b1..786f964d02 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
> > +/*
> > + * Return zone_report from BlockDriver. Offset can be any number within
> > + * the zone size. No alignment for offset and len.
> 
>  What is the purpose of len? Is it the maximum number of zones to return
>  in nr_zones[]?
> >>>
> >>> len is actually not used in bdrv_co_zone_report. It is needed by
> >>> blk_check_byte_request.
> >>
> >> There is no IO buffer being passed. Only an array of zone descriptors and
> >> an in-out number of zones. len is definitely not needed. Not sure what
> >> blk_check_byte_request() is intended to check though.
> >
> > Can I just remove len argument and blk_check_byte_request() if it's not 
> > used?
>
> If it is unused, yes, you must remove it.
>
> >
> >>
> >>>
>  How does the caller know how many zones were returned?
> >>>
> >>> nr_zones represents IN maximum and OUT actual. The caller will know by
> >>> nr_zones which is changed in bdrv_co_zone_report. I'll add it in the
> >>> comments.
> >>>
> 
> > + */
> > +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> > +   int64_t len, int64_t *nr_zones,
> > +   BlockZoneDescriptor *zones)
> > +{
> > +int ret;
> > +BlockDriverState *bs;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk); /* increase before waiting */
> > +blk_wait_while_drained(blk);
> > +bs = blk_bs(blk);
> > +
> > +ret = blk_check_byte_request(blk, offset, len);
> > +if (ret < 0) {
> > +return ret;
> > +}
> > +
> > +bdrv_inc_in_flight(bs);
> 
>  The bdrv_inc/dec_in_flight() call should be inside
>  bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.
> 
> > +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> > +  nr_zones, zones);
> > +bdrv_dec_in_flight(bs);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> > +/*
> > + * Return zone_mgmt from BlockDriver.
> 
>  Maybe this should be:
> 
>    Send a zone management command.
> >>>
> >>> Yes, it's more accurate.
> >>>
> 
> > @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
> >  PreallocMode prealloc;
> >  Error **errp;
> >  } truncate;
> > +struct {
> > +int64_t *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +zone_op op;
> 
>  It's cleaner to put op inside a struct zone_mgmt so its purpose is
>  self-explanatory:
> 
>    struct {
>    zone_op op;
>    } zone_mgmt;
> 
> > +static int handle_aiocb_zone_report(void *opaque) {
> > +RawPosixAIOData *aiocb = opaque;
> > +int fd = aiocb->aio_fildes;
> > +int64_t *nr_zones = aiocb->zone_report.nr_zones;
> > +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> > +int64_t offset = aiocb->aio_offset;
> > +int64_t len = aiocb->aio_nbytes;
> >>
> >> Since you have the zone array and number of zones (size of that array) I
> >> really do not see why you need len.
> >>
> > +
> > +struct blk_zone *blkz;
> > +int64_t rep_size, nrz;
> > +int ret, n = 0, i = 0;
> > +
> > +nrz = *nr_zones;
> > +if (len == -1) {
> > +return -errno;
> 
>  Where is errno set? Should this be an errno constant instead like
>  -EINVAL?
> >>>
> >>> That's right! Noted.
> >>>
> 
> > +}
> > +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> > blk_zone);
> > +g_autofree struct blk_zone_report *rep = g_new(struct 
> > blk_zone_report, nrz);
> 
>  g_new() looks incorrect. There should be 1 struct blk_zone_report
>  followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
>  instead.
> >>>
> >>> Yes! However, it still has a memory leak error when using g_autofree
> >>> && g_malloc.
> >>
> >> That may be because you are changing the value of the rep pointer while
> >> parsing the report ?
> >
> > I am not sure it is the case. Can you show me some way to find the problem?
>
> Not sure. I never used this g_malloc()/g_autofree() before so not sure how
> it works. It may be that g_autofree() work only with g_new() ?
> 

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Damien Le Moal
On 6/28/22 19:23, Sam Li wrote:
> Damien Le Moal  于2022年6月28日周二 17:05写道:
>>
>> On 6/28/22 17:05, Sam Li wrote:
>>> Stefan Hajnoczi  于2022年6月28日周二 14:52写道:

 On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
> diff --git a/block/block-backend.c b/block/block-backend.c
> index e0e1aff4b1..786f964d02 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
>  return ret;
>  }
>
> +/*
> + * Return zone_report from BlockDriver. Offset can be any number within
> + * the zone size. No alignment for offset and len.

 What is the purpose of len? Is it the maximum number of zones to return
 in nr_zones[]?
>>>
>>> len is actually not used in bdrv_co_zone_report. It is needed by
>>> blk_check_byte_request.
>>
>> There is no IO buffer being passed. Only an array of zone descriptors and
>> an in-out number of zones. len is definitely not needed. Not sure what
>> blk_check_byte_request() is intended to check though.
> 
> Can I just remove len argument and blk_check_byte_request() if it's not used?

If it is unused, yes, you must remove it.

> 
>>
>>>
 How does the caller know how many zones were returned?
>>>
>>> nr_zones represents IN maximum and OUT actual. The caller will know by
>>> nr_zones which is changed in bdrv_co_zone_report. I'll add it in the
>>> comments.
>>>

> + */
> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> +   int64_t len, int64_t *nr_zones,
> +   BlockZoneDescriptor *zones)
> +{
> +int ret;
> +BlockDriverState *bs;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk); /* increase before waiting */
> +blk_wait_while_drained(blk);
> +bs = blk_bs(blk);
> +
> +ret = blk_check_byte_request(blk, offset, len);
> +if (ret < 0) {
> +return ret;
> +}
> +
> +bdrv_inc_in_flight(bs);

 The bdrv_inc/dec_in_flight() call should be inside
 bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.

> +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> +  nr_zones, zones);
> +bdrv_dec_in_flight(bs);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
> +/*
> + * Return zone_mgmt from BlockDriver.

 Maybe this should be:

   Send a zone management command.
>>>
>>> Yes, it's more accurate.
>>>

> @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
>  PreallocMode prealloc;
>  Error **errp;
>  } truncate;
> +struct {
> +int64_t *nr_zones;
> +BlockZoneDescriptor *zones;
> +} zone_report;
> +zone_op op;

 It's cleaner to put op inside a struct zone_mgmt so its purpose is
 self-explanatory:

   struct {
   zone_op op;
   } zone_mgmt;

> +static int handle_aiocb_zone_report(void *opaque) {
> +RawPosixAIOData *aiocb = opaque;
> +int fd = aiocb->aio_fildes;
> +int64_t *nr_zones = aiocb->zone_report.nr_zones;
> +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> +int64_t offset = aiocb->aio_offset;
> +int64_t len = aiocb->aio_nbytes;
>>
>> Since you have the zone array and number of zones (size of that array) I
>> really do not see why you need len.
>>
> +
> +struct blk_zone *blkz;
> +int64_t rep_size, nrz;
> +int ret, n = 0, i = 0;
> +
> +nrz = *nr_zones;
> +if (len == -1) {
> +return -errno;

 Where is errno set? Should this be an errno constant instead like
 -EINVAL?
>>>
>>> That's right! Noted.
>>>

> +}
> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> blk_zone);
> +g_autofree struct blk_zone_report *rep = g_new(struct 
> blk_zone_report, nrz);

 g_new() looks incorrect. There should be 1 struct blk_zone_report
 followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
 instead.
>>>
>>> Yes! However, it still has a memory leak error when using g_autofree
>>> && g_malloc.
>>
>> That may be because you are changing the value of the rep pointer while
>> parsing the report ?
> 
> I am not sure it is the case. Can you show me some way to find the problem?

Not sure. I never used this g_malloc()/g_autofree() before so not sure how
it works. It may be that g_autofree() work only with g_new() ?
Could you try separating the declaration and allocation ? e.g.

Declare at the beginning of the function:
g_autofree struct blk_zone_report *rep = NULL;

And then when needed do:

rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
rep = g_malloc(rep_size);

> 
> Thanks for reviewing!
> 
> 
>>
>>>

> 

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Sam Li
Damien Le Moal  于2022年6月28日周二 17:05写道:
>
> On 6/28/22 17:05, Sam Li wrote:
> > Stefan Hajnoczi  于2022年6月28日周二 14:52写道:
> >>
> >> On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
> >>> diff --git a/block/block-backend.c b/block/block-backend.c
> >>> index e0e1aff4b1..786f964d02 100644
> >>> --- a/block/block-backend.c
> >>> +++ b/block/block-backend.c
> >>> @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
> >>>  return ret;
> >>>  }
> >>>
> >>> +/*
> >>> + * Return zone_report from BlockDriver. Offset can be any number within
> >>> + * the zone size. No alignment for offset and len.
> >>
> >> What is the purpose of len? Is it the maximum number of zones to return
> >> in nr_zones[]?
> >
> > len is actually not used in bdrv_co_zone_report. It is needed by
> > blk_check_byte_request.
>
> There is no IO buffer being passed. Only an array of zone descriptors and
> an in-out number of zones. len is definitely not needed. Not sure what
> blk_check_byte_request() is intended to check though.

Can I just remove len argument and blk_check_byte_request() if it's not used?

>
> >
> >> How does the caller know how many zones were returned?
> >
> > nr_zones represents IN maximum and OUT actual. The caller will know by
> > nr_zones which is changed in bdrv_co_zone_report. I'll add it in the
> > comments.
> >
> >>
> >>> + */
> >>> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> >>> +   int64_t len, int64_t *nr_zones,
> >>> +   BlockZoneDescriptor *zones)
> >>> +{
> >>> +int ret;
> >>> +BlockDriverState *bs;
> >>> +IO_CODE();
> >>> +
> >>> +blk_inc_in_flight(blk); /* increase before waiting */
> >>> +blk_wait_while_drained(blk);
> >>> +bs = blk_bs(blk);
> >>> +
> >>> +ret = blk_check_byte_request(blk, offset, len);
> >>> +if (ret < 0) {
> >>> +return ret;
> >>> +}
> >>> +
> >>> +bdrv_inc_in_flight(bs);
> >>
> >> The bdrv_inc/dec_in_flight() call should be inside
> >> bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.
> >>
> >>> +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> >>> +  nr_zones, zones);
> >>> +bdrv_dec_in_flight(bs);
> >>> +blk_dec_in_flight(blk);
> >>> +return ret;
> >>> +}
> >>> +
> >>> +/*
> >>> + * Return zone_mgmt from BlockDriver.
> >>
> >> Maybe this should be:
> >>
> >>   Send a zone management command.
> >
> > Yes, it's more accurate.
> >
> >>
> >>> @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
> >>>  PreallocMode prealloc;
> >>>  Error **errp;
> >>>  } truncate;
> >>> +struct {
> >>> +int64_t *nr_zones;
> >>> +BlockZoneDescriptor *zones;
> >>> +} zone_report;
> >>> +zone_op op;
> >>
> >> It's cleaner to put op inside a struct zone_mgmt so its purpose is
> >> self-explanatory:
> >>
> >>   struct {
> >>   zone_op op;
> >>   } zone_mgmt;
> >>
> >>> +static int handle_aiocb_zone_report(void *opaque) {
> >>> +RawPosixAIOData *aiocb = opaque;
> >>> +int fd = aiocb->aio_fildes;
> >>> +int64_t *nr_zones = aiocb->zone_report.nr_zones;
> >>> +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> >>> +int64_t offset = aiocb->aio_offset;
> >>> +int64_t len = aiocb->aio_nbytes;
>
> Since you have the zone array and number of zones (size of that array) I
> really do not see why you need len.
>
> >>> +
> >>> +struct blk_zone *blkz;
> >>> +int64_t rep_size, nrz;
> >>> +int ret, n = 0, i = 0;
> >>> +
> >>> +nrz = *nr_zones;
> >>> +if (len == -1) {
> >>> +return -errno;
> >>
> >> Where is errno set? Should this be an errno constant instead like
> >> -EINVAL?
> >
> > That's right! Noted.
> >
> >>
> >>> +}
> >>> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> >>> blk_zone);
> >>> +g_autofree struct blk_zone_report *rep = g_new(struct 
> >>> blk_zone_report, nrz);
> >>
> >> g_new() looks incorrect. There should be 1 struct blk_zone_report
> >> followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
> >> instead.
> >
> > Yes! However, it still has a memory leak error when using g_autofree
> > && g_malloc.
>
> That may be because you are changing the value of the rep pointer while
> parsing the report ?

I am not sure it is the case. Can you show me some way to find the problem?

Thanks for reviewing!


>
> >
> >>
> >>> +offset = offset / 512; /* get the unit of the start sector: sector 
> >>> size is 512 bytes. */
> >>> +printf("start to report zone with offset: 0x%lx\n", offset);
> >>> +
> >>> +blkz = (struct blk_zone *)(rep + 1);
> >>> +while (n < nrz) {
> >>> +memset(rep, 0, rep_size);
> >>> +rep->sector = offset;
> >>> +rep->nr_zones = nrz;
> >>
> >> What prevents zones[] overflowing? nrz isn't being reduced in the loop,
> >> so maybe the rep->nr_zones return value will eventually exceed the
> >> 

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Damien Le Moal
On 6/28/22 17:05, Sam Li wrote:
> Stefan Hajnoczi  于2022年6月28日周二 14:52写道:
>>
>> On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
>>> diff --git a/block/block-backend.c b/block/block-backend.c
>>> index e0e1aff4b1..786f964d02 100644
>>> --- a/block/block-backend.c
>>> +++ b/block/block-backend.c
>>> @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
>>>  return ret;
>>>  }
>>>
>>> +/*
>>> + * Return zone_report from BlockDriver. Offset can be any number within
>>> + * the zone size. No alignment for offset and len.
>>
>> What is the purpose of len? Is it the maximum number of zones to return
>> in nr_zones[]?
> 
> len is actually not used in bdrv_co_zone_report. It is needed by
> blk_check_byte_request.

There is no IO buffer being passed. Only an array of zone descriptors and
an in-out number of zones. len is definitely not needed. Not sure what
blk_check_byte_request() is intended to check though.

> 
>> How does the caller know how many zones were returned?
> 
> nr_zones represents IN maximum and OUT actual. The caller will know by
> nr_zones which is changed in bdrv_co_zone_report. I'll add it in the
> comments.
> 
>>
>>> + */
>>> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
>>> +   int64_t len, int64_t *nr_zones,
>>> +   BlockZoneDescriptor *zones)
>>> +{
>>> +int ret;
>>> +BlockDriverState *bs;
>>> +IO_CODE();
>>> +
>>> +blk_inc_in_flight(blk); /* increase before waiting */
>>> +blk_wait_while_drained(blk);
>>> +bs = blk_bs(blk);
>>> +
>>> +ret = blk_check_byte_request(blk, offset, len);
>>> +if (ret < 0) {
>>> +return ret;
>>> +}
>>> +
>>> +bdrv_inc_in_flight(bs);
>>
>> The bdrv_inc/dec_in_flight() call should be inside
>> bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.
>>
>>> +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
>>> +  nr_zones, zones);
>>> +bdrv_dec_in_flight(bs);
>>> +blk_dec_in_flight(blk);
>>> +return ret;
>>> +}
>>> +
>>> +/*
>>> + * Return zone_mgmt from BlockDriver.
>>
>> Maybe this should be:
>>
>>   Send a zone management command.
> 
> Yes, it's more accurate.
> 
>>
>>> @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
>>>  PreallocMode prealloc;
>>>  Error **errp;
>>>  } truncate;
>>> +struct {
>>> +int64_t *nr_zones;
>>> +BlockZoneDescriptor *zones;
>>> +} zone_report;
>>> +zone_op op;
>>
>> It's cleaner to put op inside a struct zone_mgmt so its purpose is
>> self-explanatory:
>>
>>   struct {
>>   zone_op op;
>>   } zone_mgmt;
>>
>>> +static int handle_aiocb_zone_report(void *opaque) {
>>> +RawPosixAIOData *aiocb = opaque;
>>> +int fd = aiocb->aio_fildes;
>>> +int64_t *nr_zones = aiocb->zone_report.nr_zones;
>>> +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
>>> +int64_t offset = aiocb->aio_offset;
>>> +int64_t len = aiocb->aio_nbytes;

Since you have the zone array and number of zones (size of that array) I
really do not see why you need len.

>>> +
>>> +struct blk_zone *blkz;
>>> +int64_t rep_size, nrz;
>>> +int ret, n = 0, i = 0;
>>> +
>>> +nrz = *nr_zones;
>>> +if (len == -1) {
>>> +return -errno;
>>
>> Where is errno set? Should this be an errno constant instead like
>> -EINVAL?
> 
> That's right! Noted.
> 
>>
>>> +}
>>> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
>>> blk_zone);
>>> +g_autofree struct blk_zone_report *rep = g_new(struct blk_zone_report, 
>>> nrz);
>>
>> g_new() looks incorrect. There should be 1 struct blk_zone_report
>> followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
>> instead.
> 
> Yes! However, it still has a memory leak error when using g_autofree
> && g_malloc.

That may be because you are changing the value of the rep pointer while
parsing the report ?

> 
>>
>>> +offset = offset / 512; /* get the unit of the start sector: sector 
>>> size is 512 bytes. */
>>> +printf("start to report zone with offset: 0x%lx\n", offset);
>>> +
>>> +blkz = (struct blk_zone *)(rep + 1);
>>> +while (n < nrz) {
>>> +memset(rep, 0, rep_size);
>>> +rep->sector = offset;
>>> +rep->nr_zones = nrz;
>>
>> What prevents zones[] overflowing? nrz isn't being reduced in the loop,
>> so maybe the rep->nr_zones return value will eventually exceed the
>> number of elements still available in zones[n..]?
> 
> I suppose the number of zones[] is restricted in the subsequent for
> loop where zones[] copy one zone at a time as n increases. Even if
> rep->zones exceeds the available room in zones[], the extra zone will
> not be copied.
> 
>>
>>> +static int handle_aiocb_zone_mgmt(void *opaque) {
>>> +RawPosixAIOData *aiocb = opaque;
>>> +int fd = aiocb->aio_fildes;
>>> +int64_t offset = aiocb->aio_offset;
>>> +int64_t len = 

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Sam Li
Stefan Hajnoczi  于2022年6月28日周二 14:52写道:
>
> On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index e0e1aff4b1..786f964d02 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
> > +/*
> > + * Return zone_report from BlockDriver. Offset can be any number within
> > + * the zone size. No alignment for offset and len.
>
> What is the purpose of len? Is it the maximum number of zones to return
> in nr_zones[]?

len is actually not used in bdrv_co_zone_report. It is needed by
blk_check_byte_request.

> How does the caller know how many zones were returned?

nr_zones represents IN maximum and OUT actual. The caller will know by
nr_zones which is changed in bdrv_co_zone_report. I'll add it in the
comments.

>
> > + */
> > +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> > +   int64_t len, int64_t *nr_zones,
> > +   BlockZoneDescriptor *zones)
> > +{
> > +int ret;
> > +BlockDriverState *bs;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk); /* increase before waiting */
> > +blk_wait_while_drained(blk);
> > +bs = blk_bs(blk);
> > +
> > +ret = blk_check_byte_request(blk, offset, len);
> > +if (ret < 0) {
> > +return ret;
> > +}
> > +
> > +bdrv_inc_in_flight(bs);
>
> The bdrv_inc/dec_in_flight() call should be inside
> bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.
>
> > +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> > +  nr_zones, zones);
> > +bdrv_dec_in_flight(bs);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> > +/*
> > + * Return zone_mgmt from BlockDriver.
>
> Maybe this should be:
>
>   Send a zone management command.

Yes, it's more accurate.

>
> > @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
> >  PreallocMode prealloc;
> >  Error **errp;
> >  } truncate;
> > +struct {
> > +int64_t *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +zone_op op;
>
> It's cleaner to put op inside a struct zone_mgmt so its purpose is
> self-explanatory:
>
>   struct {
>   zone_op op;
>   } zone_mgmt;
>
> > +static int handle_aiocb_zone_report(void *opaque) {
> > +RawPosixAIOData *aiocb = opaque;
> > +int fd = aiocb->aio_fildes;
> > +int64_t *nr_zones = aiocb->zone_report.nr_zones;
> > +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> > +int64_t offset = aiocb->aio_offset;
> > +int64_t len = aiocb->aio_nbytes;
> > +
> > +struct blk_zone *blkz;
> > +int64_t rep_size, nrz;
> > +int ret, n = 0, i = 0;
> > +
> > +nrz = *nr_zones;
> > +if (len == -1) {
> > +return -errno;
>
> Where is errno set? Should this be an errno constant instead like
> -EINVAL?

That's right! Noted.

>
> > +}
> > +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> > blk_zone);
> > +g_autofree struct blk_zone_report *rep = g_new(struct blk_zone_report, 
> > nrz);
>
> g_new() looks incorrect. There should be 1 struct blk_zone_report
> followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
> instead.

Yes! However, it still has a memory leak error when using g_autofree
&& g_malloc.

>
> > +offset = offset / 512; /* get the unit of the start sector: sector 
> > size is 512 bytes. */
> > +printf("start to report zone with offset: 0x%lx\n", offset);
> > +
> > +blkz = (struct blk_zone *)(rep + 1);
> > +while (n < nrz) {
> > +memset(rep, 0, rep_size);
> > +rep->sector = offset;
> > +rep->nr_zones = nrz;
>
> What prevents zones[] overflowing? nrz isn't being reduced in the loop,
> so maybe the rep->nr_zones return value will eventually exceed the
> number of elements still available in zones[n..]?

I suppose the number of zones[] is restricted in the subsequent for
loop where zones[] copy one zone at a time as n increases. Even if
rep->zones exceeds the available room in zones[], the extra zone will
not be copied.

>
> > +static int handle_aiocb_zone_mgmt(void *opaque) {
> > +RawPosixAIOData *aiocb = opaque;
> > +int fd = aiocb->aio_fildes;
> > +int64_t offset = aiocb->aio_offset;
> > +int64_t len = aiocb->aio_nbytes;
> > +zone_op op = aiocb->op;
> > +
> > +struct blk_zone_range range;
> > +const char *ioctl_name;
> > +unsigned long ioctl_op;
> > +int64_t zone_size;
> > +int64_t zone_size_mask;
> > +int ret;
> > +
> > +ret = ioctl(fd, BLKGETZONESZ, _size);
>
> Can this value be stored in bs (maybe in BlockLimits) to avoid calling
> ioctl(BLKGETZONESZ) each time?

Yes, zone_size is in the zbd_dev field. I'll update BlockLimits after
I think through the configurations. In addition, it's a temporary

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-28 Thread Stefan Hajnoczi
On Mon, Jun 27, 2022 at 08:19:13AM +0800, Sam Li wrote:
> diff --git a/block/block-backend.c b/block/block-backend.c
> index e0e1aff4b1..786f964d02 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
>  return ret;
>  }
>  
> +/*
> + * Return zone_report from BlockDriver. Offset can be any number within
> + * the zone size. No alignment for offset and len.

What is the purpose of len? Is it the maximum number of zones to return
in nr_zones[]?

How does the caller know how many zones were returned?

> + */
> +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> +   int64_t len, int64_t *nr_zones,
> +   BlockZoneDescriptor *zones)
> +{
> +int ret;
> +BlockDriverState *bs;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk); /* increase before waiting */
> +blk_wait_while_drained(blk);
> +bs = blk_bs(blk);
> +
> +ret = blk_check_byte_request(blk, offset, len);
> +if (ret < 0) {
> +return ret;
> +}
> +
> +bdrv_inc_in_flight(bs);

The bdrv_inc/dec_in_flight() call should be inside
bdrv_co_zone_report(). See bdrv_co_ioctl() for an example.

> +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> +  nr_zones, zones);
> +bdrv_dec_in_flight(bs);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
> +/*
> + * Return zone_mgmt from BlockDriver.

Maybe this should be:

  Send a zone management command.

> @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
>  PreallocMode prealloc;
>  Error **errp;
>  } truncate;
> +struct {
> +int64_t *nr_zones;
> +BlockZoneDescriptor *zones;
> +} zone_report;
> +zone_op op;

It's cleaner to put op inside a struct zone_mgmt so its purpose is
self-explanatory:

  struct {
  zone_op op;
  } zone_mgmt;

> +static int handle_aiocb_zone_report(void *opaque) {
> +RawPosixAIOData *aiocb = opaque;
> +int fd = aiocb->aio_fildes;
> +int64_t *nr_zones = aiocb->zone_report.nr_zones;
> +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> +int64_t offset = aiocb->aio_offset;
> +int64_t len = aiocb->aio_nbytes;
> +
> +struct blk_zone *blkz;
> +int64_t rep_size, nrz;
> +int ret, n = 0, i = 0;
> +
> +nrz = *nr_zones;
> +if (len == -1) {
> +return -errno;

Where is errno set? Should this be an errno constant instead like
-EINVAL?

> +}
> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> blk_zone);
> +g_autofree struct blk_zone_report *rep = g_new(struct blk_zone_report, 
> nrz);

g_new() looks incorrect. There should be 1 struct blk_zone_report
followed by nrz struct blk_zone structs. Please use g_malloc(rep_size)
instead.

> +offset = offset / 512; /* get the unit of the start sector: sector size 
> is 512 bytes. */
> +printf("start to report zone with offset: 0x%lx\n", offset);
> +
> +blkz = (struct blk_zone *)(rep + 1);
> +while (n < nrz) {
> +memset(rep, 0, rep_size);
> +rep->sector = offset;
> +rep->nr_zones = nrz;

What prevents zones[] overflowing? nrz isn't being reduced in the loop,
so maybe the rep->nr_zones return value will eventually exceed the
number of elements still available in zones[n..]?

> +static int handle_aiocb_zone_mgmt(void *opaque) {
> +RawPosixAIOData *aiocb = opaque;
> +int fd = aiocb->aio_fildes;
> +int64_t offset = aiocb->aio_offset;
> +int64_t len = aiocb->aio_nbytes;
> +zone_op op = aiocb->op;
> +
> +struct blk_zone_range range;
> +const char *ioctl_name;
> +unsigned long ioctl_op;
> +int64_t zone_size;
> +int64_t zone_size_mask;
> +int ret;
> +
> +ret = ioctl(fd, BLKGETZONESZ, _size);

Can this value be stored in bs (maybe in BlockLimits) to avoid calling
ioctl(BLKGETZONESZ) each time?

> +if (ret) {
> +return -1;

The return value should be a negative errno. -1 is -EPERM but it's
probably not that error code you wanted. This should be:

  return -errno;

> +}
> +
> +zone_size_mask = zone_size - 1;
> +if (offset & zone_size_mask) {
> +error_report("offset is not the start of a zone");
> +return -1;

return -EINVAL;

> +}
> +
> +if (len & zone_size_mask) {
> +error_report("len is not aligned to zones");
> +return -1;

return -EINVAL;

> +static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t 
> offset,
> +int64_t len, int64_t *nr_zones,
> +BlockZoneDescriptor *zones) {
> +BDRVRawState *s = bs->opaque;
> +RawPosixAIOData acb;
> +
> +acb = (RawPosixAIOData) {
> +.bs = bs,
> +.aio_fildes = s->fd,
> +.aio_type   = QEMU_AIO_IOCTL,
> +.aio_offset = offset,
> +.aio_nbytes = len,
> +.zone_report= {
> +.nr_zones   = 

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-27 Thread Sam Li
Hi Hannes,

Hannes Reinecke  于2022年6月27日周一 15:21写道:

>
> On 6/27/22 02:19, Sam Li wrote:
> > By adding zone management operations in BlockDriver, storage
> > controller emulation can use the new block layer APIs including
> > zone_report and zone_mgmt(open, close, finish, reset).
> > ---
> >   block/block-backend.c|  56 
> >   block/coroutines.h   |   5 +
> >   block/file-posix.c   | 238 +++
> >   include/block/block-common.h |  43 +-
> >   include/block/block_int-common.h |  20 +++
> >   5 files changed, 361 insertions(+), 1 deletion(-)
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index e0e1aff4b1..786f964d02 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
> >   return ret;
> >   }
> >
> > +/*
> > + * Return zone_report from BlockDriver. Offset can be any number within
> > + * the zone size. No alignment for offset and len.
> > + */
> > +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> > +   int64_t len, int64_t *nr_zones,
> > +   BlockZoneDescriptor *zones)
> > +{
> > +int ret;
> > +BlockDriverState *bs;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk); /* increase before waiting */
> > +blk_wait_while_drained(blk);
> > +bs = blk_bs(blk);
> > +
> > +ret = blk_check_byte_request(blk, offset, len);
> > +if (ret < 0) {
> > +return ret;
> > +}
> > +
> > +bdrv_inc_in_flight(bs);
> > +ret = bdrv_co_zone_report(blk->root->bs, offset, len,
> > +  nr_zones, zones);
> > +bdrv_dec_in_flight(bs);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> > +/*
> > + * Return zone_mgmt from BlockDriver.
> > + * Offset is the start of a zone and len is aligned to zones.
> > + */
> > +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
> > +int64_t offset, int64_t len)
> > +{
> > +int ret;
> > +BlockDriverState *bs;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +blk_wait_while_drained(blk);
> > +bs = blk_bs(blk);
> > +
> > +ret = blk_check_byte_request(blk, offset, len);
> > +if (ret < 0) {
> > +return ret;
> > +}
> > +
> > +bdrv_inc_in_flight(bs);
> > +ret = bdrv_co_zone_mgmt(blk->root->bs, op, offset, len);
> > +bdrv_dec_in_flight(bs);
> > +blk_dec_in_flight(blk);
> > +return ret;
> > +}
> > +
> >   void blk_drain(BlockBackend *blk)
> >   {
> >   BlockDriverState *bs = blk_bs(blk);
> > diff --git a/block/coroutines.h b/block/coroutines.h
> > index 830ecaa733..a114d7bc30 100644
> > --- a/block/coroutines.h
> > +++ b/block/coroutines.h
> > @@ -80,6 +80,11 @@ int coroutine_fn
> >   blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
> >
> >   int coroutine_fn blk_co_do_flush(BlockBackend *blk);
> > +int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
> > +int64_t len, int64_t *nr_zones,
> > +BlockZoneDescriptor *zones);
> > +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
> > +int64_t offset, int64_t len);
> >
> >
> >   /*
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 48cd096624..1b8b0d351f 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -67,6 +67,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> >   #include 
> >   #include 
> >   #include 
> > @@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
> >   PreallocMode prealloc;
> >   Error **errp;
> >   } truncate;
> > +struct {
> > +int64_t *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +zone_op op;
> >   };
> >   } RawPosixAIOData;
> >
> > @@ -1801,6 +1807,135 @@ static off_t copy_file_range(int in_fd, off_t 
> > *in_off, int out_fd,
> >   }
> >   #endif
> >
> > +/*
> > + * parse_zone - Fill a zone descriptor
> > + */
> > +static inline void parse_zone(struct BlockZoneDescriptor *zone,
> > +  struct blk_zone *blkz) {
> > +zone->start = blkz->start;
> > +zone->length = blkz->len;
> > +zone->cap = blkz->capacity;
> > +zone->wp = blkz->wp - blkz->start;
> > +zone->type = blkz->type;
> > +zone->cond = blkz->cond;
> > +}
> > +
> > +static int handle_aiocb_zone_report(void *opaque) {
> > +RawPosixAIOData *aiocb = opaque;
> > +int fd = aiocb->aio_fildes;
> > +int64_t *nr_zones = aiocb->zone_report.nr_zones;
> > +BlockZoneDescriptor *zones = aiocb->zone_report.zones;
> > +int64_t offset = aiocb->aio_offset;
> > +int64_t len = aiocb->aio_nbytes;
> > +
> > +struct blk_zone *blkz;
> > +int64_t rep_size, nrz;
> > +int ret, n = 0, i = 0;
> > +
> 

Re: [RFC v3 1/5] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls.

2022-06-27 Thread Hannes Reinecke

On 6/27/22 02:19, Sam Li wrote:

By adding zone management operations in BlockDriver, storage
controller emulation can use the new block layer APIs including
zone_report and zone_mgmt(open, close, finish, reset).
---
  block/block-backend.c|  56 
  block/coroutines.h   |   5 +
  block/file-posix.c   | 238 +++
  include/block/block-common.h |  43 +-
  include/block/block_int-common.h |  20 +++
  5 files changed, 361 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index e0e1aff4b1..786f964d02 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1810,6 +1810,62 @@ int blk_flush(BlockBackend *blk)
  return ret;
  }
  
+/*

+ * Return zone_report from BlockDriver. Offset can be any number within
+ * the zone size. No alignment for offset and len.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+   int64_t len, int64_t *nr_zones,
+   BlockZoneDescriptor *zones)
+{
+int ret;
+BlockDriverState *bs;
+IO_CODE();
+
+blk_inc_in_flight(blk); /* increase before waiting */
+blk_wait_while_drained(blk);
+bs = blk_bs(blk);
+
+ret = blk_check_byte_request(blk, offset, len);
+if (ret < 0) {
+return ret;
+}
+
+bdrv_inc_in_flight(bs);
+ret = bdrv_co_zone_report(blk->root->bs, offset, len,
+  nr_zones, zones);
+bdrv_dec_in_flight(bs);
+blk_dec_in_flight(blk);
+return ret;
+}
+
+/*
+ * Return zone_mgmt from BlockDriver.
+ * Offset is the start of a zone and len is aligned to zones.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
+int64_t offset, int64_t len)
+{
+int ret;
+BlockDriverState *bs;
+IO_CODE();
+
+blk_inc_in_flight(blk);
+blk_wait_while_drained(blk);
+bs = blk_bs(blk);
+
+ret = blk_check_byte_request(blk, offset, len);
+if (ret < 0) {
+return ret;
+}
+
+bdrv_inc_in_flight(bs);
+ret = bdrv_co_zone_mgmt(blk->root->bs, op, offset, len);
+bdrv_dec_in_flight(bs);
+blk_dec_in_flight(blk);
+return ret;
+}
+
  void blk_drain(BlockBackend *blk)
  {
  BlockDriverState *bs = blk_bs(blk);
diff --git a/block/coroutines.h b/block/coroutines.h
index 830ecaa733..a114d7bc30 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -80,6 +80,11 @@ int coroutine_fn
  blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
  
  int coroutine_fn blk_co_do_flush(BlockBackend *blk);

+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+int64_t len, int64_t *nr_zones,
+BlockZoneDescriptor *zones);
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
+int64_t offset, int64_t len);
  
  
  /*

diff --git a/block/file-posix.c b/block/file-posix.c
index 48cd096624..1b8b0d351f 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -67,6 +67,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -216,6 +217,11 @@ typedef struct RawPosixAIOData {
  PreallocMode prealloc;
  Error **errp;
  } truncate;
+struct {
+int64_t *nr_zones;
+BlockZoneDescriptor *zones;
+} zone_report;
+zone_op op;
  };
  } RawPosixAIOData;
  
@@ -1801,6 +1807,135 @@ static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,

  }
  #endif
  
+/*

+ * parse_zone - Fill a zone descriptor
+ */
+static inline void parse_zone(struct BlockZoneDescriptor *zone,
+  struct blk_zone *blkz) {
+zone->start = blkz->start;
+zone->length = blkz->len;
+zone->cap = blkz->capacity;
+zone->wp = blkz->wp - blkz->start;
+zone->type = blkz->type;
+zone->cond = blkz->cond;
+}
+
+static int handle_aiocb_zone_report(void *opaque) {
+RawPosixAIOData *aiocb = opaque;
+int fd = aiocb->aio_fildes;
+int64_t *nr_zones = aiocb->zone_report.nr_zones;
+BlockZoneDescriptor *zones = aiocb->zone_report.zones;
+int64_t offset = aiocb->aio_offset;
+int64_t len = aiocb->aio_nbytes;
+
+struct blk_zone *blkz;
+int64_t rep_size, nrz;
+int ret, n = 0, i = 0;
+
+nrz = *nr_zones;
+if (len == -1) {
+return -errno;
+}
+rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
+g_autofree struct blk_zone_report *rep = g_new(struct blk_zone_report, 
nrz);
+offset = offset / 512; /* get the unit of the start sector: sector size is 
512 bytes. */
+printf("start to report zone with offset: 0x%lx\n", offset);
+
+blkz = (struct blk_zone *)(rep + 1);
+while (n < nrz) {
+memset(rep, 0, rep_size);
+rep->sector = offset;
+rep->nr_zones = nrz;
+
+ret = ioctl(fd, BLKREPORTZONE, rep);
+if