On 9/30/24 01:03, Sam Li wrote:
> When the file-posix driver emulates append write, it holds the lock
> whenever accessing wp, which limits the IO queue depth to one.
> 
> The write IO flow can be optimized to allow concurrent writes. The lock
> is held in two cases:
> 1. Assumed that the write IO succeeds, update the wp before issuing the
> write.
> 2. If the write IO fails, report that zone and use the reported value
> as the current wp.
> 
> Signed-off-by: Sam Li <faithilike...@gmail.com>
> ---
>  block/file-posix.c | 33 ++++++++++++++++-----------------
>  1 file changed, 16 insertions(+), 17 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index ff928b5e85..64a57fadb1 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -2489,11 +2489,19 @@ static int coroutine_fn raw_co_prw(BlockDriverState 
> *bs, int64_t *offset_ptr,
>  #if defined(CONFIG_BLKZONED)
>      if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
>          bs->bl.zoned != BLK_Z_NONE) {
> -        qemu_co_mutex_lock(&bs->wps->colock);
> +        BlockZoneWps *wps = bs->wps;
> +        int index = offset / bs->bl.zone_size;

A blank line after this declaration (to separate declarations from code) woule
make this code more readable...

> +        qemu_co_mutex_lock(&wps->colock);
> +        uint64_t *wp = &wps->wp[index];
>          if (type & QEMU_AIO_ZONE_APPEND) {
> -            int index = offset / bs->bl.zone_size;
> -            offset = bs->wps->wp[index];
> +            offset = *wp;
> +            *offset_ptr = offset;
> +        }
> +        /* Advance the wp if needed */
> +        if (offset + bytes > *wp) {

Why the if ? offset MUST be equal to wp for writes, and for zone append we do
not need to check the offset at all. So advancing the wp should be 
unconditional.

BUT ! where are the checks for "zone is full" and "offset == wp" for write
operations ? These must be checked while holding the zone lock.

> +            *wp = offset + bytes;
>          }
> +        qemu_co_mutex_unlock(&bs->wps->colock);
>      }
>  #endif
>  
> @@ -2540,28 +2548,19 @@ out:
>  #if defined(CONFIG_BLKZONED)
>      if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
>          bs->bl.zoned != BLK_Z_NONE) {
> -        BlockZoneWps *wps = bs->wps;
>          if (ret == 0) {
> -            uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
> -            if (!BDRV_ZT_IS_CONV(*wp)) {
> -                if (type & QEMU_AIO_ZONE_APPEND) {
> -                    *offset_ptr = *wp;
> -                    trace_zbd_zone_append_complete(bs, *offset_ptr
> -                        >> BDRV_SECTOR_BITS);
> -                }
> -                /* Advance the wp if needed */
> -                if (offset + bytes > *wp) {
> -                    *wp = offset + bytes;
> -                }
> +            if (type & QEMU_AIO_ZONE_APPEND) {
> +                trace_zbd_zone_append_complete(bs, *offset_ptr
> +                    >> BDRV_SECTOR_BITS);
>              }
>          } else {
> +            qemu_co_mutex_lock(&bs->wps->colock);
>              /*
>               * write and append write are not allowed to cross zone 
> boundaries
>               */
>              update_zones_wp(bs, s->fd, offset, 1);
> +            qemu_co_mutex_unlock(&bs->wps->colock);
>          }
> -
> -        qemu_co_mutex_unlock(&wps->colock);
>      }
>  #endif
>      return ret;


-- 
Damien Le Moal
Western Digital Research

Reply via email to