On Wed, May 08, 2024 at 05:36:21PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations
> at the block level. The following operations
> are included:
> 
> - read_keys:        retrieves the list of registered keys.
> - read_reservation: retrieves the current reservation status.
> - register:         registers a new reservation key.
> - reserve:          initiates a reservation for a specific key.
> - release:          releases a reservation for a specific key.
> - clear:            clears all existing reservations.
> - preempt:          preempts a reservation held by another key.
> 
> Signed-off-by: Changqi Lu <luchangqi....@bytedance.com>
> Signed-off-by: zhenwei pi <pizhen...@bytedance.com>
> ---
>  block/block-backend.c             | 386 ++++++++++++++++++++++++++++++
>  block/io.c                        | 161 +++++++++++++
>  include/block/block-common.h      |   9 +
>  include/block/block-io.h          |  19 ++
>  include/block/block_int-common.h  |  31 +++
>  include/sysemu/block-backend-io.h |  22 ++
>  6 files changed, 628 insertions(+)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index db6f9b92a3..ec67937d28 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1770,6 +1770,392 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned 
> long int req, void *buf,
>      return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, 
> opaque);
>  }
>  
> +typedef struct BlkPrInCo {
> +    BlockBackend *blk;
> +    uint32_t *generation;
> +    uint32_t num_keys;
> +    BlockPrType *type;
> +    uint64_t *keys;
> +    int ret;
> +} BlkPrInCo;
> +
> +typedef struct BlkPrInCB {
> +    BlockAIOCB common;
> +    BlkPrInCo prco;
> +    bool has_returned;
> +} BlkPrInCB;
> +
> +static const AIOCBInfo blk_pr_in_aiocb_info = {
> +    .aiocb_size         = sizeof(BlkPrInCB),
> +};
> +
> +static void blk_pr_in_complete(BlkPrInCB *acb)
> +{
> +    if (acb->has_returned) {
> +        acb->common.cb(acb->common.opaque, acb->prco.ret);
> +        blk_dec_in_flight(acb->prco.blk);

Please add a comment identifying which blk_inc_in_flight() call this dec
is paired with. That makes it easier for people trying to understand
in-flight reference counting.

> +        qemu_aio_unref(acb);
> +    }
> +}
> +
> +static void blk_pr_in_complete_bh(void *opaque)
> +{
> +    BlkPrInCB *acb = opaque;
> +    assert(acb->has_returned);
> +    blk_pr_in_complete(acb);
> +}
> +
> +static BlockAIOCB *blk_aio_pr_in(BlockBackend *blk, uint32_t *generation,
> +                                 uint32_t num_keys, BlockPrType *type,
> +                                 uint64_t *keys, CoroutineEntry co_entry,
> +                                 BlockCompletionFunc *cb, void *opaque)
> +{
> +    BlkPrInCB *acb;
> +    Coroutine *co;
> +
> +    blk_inc_in_flight(blk);

Please add a comment identifying which blk_dec_in_flight() call this inc
is paired with.

> +    acb = blk_aio_get(&blk_pr_in_aiocb_info, blk, cb, opaque);
> +    acb->prco = (BlkPrInCo) {
> +        .blk        = blk,
> +        .generation = generation,
> +        .num_keys   = num_keys,
> +        .type       = type,
> +        .ret        = NOT_DONE,
> +        .keys       = keys,
> +    };
> +    acb->has_returned = false;
> +
> +    co = qemu_coroutine_create(co_entry, acb);
> +    aio_co_enter(qemu_get_current_aio_context(), co);
> +
> +    acb->has_returned = true;
> +    if (acb->prco.ret != NOT_DONE) {
> +        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
> +                                         blk_pr_in_complete_bh, acb);
> +    }
> +
> +    return &acb->common;
> +}
> +
> +
> +static int coroutine_fn
> +blk_aio_pr_do_read_keys(BlockBackend *blk, uint32_t *generation,
> +                        uint32_t num_keys, uint64_t *keys)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_read_keys(blk_bs(blk), generation, num_keys, keys);
> +}
> +
> +static void coroutine_fn blk_aio_pr_read_keys_entry(void *opaque)
> +{
> +    BlkPrInCB *acb = opaque;
> +    BlkPrInCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_read_keys(prco->blk, prco->generation,
> +                                        prco->num_keys, prco->keys);
> +    blk_pr_in_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_pr_read_keys(BlockBackend *blk, uint32_t *generation,
> +                                 uint32_t num_keys, uint64_t *keys,
> +                                 BlockCompletionFunc *cb, void *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_in(blk, generation, num_keys, NULL, keys,
> +                         blk_aio_pr_read_keys_entry, cb, opaque);
> +}
> +
> +
> +static int coroutine_fn
> +blk_aio_pr_do_read_reservation(BlockBackend *blk, uint32_t *generation,
> +                               uint64_t *key, BlockPrType *type)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_read_reservation(blk_bs(blk), generation, key, type);
> +}
> +
> +static void coroutine_fn blk_aio_pr_read_reservation_entry(void *opaque)
> +{
> +    BlkPrInCB *acb = opaque;
> +    BlkPrInCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_read_reservation(prco->blk, prco->generation,
> +                                               prco->keys, prco->type);
> +    blk_pr_in_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_pr_read_reservation(BlockBackend *blk, uint32_t 
> *generation,
> +                                        uint64_t *key, BlockPrType *type,
> +                                        BlockCompletionFunc *cb, void 
> *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_in(blk, generation, 0, type, key,
> +                         blk_aio_pr_read_reservation_entry, cb, opaque);
> +}
> +
> +
> +typedef struct BlkPrOutCo {
> +    BlockBackend *blk;
> +    uint64_t old_key;
> +    uint64_t new_key;
> +    BlockPrType type;
> +    bool ignore_key;
> +    bool abort;
> +    int ret;
> +} BlkPrOutCo;
> +
> +typedef struct BlkPrOutCB {
> +    BlockAIOCB common;
> +    BlkPrOutCo prco;
> +    bool has_returned;
> +} BlkPrOutCB;
> +
> +static const AIOCBInfo blk_pr_out_aiocb_info = {
> +    .aiocb_size         = sizeof(BlkPrOutCB),
> +};
> +
> +static void blk_pr_out_complete(BlkPrOutCB *acb)
> +{
> +    if (acb->has_returned) {
> +        acb->common.cb(acb->common.opaque, acb->prco.ret);
> +        blk_dec_in_flight(acb->prco.blk);

Same here.

> +        qemu_aio_unref(acb);
> +    }
> +}
> +
> +static void blk_pr_out_complete_bh(void *opaque)
> +{
> +    BlkPrOutCB *acb = opaque;
> +    assert(acb->has_returned);
> +    blk_pr_out_complete(acb);
> +}
> +
> +static BlockAIOCB *blk_aio_pr_out(BlockBackend *blk, uint64_t old_key,
> +                                  uint64_t new_key, BlockPrType type,
> +                                  bool ignore_key, bool abort,
> +                                  CoroutineEntry co_entry,
> +                                  BlockCompletionFunc *cb, void *opaque)
> +{
> +    BlkPrOutCB *acb;
> +    Coroutine *co;
> +
> +    blk_inc_in_flight(blk);

Same here.

> +    acb = blk_aio_get(&blk_pr_out_aiocb_info, blk, cb, opaque);
> +    acb->prco = (BlkPrOutCo) {
> +        .blk        = blk,
> +        .old_key    = old_key,
> +        .new_key    = new_key,
> +        .type       = type,
> +        .ignore_key = ignore_key,
> +        .abort      = abort,
> +        .ret        = NOT_DONE,
> +    };
> +    acb->has_returned = false;
> +
> +    co = qemu_coroutine_create(co_entry, acb);
> +    aio_co_enter(qemu_get_current_aio_context(), co);
> +
> +    acb->has_returned = true;
> +    if (acb->prco.ret != NOT_DONE) {
> +        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
> +                                         blk_pr_out_complete_bh, acb);
> +    }
> +
> +    return &acb->common;
> +}
> +
> +static int coroutine_fn
> +blk_aio_pr_do_register(BlockBackend *blk, uint64_t old_key,
> +                       uint64_t new_key, BlockPrType type,
> +                       bool ignore_key)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_register(blk_bs(blk), old_key, new_key, type, 
> ignore_key);
> +}
> +
> +static void coroutine_fn blk_aio_pr_register_entry(void *opaque)
> +{
> +    BlkPrOutCB *acb = opaque;
> +    BlkPrOutCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_register(prco->blk, prco->old_key, 
> prco->new_key,
> +                                       prco->type, prco->ignore_key);
> +    blk_pr_out_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_pr_register(BlockBackend *blk, uint64_t old_key,
> +                                uint64_t new_key, BlockPrType type,
> +                                bool ignore_key, BlockCompletionFunc *cb,
> +                                void *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_out(blk, old_key, new_key, type, ignore_key, false,
> +                          blk_aio_pr_register_entry, cb, opaque);
> +}
> +
> +static int coroutine_fn
> +blk_aio_pr_do_reserve(BlockBackend *blk, uint64_t key, BlockPrType type)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_reserve(blk_bs(blk), key, type);
> +}
> +
> +static void coroutine_fn blk_aio_pr_reserve_entry(void *opaque)
> +{
> +    BlkPrOutCB *acb = opaque;
> +    BlkPrOutCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_reserve(prco->blk, prco->old_key, prco->type);
> +    blk_pr_out_complete(acb);
> +}
> +
> +
> +BlockAIOCB *blk_aio_pr_reserve(BlockBackend *blk,  uint64_t key,
> +                               BlockPrType type, BlockCompletionFunc *cb,
> +                               void *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_out(blk, key, 0, type, false, false,
> +                          blk_aio_pr_reserve_entry, cb, opaque);
> +}
> +
> +static int coroutine_fn
> +blk_aio_pr_do_release(BlockBackend *blk, uint64_t key, BlockPrType type)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_release(blk_bs(blk), key, type);
> +}
> +
> +static void coroutine_fn blk_aio_pr_release_entry(void *opaque)
> +{
> +    BlkPrOutCB *acb = opaque;
> +    BlkPrOutCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_release(prco->blk, prco->old_key, prco->type);
> +    blk_pr_out_complete(acb);
> +}
> +
> +
> +BlockAIOCB *blk_aio_pr_release(BlockBackend *blk, uint64_t key,
> +                               BlockPrType type, BlockCompletionFunc *cb,
> +                               void *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_out(blk, key, 0, type, false, false,
> +                          blk_aio_pr_release_entry, cb, opaque);
> +}
> +
> +static int coroutine_fn
> +blk_aio_pr_do_clear(BlockBackend *blk, uint64_t key)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_clear(blk_bs(blk), key);
> +}
> +
> +static void coroutine_fn blk_aio_pr_clear_entry(void *opaque)
> +{
> +    BlkPrOutCB *acb = opaque;
> +    BlkPrOutCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_clear(prco->blk, prco->old_key);
> +    blk_pr_out_complete(acb);
> +}
> +
> +
> +BlockAIOCB *blk_aio_pr_clear(BlockBackend *blk, uint64_t key,
> +                             BlockCompletionFunc *cb, void *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_out(blk, key, 0, 0, false, false,
> +                          blk_aio_pr_clear_entry, cb, opaque);
> +}
> +
> +static int coroutine_fn
> +blk_aio_pr_do_preempt(BlockBackend *blk, uint64_t old_key,
> +                      uint64_t new_key, BlockPrType type, bool abort)
> +{
> +    IO_CODE();
> +
> +    blk_wait_while_drained(blk);
> +    GRAPH_RDLOCK_GUARD();
> +
> +    if (!blk_co_is_available(blk)) {
> +        return -ENOMEDIUM;
> +    }
> +
> +    return bdrv_co_pr_preempt(blk_bs(blk), old_key, new_key, type, abort);
> +}
> +
> +static void coroutine_fn blk_aio_pr_preempt_entry(void *opaque)
> +{
> +    BlkPrOutCB *acb = opaque;
> +    BlkPrOutCo *prco = &acb->prco;
> +
> +    prco->ret = blk_aio_pr_do_preempt(prco->blk, prco->old_key,
> +                                      prco->new_key, prco->type,
> +                                      prco->abort);
> +    blk_pr_out_complete(acb);
> +}
> +
> +
> +BlockAIOCB *blk_aio_pr_preempt(BlockBackend *blk, uint64_t old_key,
> +                               uint64_t new_key, BlockPrType type,
> +                               bool abort, BlockCompletionFunc *cb,
> +                               void *opaque)
> +{
> +    IO_CODE();
> +    return blk_aio_pr_out(blk, old_key, new_key, type, false, abort,
> +                          blk_aio_pr_preempt_entry, cb, opaque);
> +}
> +
>  /* To be called between exactly one pair of blk_inc/dec_in_flight() */
>  static int coroutine_fn
>  blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
> diff --git a/block/io.c b/block/io.c
> index 7217cf811b..98a358c6b2 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -3220,6 +3220,167 @@ out:
>      return co.ret;
>  }
>  
> +int coroutine_fn bdrv_co_pr_read_keys(BlockDriverState *bs,
> +                 uint32_t *generation, uint32_t num_keys,
> +                 uint64_t *keys)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_read_keys) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_read_keys(bs, generation, num_keys, keys);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
> +int coroutine_fn bdrv_co_pr_read_reservation(BlockDriverState *bs,
> +                 uint32_t *generation, uint64_t *key, BlockPrType *type)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_read_reservation) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_read_reservation(bs, generation, key, type);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
> +int coroutine_fn bdrv_co_pr_register(BlockDriverState *bs, uint64_t old_key,
> +                 uint64_t new_key, BlockPrType type, bool ignore_key)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_register) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_register(bs, old_key, new_key, type, 
> ignore_key);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
> +int coroutine_fn bdrv_co_pr_reserve(BlockDriverState *bs, uint64_t key,
> +                                    BlockPrType type)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_reserve) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_reserve(bs, key, type);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
> +int coroutine_fn bdrv_co_pr_release(BlockDriverState *bs, uint64_t key,
> +                                    BlockPrType type)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_release) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_release(bs, key, type);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
> +int coroutine_fn bdrv_co_pr_clear(BlockDriverState *bs, uint64_t key)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_clear) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_clear(bs, key);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
> +int coroutine_fn bdrv_co_pr_preempt(BlockDriverState *bs, uint64_t old_key,
> +                       uint64_t new_key, BlockPrType type, bool abort)
> +{
> +    BlockDriver *drv = bs->drv;
> +    CoroutineIOCompletion co = {
> +        .coroutine = qemu_coroutine_self(),
> +    };
> +
> +    IO_CODE();
> +    assert_bdrv_graph_readable();
> +
> +    bdrv_inc_in_flight(bs);
> +    if (!drv || !drv->bdrv_co_pr_preempt) {
> +        co.ret = -ENOTSUP;
> +        goto out;
> +    }
> +
> +    co.ret = drv->bdrv_co_pr_preempt(bs, old_key, new_key, type, abort);
> +out:
> +    bdrv_dec_in_flight(bs);
> +    return co.ret;
> +}
> +
>  int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
>                          unsigned int *nr_zones,
>                          BlockZoneDescriptor *zones)
> diff --git a/include/block/block-common.h b/include/block/block-common.h
> index a846023a09..c078d69f79 100644
> --- a/include/block/block-common.h
> +++ b/include/block/block-common.h
> @@ -524,6 +524,15 @@ typedef enum {
>      BDRV_FIX_ERRORS   = 2,
>  } BdrvCheckMode;
>  
> +typedef enum {
> +    BLK_PR_WRITE_EXCLUSIVE              = 0x1,
> +    BLK_PR_EXCLUSIVE_ACCESS             = 0x2,
> +    BLK_PR_WRITE_EXCLUSIVE_REGS_ONLY    = 0x3,
> +    BLK_PR_EXCLUSIVE_ACCESS_REGS_ONLY   = 0x4,
> +    BLK_PR_WRITE_EXCLUSIVE_ALL_REGS     = 0x5,
> +    BLK_PR_EXCLUSIVE_ACCESS_ALL_REGS    = 0x6,

Documentation needed. If you don't want to document the full meaning
here, please include a comment referring to a specific section in a
SCSI/NVMe standard that explains the semantics.

> +} BlockPrType;
> +
>  typedef struct BlockSizes {
>      uint32_t phys;
>      uint32_t log;
> diff --git a/include/block/block-io.h b/include/block/block-io.h
> index b49e0537dd..f2827cd74e 100644
> --- a/include/block/block-io.h
> +++ b/include/block/block-io.h
> @@ -106,6 +106,25 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
>  int coroutine_fn GRAPH_RDLOCK
>  bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
>  
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_read_keys(BlockDriverState *bs, uint32_t *generation,
> +                     uint32_t num_keys, uint64_t *keys);
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_read_reservation(BlockDriverState *bs, uint32_t *generation,
> +                            uint64_t *key, BlockPrType *type);
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_register(BlockDriverState *bs, uint64_t old_key,
> +                    uint64_t new_key, BlockPrType type, bool ignore_key);
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_reserve(BlockDriverState *bs, uint64_t key, BlockPrType type);
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_release(BlockDriverState *bs, uint64_t key, BlockPrType type);
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_clear(BlockDriverState *bs, uint64_t key);
> +int coroutine_fn GRAPH_RDLOCK
> +bdrv_co_pr_preempt(BlockDriverState *bs, uint64_t old_key, uint64_t new_key,
> +                   BlockPrType type, bool abort);
> +
>  /* Ensure contents are flushed to disk.  */
>  int coroutine_fn GRAPH_RDLOCK bdrv_co_flush(BlockDriverState *bs);
>  
> diff --git a/include/block/block_int-common.h 
> b/include/block/block_int-common.h
> index 761276127e..e5526f1d04 100644
> --- a/include/block/block_int-common.h
> +++ b/include/block/block_int-common.h
> @@ -766,6 +766,37 @@ struct BlockDriver {
>      int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_ioctl)(
>          BlockDriverState *bs, unsigned long int req, void *buf);
>  
> +    /**
> +     * bdrv_co_pr_read_keys is called to read the registered keys,
> +     * @num_keys is the maximum number of keys that can be transmitted.
> +     * On success, store generation in @generation and store keys @keys
> +     * and return the number of @keys.
> +     * On failure return -errno.
> +     */
> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_read_keys)(
> +        BlockDriverState *bs, uint32_t *generation,
> +        uint32_t num_keys, uint64_t *keys);
> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_read_reservation)(
> +        BlockDriverState *bs, uint32_t *generation,
> +        uint64_t *key, BlockPrType *type);

Plase add doc comments for each new function. It's okay to refer to
specific sections in the SCSI or NVMe standards if explaining the
concepts is too verbose. Someone implementing persistent reservations
support in another block driver must be able to figure out what the
exact model for persistent reservations is.

> +
> +    /**
> +     * Persist Through Power Loss(PTPL) is considered as required in QEMU
> +     * block layer, the block driver need always enable PTPL.
> +     */

What is the reasoning behind this? Will applications that rely on PTPL=0
work?

> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_register)(
> +        BlockDriverState *bs, uint64_t old_key,
> +        uint64_t new_key, BlockPrType type, bool ignore_key);
> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_reserve)(
> +        BlockDriverState *bs, uint64_t key, BlockPrType type);
> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_release)(
> +        BlockDriverState *bs, uint64_t key, BlockPrType type);
> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_clear)(
> +        BlockDriverState *bs, uint64_t key);
> +    int coroutine_fn GRAPH_RDLOCK_PTR(*bdrv_co_pr_preempt)(
> +        BlockDriverState *bs, uint64_t old_key,
> +        uint64_t new_key, BlockPrType type, bool abort);

Does a block driver need to implement all or none of these callbacks? I
guess they are not optional on a per-callback basis.

> +
>      /*
>       * Returns 0 for completed check, -errno for internal errors.
>       * The check results are stored in result.
> diff --git a/include/sysemu/block-backend-io.h 
> b/include/sysemu/block-backend-io.h
> index d174275a5c..f87c61c06e 100644
> --- a/include/sysemu/block-backend-io.h
> +++ b/include/sysemu/block-backend-io.h
> @@ -62,6 +62,28 @@ void blk_aio_cancel_async(BlockAIOCB *acb);
>  BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void 
> *buf,
>                            BlockCompletionFunc *cb, void *opaque);
>  
> +BlockAIOCB *blk_aio_pr_read_keys(BlockBackend *blk, uint32_t *generation,
> +                                 uint32_t num_keys, uint64_t *keys,
> +                                 BlockCompletionFunc *cb, void *opaque);
> +BlockAIOCB *blk_aio_pr_read_reservation(BlockBackend *blk, uint32_t 
> *generation,
> +                                        uint64_t *key, BlockPrType *type,
> +                                        BlockCompletionFunc *cb, void 
> *opaque);
> +BlockAIOCB *blk_aio_pr_register(BlockBackend *blk, uint64_t old_key,
> +                                uint64_t new_key, BlockPrType type,
> +                                bool ignore_key, BlockCompletionFunc *cb,
> +                                void *opaque);
> +BlockAIOCB *blk_aio_pr_reserve(BlockBackend *blk,  uint64_t key,
> +                               BlockPrType type, BlockCompletionFunc *cb,
> +                               void *opaque);
> +BlockAIOCB *blk_aio_pr_release(BlockBackend *blk, uint64_t key,
> +                               BlockPrType type, BlockCompletionFunc *cb,
> +                               void *opaque);
> +BlockAIOCB *blk_aio_pr_clear(BlockBackend *blk, uint64_t key,
> +                             BlockCompletionFunc *cb, void *opaque);
> +BlockAIOCB *blk_aio_pr_preempt(BlockBackend *blk, uint64_t old_key,
> +                               uint64_t new_key, BlockPrType type, bool 
> abort,
> +                               BlockCompletionFunc *cb, void *opaque);
> +
>  void blk_inc_in_flight(BlockBackend *blk);
>  void blk_dec_in_flight(BlockBackend *blk);
>  
> -- 
> 2.20.1
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to