[PATCH 6/7] block/rbd: add write zeroes support

2020-12-27 Thread Peter Lieven
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/block/rbd.c b/block/rbd.c
index 2d77d0007f..27b4404adf 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -63,7 +63,8 @@ typedef enum {
 RBD_AIO_READ,
 RBD_AIO_WRITE,
 RBD_AIO_DISCARD,
-RBD_AIO_FLUSH
+RBD_AIO_FLUSH,
+RBD_AIO_WRITE_ZEROES
 } RBDAIOCmd;
 
 typedef struct BDRVRBDState {
@@ -221,8 +222,12 @@ done:
 
 static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
+BDRVRBDState *s = bs->opaque;
 /* XXX Does RBD support AIO on less than 512-byte alignment? */
 bs->bl.request_alignment = 512;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+bs->bl.pwrite_zeroes_alignment = s->object_size;
+#endif
 }
 
 
@@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 s->aio_context = bdrv_get_aio_context(bs);
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+#endif
 
 /* When extending regular files, we get zeros from the OS */
 bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
@@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState 
*bs,
 case RBD_AIO_FLUSH:
 r = rbd_aio_flush(s->image, c);
 break;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+case RBD_AIO_WRITE_ZEROES:
+r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
+break;
+#endif
 default:
 r = -EINVAL;
 }
@@ -880,6 +893,19 @@ static int coroutine_fn 
qemu_rbd_co_pdiscard(BlockDriverState *bs,
 return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
 }
 
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+static int
+coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+  int count, BdrvRequestFlags flags)
+{
+if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+return -ENOTSUP;
+}
+return qemu_rbd_start_co(bs, offset, count, NULL, flags,
+ RBD_AIO_WRITE_ZEROES);
+}
+#endif
+
 static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
 BDRVRBDState *s = bs->opaque;
@@ -1108,6 +1134,9 @@ static BlockDriver bdrv_rbd = {
 .bdrv_co_pwritev= qemu_rbd_co_pwritev,
 .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
 .bdrv_co_pdiscard   = qemu_rbd_co_pdiscard,
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+.bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
+#endif
 
 .bdrv_snapshot_create   = qemu_rbd_snap_create,
 .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
-- 
2.17.1





[PATCH 4/7] block/rbd: add bdrv_{attach,detach}_aio_context

2020-12-27 Thread Peter Lieven
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index a2da70e37f..27b232f4d8 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -91,6 +91,7 @@ typedef struct BDRVRBDState {
 char *namespace;
 uint64_t image_size;
 uint64_t object_size;
+AioContext *aio_context;
 } BDRVRBDState;
 
 static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -749,6 +750,8 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 }
 
+s->aio_context = bdrv_get_aio_context(bs);
+
 /* When extending regular files, we get zeros from the OS */
 bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
 
@@ -839,8 +842,7 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB 
*rcb)
 rcb->ret = rbd_aio_get_return_value(c);
 rbd_aio_release(c);
 
-replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
- rbd_finish_bh, rcb);
+replay_bh_schedule_oneshot_event(acb->s->aio_context, rbd_finish_bh, rcb);
 }
 
 static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
@@ -1151,6 +1153,18 @@ static const char *const qemu_rbd_strong_runtime_opts[] 
= {
 NULL
 };
 
+static void qemu_rbd_attach_aio_context(BlockDriverState *bs,
+   AioContext *new_context)
+{
+BDRVRBDState *s = bs->opaque;
+s->aio_context = new_context;
+}
+
+static void qemu_rbd_detach_aio_context(BlockDriverState *bs)
+{
+
+}
+
 static BlockDriver bdrv_rbd = {
 .format_name= "rbd",
 .instance_size  = sizeof(BDRVRBDState),
@@ -1180,6 +1194,9 @@ static BlockDriver bdrv_rbd = {
 .bdrv_snapshot_goto = qemu_rbd_snap_rollback,
 .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
 
+.bdrv_attach_aio_context  = qemu_rbd_attach_aio_context,
+.bdrv_detach_aio_context  = qemu_rbd_detach_aio_context,
+
 .strong_runtime_opts= qemu_rbd_strong_runtime_opts,
 };
 
-- 
2.17.1





[PATCH 7/7] block/rbd: change request alignment to 1 byte

2020-12-27 Thread Peter Lieven
Since we implement byte interfaces and librbd supports aio at byte granularity,
we can lift the 512-byte alignment.

Signed-off-by: Peter Lieven 
---
 block/rbd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 27b4404adf..8673e8f553 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -223,8 +223,6 @@ done:
 static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 BDRVRBDState *s = bs->opaque;
-/* XXX Does RBD support AIO on less than 512-byte alignment? */
-bs->bl.request_alignment = 512;
 #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
 bs->bl.pwrite_zeroes_alignment = s->object_size;
 #endif
-- 
2.17.1





[PATCH 3/7] block/rbd: use stored image_size in qemu_rbd_getlength

2020-12-27 Thread Peter Lieven
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index bc8cf8af9b..a2da70e37f 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -956,15 +956,7 @@ static int qemu_rbd_getinfo(BlockDriverState *bs, 
BlockDriverInfo *bdi)
 static int64_t qemu_rbd_getlength(BlockDriverState *bs)
 {
 BDRVRBDState *s = bs->opaque;
-rbd_image_info_t info;
-int r;
-
-r = rbd_stat(s->image, &info, sizeof(info));
-if (r < 0) {
-return r;
-}
-
-return info.size;
+return s->image_size;
 }
 
 static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
-- 
2.17.1





[PATCH 2/7] block/rbd: store object_size in BDRVRBDState

2020-12-27 Thread Peter Lieven
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 650e27c351..bc8cf8af9b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -90,6 +90,7 @@ typedef struct BDRVRBDState {
 char *snap;
 char *namespace;
 uint64_t image_size;
+uint64_t object_size;
 } BDRVRBDState;
 
 static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@@ -663,6 +664,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 const QDictEntry *e;
 Error *local_err = NULL;
 char *keypairs, *secretid;
+rbd_image_info_t info;
 int r;
 
 keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
@@ -727,13 +729,15 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto failed_open;
 }
 
-r = rbd_get_size(s->image, &s->image_size);
+r = rbd_stat(s->image, &info, sizeof(info));
 if (r < 0) {
-error_setg_errno(errp, -r, "error getting image size from %s",
+error_setg_errno(errp, -r, "error getting image info from %s",
  s->image_name);
 rbd_close(s->image);
 goto failed_open;
 }
+s->image_size = info.size;
+s->object_size = info.obj_size;
 
 /* If we are using an rbd snapshot, we must be r/o, otherwise
  * leave as-is */
@@ -945,15 +949,7 @@ static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
 static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
 BDRVRBDState *s = bs->opaque;
-rbd_image_info_t info;
-int r;
-
-r = rbd_stat(s->image, &info, sizeof(info));
-if (r < 0) {
-return r;
-}
-
-bdi->cluster_size = info.obj_size;
+bdi->cluster_size = s->object_size;
 return 0;
 }
 
-- 
2.17.1





[PATCH 1/7] block/rbd: bump librbd requirement to luminous release

2020-12-27 Thread Peter Lieven
Even luminous (version 12.2) has been unmaintained for over 3 years now.
Bump the requirement to get rid of the ifdef'ry in the code.

Signed-off-by: Peter Lieven 
---
 block/rbd.c | 120 
 configure   |   7 +--
 2 files changed, 12 insertions(+), 115 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 9bd2bce716..650e27c351 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -55,24 +55,10 @@
  * leading "\".
  */
 
-/* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
-#define LIBRBD_SUPPORTS_DISCARD
-#else
-#undef LIBRBD_SUPPORTS_DISCARD
-#endif
-
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
 
 #define RBD_MAX_SNAPS 100
 
-/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
-#ifdef LIBRBD_SUPPORTS_IOVEC
-#define LIBRBD_USE_IOVEC 1
-#else
-#define LIBRBD_USE_IOVEC 0
-#endif
-
 typedef enum {
 RBD_AIO_READ,
 RBD_AIO_WRITE,
@@ -84,7 +70,6 @@ typedef struct RBDAIOCB {
 BlockAIOCB common;
 int64_t ret;
 QEMUIOVector *qiov;
-char *bounce;
 RBDAIOCmd cmd;
 int error;
 struct BDRVRBDState *s;
@@ -94,7 +79,6 @@ typedef struct RADOSCB {
 RBDAIOCB *acb;
 struct BDRVRBDState *s;
 int64_t size;
-char *buf;
 int64_t ret;
 } RADOSCB;
 
@@ -332,13 +316,9 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const 
char *keypairs_json,
 
 static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
 {
-if (LIBRBD_USE_IOVEC) {
-RBDAIOCB *acb = rcb->acb;
-iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
-   acb->qiov->size - offs);
-} else {
-memset(rcb->buf + offs, 0, rcb->size - offs);
-}
+RBDAIOCB *acb = rcb->acb;
+iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
+   acb->qiov->size - offs);
 }
 
 /* FIXME Deprecate and remove keypairs or make it available in QMP. */
@@ -493,13 +473,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
 
 g_free(rcb);
 
-if (!LIBRBD_USE_IOVEC) {
-if (acb->cmd == RBD_AIO_READ) {
-qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-}
-qemu_vfree(acb->bounce);
-}
-
 acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
 
 qemu_aio_unref(acb);
@@ -866,28 +839,6 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB 
*rcb)
  rbd_finish_bh, rcb);
 }
 
-static int rbd_aio_discard_wrapper(rbd_image_t image,
-   uint64_t off,
-   uint64_t len,
-   rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_DISCARD
-return rbd_aio_discard(image, off, len, comp);
-#else
-return -ENOTSUP;
-#endif
-}
-
-static int rbd_aio_flush_wrapper(rbd_image_t image,
- rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-return rbd_aio_flush(image, comp);
-#else
-return -ENOTSUP;
-#endif
-}
-
 static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
  int64_t off,
  QEMUIOVector *qiov,
@@ -910,21 +861,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 
 rcb = g_new(RADOSCB, 1);
 
-if (!LIBRBD_USE_IOVEC) {
-if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
-acb->bounce = NULL;
-} else {
-acb->bounce = qemu_try_blockalign(bs, qiov->size);
-if (acb->bounce == NULL) {
-goto failed;
-}
-}
-if (cmd == RBD_AIO_WRITE) {
-qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-}
-rcb->buf = acb->bounce;
-}
-
 acb->ret = 0;
 acb->error = 0;
 acb->s = s;
@@ -938,7 +874,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 }
 
 switch (cmd) {
-case RBD_AIO_WRITE: {
+case RBD_AIO_WRITE:
 /*
  * RBD APIs don't allow us to write more than actual size, so in order
  * to support growing images, we resize the image before write
@@ -950,25 +886,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
 goto failed_completion;
 }
 }
-#ifdef LIBRBD_SUPPORTS_IOVEC
-r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
-#else
-r = rbd_aio_write(s->image, off, size, rcb->buf, c);
-#endif
+r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
 break;
-}
 case RBD_AIO_READ:
-#ifdef LIBRBD_SUPPORTS_IOVEC
-r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
-#else
-r = rbd_aio_read(s->image, off, size, rcb->buf, c);
-#endif
+r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
 break;
 case RBD_AIO_DISCARD:
-r = rbd_aio_discard_wrapper(s->image, off, size, c);
+r = rbd_aio_discard(s->image, off, size, c);
 

[PATCH 5/7] block/rbd: migrate from aio to coroutines

2020-12-27 Thread Peter Lieven
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 247 ++--
 1 file changed, 84 insertions(+), 163 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 27b232f4d8..2d77d0007f 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -66,22 +66,6 @@ typedef enum {
 RBD_AIO_FLUSH
 } RBDAIOCmd;
 
-typedef struct RBDAIOCB {
-BlockAIOCB common;
-int64_t ret;
-QEMUIOVector *qiov;
-RBDAIOCmd cmd;
-int error;
-struct BDRVRBDState *s;
-} RBDAIOCB;
-
-typedef struct RADOSCB {
-RBDAIOCB *acb;
-struct BDRVRBDState *s;
-int64_t size;
-int64_t ret;
-} RADOSCB;
-
 typedef struct BDRVRBDState {
 rados_t cluster;
 rados_ioctx_t io_ctx;
@@ -94,6 +78,13 @@ typedef struct BDRVRBDState {
 AioContext *aio_context;
 } BDRVRBDState;
 
+typedef struct RBDTask {
+BDRVRBDState *s;
+Coroutine *co;
+bool complete;
+int64_t ret;
+} RBDTask;
+
 static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
 BlockdevOptionsRbd *opts, bool cache,
 const char *keypairs, const char *secretid,
@@ -316,13 +307,6 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const 
char *keypairs_json,
 return ret;
 }
 
-static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
-{
-RBDAIOCB *acb = rcb->acb;
-iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
-   acb->qiov->size - offs);
-}
-
 /* FIXME Deprecate and remove keypairs or make it available in QMP. */
 static int qemu_rbd_do_create(BlockdevCreateOptions *options,
   const char *keypairs, const char 
*password_secret,
@@ -440,46 +424,6 @@ exit:
 return ret;
 }
 
-/*
- * This aio completion is being called from rbd_finish_bh() and runs in qemu
- * BH context.
- */
-static void qemu_rbd_complete_aio(RADOSCB *rcb)
-{
-RBDAIOCB *acb = rcb->acb;
-int64_t r;
-
-r = rcb->ret;
-
-if (acb->cmd != RBD_AIO_READ) {
-if (r < 0) {
-acb->ret = r;
-acb->error = 1;
-} else if (!acb->error) {
-acb->ret = rcb->size;
-}
-} else {
-if (r < 0) {
-qemu_rbd_memset(rcb, 0);
-acb->ret = r;
-acb->error = 1;
-} else if (r < rcb->size) {
-qemu_rbd_memset(rcb, r);
-if (!acb->error) {
-acb->ret = rcb->size;
-}
-} else if (!acb->error) {
-acb->ret = r;
-}
-}
-
-g_free(rcb);
-
-acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-
-qemu_aio_unref(acb);
-}
-
 static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
 {
 const char **vals;
@@ -817,88 +761,49 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t 
size)
 return 0;
 }
 
-static const AIOCBInfo rbd_aiocb_info = {
-.aiocb_size = sizeof(RBDAIOCB),
-};
-
-static void rbd_finish_bh(void *opaque)
+static void qemu_rbd_finish_bh(void *opaque)
 {
-RADOSCB *rcb = opaque;
-qemu_rbd_complete_aio(rcb);
+RBDTask *task = opaque;
+task->complete = 1;
+aio_co_wake(task->co);
 }
 
-/*
- * This is the callback function for rbd_aio_read and _write
- *
- * Note: this function is being called from a non qemu thread so
- * we need to be careful about what we do here. Generally we only
- * schedule a BH, and do the rest of the io completion handling
- * from rbd_finish_bh() which runs in a qemu context.
- */
-static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
+static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task)
 {
-RBDAIOCB *acb = rcb->acb;
-
-rcb->ret = rbd_aio_get_return_value(c);
+task->ret = rbd_aio_get_return_value(c);
 rbd_aio_release(c);
-
-replay_bh_schedule_oneshot_event(acb->s->aio_context, rbd_finish_bh, rcb);
+aio_bh_schedule_oneshot(task->s->aio_context, qemu_rbd_finish_bh, task);
 }
 
-static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
- int64_t off,
- QEMUIOVector *qiov,
- int64_t size,
- BlockCompletionFunc *cb,
- void *opaque,
- RBDAIOCmd cmd)
+static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
+  uint64_t offset,
+  uint64_t bytes,
+  QEMUIOVector *qiov,
+  int flags,
+  RBDAIOCmd cmd)
 {
-RBDAIOCB *acb;
-RADOSCB *rcb = NULL;
-rbd_completion_t c;
-int r;
-
 BDRVRBDState *s = bs->opaque;
+RBDTask task = { .s = s, .co = qemu_coroutine_self() };
+rbd_completion_t c;
+int r, ret = -EIO;
 
-acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
-acb->cmd = cmd;
-

[PATCH 0/7] block/rbd: migrate to coroutines and add write zeroes support

2020-12-27 Thread Peter Lieven
This series migrates the qemu rbd driver from the old aio emulation
to native coroutines and adds write zeroes support, which is important
for block operations.

To achieve this we first bump the librbd requirement to the already
outdated luminous release of ceph to get rid of some wrappers and
ifdef'ry in the code.

Peter Lieven (7):
  block/rbd: bump librbd requirement to luminous release
  block/rbd: store object_size in BDRVRBDState
  block/rbd: use stored image_size in qemu_rbd_getlength
  block/rbd: add bdrv_{attach,detach}_aio_context
  block/rbd: migrate from aio to coroutines
  block/rbd: add write zeroes support
  block/rbd: change request alignment to 1 byte

 block/rbd.c | 421 +---
 configure   |   7 +-
 2 files changed, 139 insertions(+), 289 deletions(-)

-- 
2.17.1