This patch adds the ability to reload the Ceph configuration for an attached RBD block device. This is necessary for cases where rebooting a VM and/or detaching and reattaching an RBD drive is not an easy option.
The reload mechanism relies on the bdrv_reopen_* calls to provide a transactional guarantee (using 2PC) for pulling in new configuration parameters. In the _prepare phase we do the grunt work of creating and establishing a new connection and open another instance of the same RBD image. If any issues are observed while creating a connection using the new parameters, we _abort the reload. The original connection to the cluster is kept available and all ongoing I/O on it should be fine. Once the _prepare phase completes successfully we enter the _commit phase. In this phase we simply move the I/O over to the new fd for the corresponding image we have already created in the _prepare phase and reclaim the old rados I/O context and connection. It is important to note that because we want to use this feature when a QEMU VM is already running, we need to switch the logic to have values in ceph.conf override the ones present in the -drive file=* string in order for new changes to take place, for the same keys present in both places.
Signed-off-by: Vaibhav Bhembre <vaib...@digitalocean.com> diff --git a/block/rbd.c b/block/rbd.c index 0a5840d..100f398 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -930,6 +930,125 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, return snap_count; } +static int qemu_rbd_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRBDState *new_s; + rados_t c; + rados_ioctx_t io_ctx; + char pool[RBD_MAX_POOL_NAME_SIZE]; + char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; + char conf[RBD_MAX_CONF_SIZE]; + char clientname_buf[RBD_MAX_CONF_VAL_SIZE]; + char *clientname; + int r; + + new_s = reopen_state->opaque = g_new0(BDRVRBDState, 1); + + r = qemu_rbd_parsename(reopen_state->bs->filename, + pool, sizeof pool, + snap_buf, sizeof snap_buf, + new_s->name, sizeof new_s->name, + conf, sizeof conf, + errp); + if (r < 0) { + return r; + } + + if (snap_buf[0] != '\0') { + new_s->snap = g_strdup(snap_buf); + } + + clientname = qemu_rbd_parse_clientname(conf, clientname_buf); + r = rados_create(&c, clientname); + if (r < 0) { + error_setg_errno(errp, -r, "error creating cluster from config"); + return r; + } + new_s->cluster = c; + + if (conf[0] != '\0') { + r = qemu_rbd_set_conf(c, conf, false, errp); + if (r < 0) { + error_setg_errno(errp, -r, "error setting config"); + return r; + } + } + + if (strstr(conf, "conf=") == NULL) { + r = rados_conf_read_file(c, NULL); + } else if (conf[0] != '\0') { + r = qemu_rbd_set_conf(c, conf, true, errp); + } + + if (r < 0) { + error_setg_errno(errp, -r, "error parsing config"); + return r; + } + + r = rados_connect(c); + if (r < 0) { + error_setg_errno(errp, -r, "error connecting"); + return r; + } + + r = rados_ioctx_create(c, pool, &io_ctx); + if (r < 0) { + error_setg_errno(errp, -r, "error creating ioctx"); + return r; + } + new_s->io_ctx = io_ctx; + + r = rbd_open(io_ctx, new_s->name, &new_s->image, new_s->snap); + if (r < 0) { + error_setg_errno(errp, -r, "error opening rbd"); + return r; + } + + 
return 0; +} + +static void qemu_rbd_reopen_abort(BDRVReopenState *reopen_state) +{ + BDRVRBDState *new_s = reopen_state->opaque; + + if (new_s->io_ctx) { + rados_ioctx_destroy(new_s->io_ctx); + } + + if (new_s->cluster) { + rados_shutdown(new_s->cluster); + } + + g_free(new_s->snap); + g_free(reopen_state->opaque); + reopen_state->opaque = NULL; +} + +static void qemu_rbd_reopen_commit(BDRVReopenState *reopen_state) +{ + BDRVRBDState *s, *new_s; + + s = reopen_state->bs->opaque; + new_s = reopen_state->opaque; + + rados_aio_flush(s->io_ctx); + + rbd_close(s->image); + rados_ioctx_destroy(s->io_ctx); + g_free(s->snap); + rados_shutdown(s->cluster); + + s->io_ctx = new_s->io_ctx; + s->cluster = new_s->cluster; + s->image = new_s->image; + s->snap = new_s->snap; + reopen_state->bs->read_only = (s->snap != NULL); + + g_free(reopen_state->opaque); + reopen_state->opaque = NULL; +} + #ifdef LIBRBD_SUPPORTS_DISCARD static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs, int64_t sector_num, @@ -989,6 +1108,9 @@ static BlockDriver bdrv_rbd = { .create_opts = &qemu_rbd_create_opts, .bdrv_getlength = qemu_rbd_getlength, .bdrv_truncate = qemu_rbd_truncate, + .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, + .bdrv_reopen_commit = qemu_rbd_reopen_commit, + .bdrv_reopen_abort = qemu_rbd_reopen_abort, .protocol_name = "rbd", .bdrv_aio_readv = qemu_rbd_aio_readv, diff --git a/hmp-commands.hx b/hmp-commands.hx index 98b4b1a..3d06dc0 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1759,3 +1759,17 @@ ETEXI STEXI @end table ETEXI + + + { + .name = "reload-rbd-config", + .args_type = "node:s", + .params = "node", + .help = "reload rbd ceph config live", + .mhandler.cmd = hmp_reload_rbd_config, + }, + +STEXI +@item reload rbd config +Reload ceph config for RBD image. 
+ETEXI diff --git a/hmp.c b/hmp.c index 0cf5baa..4cf0036 100644 --- a/hmp.c +++ b/hmp.c @@ -2474,3 +2474,16 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) qapi_free_HotpluggableCPUList(saved); } + +void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict) +{ + const char *node = qdict_get_str(qdict, "node"); + Error *err = NULL; + + qmp_reload_rbd_config(node, &err); + if (err) { + monitor_printf(mon, "%s\n", error_get_pretty(err)); + error_free(err); + return; + } +} diff --git a/hmp.h b/hmp.h index f5d9749..8d2edf7 100644 --- a/hmp.h +++ b/hmp.h @@ -133,5 +133,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict); void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict); void hmp_info_dump(Monitor *mon, const QDict *qdict); void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict); +void hmp_reload_rbd_config(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi-schema.json b/qapi-schema.json index d2d6506..83921d6 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4317,3 +4317,16 @@ # Since: 2.7 ## { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] } + +## +# @reload-rbd-config: +# +# Reload the ceph config for a given RBD block device attached to the VM. +# +# @node: Name of the node. +# +# Returns: nothing on success. +# +# Since: 2.7 +## +{'command': 'reload-rbd-config', 'data': { 'node': 'str' } } diff --git a/qmp-commands.hx b/qmp-commands.hx index 6937e83..906b1fe 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -4983,3 +4983,24 @@ Example for pseries machine type started with { "props": { "core-id": 0 }, "type": "POWER8-spapr-cpu-core", "vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"} ]}' + +EQMP + + { + .name = "reload-rbd-config", + .args_type = "node:s", + .mhandler.cmd_new = qmp_marshal_reload_rbd_config, + }, + +SQMP +reload-rbd-config +----------------------------------------- + +Reload the ceph config for an RBD block device. + +Arguments: "node" - name of the block device node whose config should be reloaded (json-string)
+ +Example: + +-> { "execute": "reload-rbd-config", "arguments": { "node": "drive-virtio-disk0" } } +<- { "return": {} } diff --git a/qmp.c b/qmp.c index b6d531e..1ff81d6 100644 --- a/qmp.c +++ b/qmp.c @@ -707,3 +707,34 @@ ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp) return head; } + +void qmp_reload_rbd_config(const char *node, Error **errp) +{ + BlockBackend *blk; + BlockDriverState *bs; + Error *local_err = NULL; + int ret; + + blk = blk_by_name(node); + if (!blk) { + error_setg(errp, QERR_INVALID_PARAMETER, "node"); + return; + } + + bs = blk_bs(blk); + if (!bs) { + error_setg(errp, "no BDS found"); + return; + } + + ret = bdrv_reopen(bs, bdrv_get_flags(bs), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (ret) { + error_setg_errno(errp, -ret, "failed reopening node"); + return; + } +} -- 1.9.1