Benoît Canet <benoit.ca...@irqsave.net> writes: > The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote : >> A block device consists of a frontend device model and a backend. >> >> A block backend has a tree of block drivers doing the actual work. >> The tree is managed by the block layer. >> >> We currently use a single abstraction BlockDriverState both for tree >> nodes and the backend as a whole. Drawbacks: >> >> * Its API includes both stuff that makes sense only at the block >> backend level (root of the tree) and stuff that's only for use >> within the block layer. This makes the API bigger and more complex >> than necessary. Moreover, it's not obvious which interfaces are >> meant for device models, and which really aren't. >> >> * Since device models keep a reference to their backend, the backend >> object can't just be destroyed. But for media change, we need to >> replace the tree. Our solution is to make the BlockDriverState >> generic, with actual driver state in a separate object, pointed to >> by member opaque. That lets us replace the tree by deinitializing >> and reinitializing its root. This special need of the root makes >> the data structure awkward everywhere in the tree. >> >> The general plan is to separate the APIs into "block backend", for use >> by device models, monitor and whatever other code dealing with block >> backends, and "block driver", for use by the block layer and whatever >> other code (if any) dealing with trees and tree nodes. >> >> Code dealing with block backends, device models in particular, should >> become completely oblivious of BlockDriverState. This should let us >> clean up both APIs, and the tree data structures. >> >> This commit is a first step. It creates a minimal "block backend" >> API: type BlockBackend and functions to create, destroy and find them. >> BlockBackend objects are created and destroyed, but not yet used for >> anything; that'll come shortly. >> >> BlockBackend is reference-counted. Its reference count never exceeds >> one so far, but that's going to change. >> >> Signed-off-by: Markus Armbruster <arm...@redhat.com> >> --- >> block/Makefile.objs | 2 +- >> block/block-backend.c | 110 >> +++++++++++++++++++++++++++++++++++++++++ >> blockdev.c | 10 +++- >> hw/block/xen_disk.c | 11 +++++ >> include/qemu/typedefs.h | 1 + >> include/sysemu/block-backend.h | 26 ++++++++++ >> qemu-img.c | 46 +++++++++++++++++ >> qemu-io.c | 8 +++ >> qemu-nbd.c | 3 +- >> 9 files changed, 214 insertions(+), 3 deletions(-) >> create mode 100644 block/block-backend.c >> create mode 100644 include/sysemu/block-backend.h >> >> diff --git a/block/Makefile.objs b/block/Makefile.objs >> index f45f939..a70140b 100644 >> --- a/block/Makefile.objs >> +++ b/block/Makefile.objs >> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o >> block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o >> block-obj-$(CONFIG_QUORUM) += quorum.o >> block-obj-y += parallels.o blkdebug.o blkverify.o >> -block-obj-y += snapshot.o qapi.o >> +block-obj-y += block-backend.o snapshot.o qapi.o >> block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o >> block-obj-$(CONFIG_POSIX) += raw-posix.o >> block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o >> diff --git a/block/block-backend.c b/block/block-backend.c >> new file mode 100644 >> index 0000000..833f7d9 >> --- /dev/null >> +++ b/block/block-backend.c >> @@ -0,0 +1,110 @@ >> +/* >> + * QEMU Block backends >> + * >> + * Copyright (C) 2014 Red Hat, Inc. >> + * >> + * Authors: >> + * Markus Armbruster <arm...@redhat.com>, >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or >> + * later. See the COPYING file in the top-level directory. >> + */ >> + >> +#include "sysemu/block-backend.h" >> +#include "block/block_int.h" >> + >> +struct BlockBackend { >> + char *name; >> + int refcnt; >> + QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */ >> +}; >> + >> +static QTAILQ_HEAD(, BlockBackend) blk_backends = >> + QTAILQ_HEAD_INITIALIZER(blk_backends); >> + >> +/** >> + * blk_new: >> + * @name: name, must not be %NULL or empty >> + * @errp: return location for an error to be set on failure, or %NULL >> + * >> + * Create a new BlockBackend, with a reference count of one. Fail if >> + * @name already exists. >> + * >> + * Returns: the BlockBackend on success, %NULL on failure >> + */ >> +BlockBackend *blk_new(const char *name, Error **errp) >> +{ >> + BlockBackend *blk = g_new0(BlockBackend, 1); >> + >> + assert(name && name[0]); >> + if (blk_by_name(name)) { >> + error_setg(errp, "Device with id '%s' already exists", name); >> + return NULL; >> + } >> + blk->name = g_strdup(name); >> + blk->refcnt = 1; >> + QTAILQ_INSERT_TAIL(&blk_backends, blk, link); >> + return blk; >> +} >> + >> +static void blk_delete(BlockBackend *blk) >> +{ >> + assert(!blk->refcnt); >> + QTAILQ_REMOVE(&blk_backends, blk, link); >> + g_free(blk->name); >> + g_free(blk); >> +} >> + >> +/** >> + * blk_ref: >> + * >> + * Increment @blk's reference count. >> + */ >> +void blk_ref(BlockBackend *blk) >> +{ >> + blk->refcnt++; >> +} >> + >> +/** >> + * blk_unref: >> + * >> + * Decrement @blk's reference count. If this drops it to zero, >> + * destroy @blk. >> + */ >> +void blk_unref(BlockBackend *blk) >> +{ >> + if (blk) { >> + g_assert(blk->refcnt > 0); >> + if (!--blk->refcnt) { >> + blk_delete(blk); >> + } >> + } >> +} >> + >> +const char *blk_name(BlockBackend *blk) >> +{ >> + return blk->name; >> +} >> + >> +BlockBackend *blk_by_name(const char *name) >> +{ >> + BlockBackend *blk; >> + >> + QTAILQ_FOREACH(blk, &blk_backends, link) { >> + if (!strcmp(name, blk->name)) { >> + return blk; >> + } >> + } >> + return NULL; >> +} >> + >> +/** >> + * blk_next: >> + * >> + * Returns: the first BlockBackend if @blk is null, else @blk's next >> + * sibling, which is %NULL for the last BlockBackend >> + */ >> +BlockBackend *blk_next(BlockBackend *blk) >> +{ >> + return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends); >> +} >> diff --git a/blockdev.c b/blockdev.c >> index 9fbd888..86596bc 100644 >> --- a/blockdev.c >> +++ b/blockdev.c >> @@ -30,6 +30,7 @@ >> * THE SOFTWARE. >> */ >> >> +#include "sysemu/block-backend.h" >> #include "sysemu/blockdev.h" >> #include "hw/block/block.h" >> #include "block/blockjob.h" >> @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo) >> } >> >> bdrv_unref(dinfo->bdrv); >> + blk_unref(blk_by_name(dinfo->id)); >> g_free(dinfo->id); >> QTAILQ_REMOVE(&drives, dinfo, next); >> g_free(dinfo->serial); >> @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict >> *bs_opts, >> int ro = 0; >> int bdrv_flags = 0; >> int on_read_error, on_write_error; >> + BlockBackend *blk; >> DriveInfo *dinfo; >> ThrottleConfig cfg; >> int snapshot = 0; >> @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict >> *bs_opts, >> } >> >> /* init */ >> + blk = blk_new(qemu_opts_id(opts), errp); >> + if (!blk) { >> + goto early_err; >> + } > > Here you create a new block backend. > And you don't attach it to anything in any way yet.
Yes. Right before creating the root BDS. > So down in the code the following test will leak it: > if (!file || !*file) { > > if (has_driver_specific_opts) { > > file = NULL; > > } else { > > QDECREF(bs_opts); > > qemu_opts_del(opts); > > return dinfo; > > } > > } The root BDS isn't destroyed here, and therefore the BB isn't, either. The BB will be destroyed right when the root BDS is. > I am sure one of your next patchs fixes this but for this > precise commit this do look like a leak. > >> dinfo = g_malloc0(sizeof(*dinfo)); >> dinfo->id = g_strdup(qemu_opts_id(opts)); >> dinfo->bdrv = bdrv_new_named(dinfo->id, &error); >> @@ -525,6 +532,7 @@ err: >> bdrv_new_err: >> g_free(dinfo->id); >> g_free(dinfo); >> + blk_unref(blk); >> early_err: >> qemu_opts_del(opts); >> err_no_opts: >> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, >> QObject **ret_data) >> */ >> if (bdrv_get_attached_dev(bs)) { >> bdrv_make_anon(bs); >> - >> + blk_unref(blk_by_name(id)); >> /* Further I/O must not pause the guest */ >> bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT, >> BLOCKDEV_ON_ERROR_REPORT); >> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c >> index 8bac7ff..730a021 100644 >> --- a/hw/block/xen_disk.c >> +++ b/hw/block/xen_disk.c >> @@ -39,6 +39,7 @@ >> #include "hw/xen/xen_backend.h" >> #include "xen_blkif.h" >> #include "sysemu/blockdev.h" >> +#include "sysemu/block-backend.h" >> >> /* ------------------------------------------------------------- */ >> >> @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev) >> blkdev->dinfo = drive_get(IF_XEN, 0, index); >> if (!blkdev->dinfo) { >> Error *local_err = NULL; >> + BlockBackend *blk; >> BlockDriver *drv; >> >> /* setup via xenbus -> create new block driver instance */ >> xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus >> setup)\n"); >> + blk = blk_new(blkdev->dev, NULL); >> + if (!blk) { >> + return -1; >> + } >> blkdev->bs = bdrv_new_named(blkdev->dev, NULL); >> if (!blkdev->bs) { >> + blk_unref(blk); >> return -1; >> } >> >> @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev) >> error_get_pretty(local_err)); >> error_free(local_err); >> bdrv_unref(blkdev->bs); >> + blk_unref(blk); >> blkdev->bs = NULL; >> return -1; >> } >> @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev) >> if (blkdev->bs) { >> bdrv_detach_dev(blkdev->bs, blkdev); >> bdrv_unref(blkdev->bs); >> + if (!blkdev->dinfo) { >> + blk_unref(blk_by_name(blkdev->dev)); >> + } >> blkdev->bs = NULL; >> } >> xen_be_unbind_evtchn(&blkdev->xendev); >> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h >> index 5f20b0e..198da2e 100644 >> --- a/include/qemu/typedefs.h >> +++ b/include/qemu/typedefs.h >> @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass; >> typedef struct NICInfo NICInfo; >> typedef struct HCIInfo HCIInfo; >> typedef struct AudioState AudioState; >> +typedef struct BlockBackend BlockBackend; >> typedef struct BlockDriverState BlockDriverState; >> typedef struct DriveInfo DriveInfo; >> typedef struct DisplayState DisplayState; >> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h >> new file mode 100644 >> index 0000000..3f8371c >> --- /dev/null >> +++ b/include/sysemu/block-backend.h >> @@ -0,0 +1,26 @@ >> +/* >> + * QEMU Block backends >> + * >> + * Copyright (C) 2014 Red Hat, Inc. >> + * >> + * Authors: >> + * Markus Armbruster <arm...@redhat.com>, >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or >> + * later. See the COPYING file in the top-level directory. >> + */ >> + >> +#ifndef BLOCK_BACKEND_H >> +#define BLOCK_BACKEND_H >> + >> +#include "qemu/typedefs.h" >> +#include "qapi/error.h" >> + >> +BlockBackend *blk_new(const char *name, Error **errp); >> +void blk_ref(BlockBackend *blk); >> +void blk_unref(BlockBackend *blk); >> +const char *blk_name(BlockBackend *blk); >> +BlockBackend *blk_by_name(const char *name); >> +BlockBackend *blk_next(BlockBackend *blk); >> + >> +#endif >> diff --git a/qemu-img.c b/qemu-img.c >> index 4490a22..bad3f64 100644 >> --- a/qemu-img.c >> +++ b/qemu-img.c >> @@ -29,6 +29,7 @@ >> #include "qemu/error-report.h" >> #include "qemu/osdep.h" >> #include "sysemu/sysemu.h" >> +#include "sysemu/block-backend.h" >> #include "block/block_int.h" >> #include "block/qapi.h" >> #include <getopt.h> >> @@ -575,6 +576,7 @@ static int img_check(int argc, char **argv) >> int c, ret; >> OutputFormat output_format = OFORMAT_HUMAN; >> const char *filename, *fmt, *output, *cache; >> + BlockBackend *blk; >> BlockDriverState *bs; >> int fix = 0; >> int flags = BDRV_O_FLAGS | BDRV_O_CHECK; >> @@ -649,6 +651,7 @@ static int img_check(int argc, char **argv) >> return 1; >> } >> > >> + blk = blk_new("image", &error_abort); > Hmm we are so sure this will work that we don't do if (!block) ? Matches what bdrv_new_open() does: bs = bdrv_new_named(id, &error_abort); As you noted further down, the tools treat these failures as programming errors. That's appropriate. >> bs = bdrv_new_open("image", filename, fmt, flags, true, quiet); >> if (!bs) { >> return 1; >> @@ -710,6 +713,7 @@ static int img_check(int argc, char **argv) >> fail: >> qapi_free_ImageCheck(check); >> bdrv_unref(bs); >> + blk_unref(blk); >> >> return ret; >> } >> @@ -718,6 +722,7 @@ static int img_commit(int argc, char **argv) >> { >> int c, ret, flags; >> const char *filename, *fmt, *cache; >> + BlockBackend *blk; >> BlockDriverState *bs; >> bool quiet = false; >> >> @@ -756,6 +761,7 @@ static int img_commit(int argc, char **argv) >> return 1; >> } >> >> + blk = blk_new("image", &error_abort); >> bs = bdrv_new_open("image", filename, fmt, flags, true, quiet); >> if (!bs) { >> return 1; >> @@ -780,6 +786,7 @@ static int img_commit(int argc, char **argv) >> } >> >> bdrv_unref(bs); >> + blk_unref(blk); >> if (ret) { >> return 1; >> } >> @@ -942,6 +949,7 @@ static int check_empty_sectors(BlockDriverState *bs, >> int64_t sect_num, >> static int img_compare(int argc, char **argv) >> { >> const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2; >> + BlockBackend *blk1, *blk2; >> BlockDriverState *bs1, *bs2; >> int64_t total_sectors1, total_sectors2; >> uint8_t *buf1 = NULL, *buf2 = NULL; >> @@ -1011,6 +1019,7 @@ static int img_compare(int argc, char **argv) >> goto out3; >> } >> >> + blk1 = blk_new("image 1", &error_abort); >> bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet); >> if (!bs1) { >> error_report("Can't open file %s", filename1); >> @@ -1018,6 +1027,7 @@ static int img_compare(int argc, char **argv) >> goto out3; >> } >> >> + blk2 = blk_new("image 2", &error_abort); >> bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet); >> if (!bs2) { >> error_report("Can't open file %s", filename2); >> @@ -1184,10 +1194,12 @@ static int img_compare(int argc, char **argv) >> >> out: >> bdrv_unref(bs2); >> + blk_unref(blk2); >> qemu_vfree(buf1); >> qemu_vfree(buf2); >> out2: >> bdrv_unref(bs1); >> + blk_unref(blk1); >> out3: >> qemu_progress_end(); >> return ret; >> @@ -1200,6 +1212,7 @@ static int img_convert(int argc, char **argv) >> int progress = 0, flags, src_flags; >> const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, >> *out_filename; >> BlockDriver *drv, *proto_drv; >> + BlockBackend **blk = NULL, *out_blk = NULL; >> BlockDriverState **bs = NULL, *out_bs = NULL; >> int64_t total_sectors, nb_sectors, sector_num, bs_offset; >> int64_t *bs_sectors = NULL; >> @@ -1354,6 +1367,7 @@ static int img_convert(int argc, char **argv) >> >> qemu_progress_print(0, 100); >> >> + blk = g_new0(BlockBackend *, bs_n); >> bs = g_new0(BlockDriverState *, bs_n); >> bs_sectors = g_new(int64_t, bs_n); >> >> @@ -1361,6 +1375,7 @@ static int img_convert(int argc, char **argv) >> for (bs_i = 0; bs_i < bs_n; bs_i++) { >> char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i) >> : g_strdup("source"); >> + blk[bs_i] = blk_new(id, &error_abort); >> bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags, >> true, quiet); >> g_free(id); >> @@ -1486,6 +1501,7 @@ static int img_convert(int argc, char **argv) >> goto out; >> } >> >> + out_blk = blk_new("target", &error_abort); >> out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, >> quiet); >> if (!out_bs) { >> ret = -1; >> @@ -1742,6 +1758,7 @@ out: >> if (out_bs) { >> bdrv_unref(out_bs); >> } >> + blk_unref(out_blk); >> if (bs) { >> for (bs_i = 0; bs_i < bs_n; bs_i++) { >> if (bs[bs_i]) { >> @@ -1750,6 +1767,12 @@ out: >> } >> g_free(bs); >> } >> + if (blk) { >> + for (bs_i = 0; bs_i < bs_n; bs_i++) { >> + blk_unref(blk[bs_i]); >> + } >> + g_free(blk); >> + } >> g_free(bs_sectors); >> fail_getopt: >> g_free(options); >> @@ -1858,6 +1881,7 @@ static ImageInfoList *collect_image_info_list(const >> char *filename, >> filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, >> NULL); >> >> while (filename) { >> + BlockBackend *blk; >> BlockDriverState *bs; >> ImageInfo *info; >> ImageInfoList *elem; >> @@ -1869,6 +1893,7 @@ static ImageInfoList *collect_image_info_list(const >> char *filename, >> } >> g_hash_table_insert(filenames, (gpointer)filename, NULL); >> >> + blk = blk_new("image", &error_abort); >> bs = bdrv_new_open("image", filename, fmt, >> BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false); >> if (!bs) { > > I think it misses an >> + blk_unref(blk); > in if(!bs) branch. Yes. Kevin noted that, too. I'll fix it. [...]