On Mon, Jun 01, 2026 at 11:44:02PM +0200, Sam Li wrote: > To configure the zoned format feature on the qcow2 driver, it > requires settings as: the device size, zone model, zone size, > zone capacity, number of conventional zones, limits on zone > resources (max append bytes, max open zones, and max_active_zones). > > To create a qcow2 image with zoned format feature, use command like > this: > qemu-img create -f qcow2 zbc.qcow2 -o size=768M \ > -o zone.size=64M -o zone.capacity=64M -o zone.conventional_zones=0 \ > -o zone.max_append_bytes=4096 -o zone.max_open_zones=6 \ > -o zone.max_active_zones=8 -o zone.mode=host-managed > > Signed-off-by: Sam Li <[email protected]> > --- > block/file-posix.c | 2 +- > block/qcow2.c | 329 ++++++++++++++++++++++++++++++- > block/qcow2.h | 83 +++++++- > docs/interop/qcow2.rst | 117 ++++++++++- > include/block/block_int-common.h | 15 +- > qapi/block-core.json | 91 ++++++++- > 6 files changed, 628 insertions(+), 9 deletions(-) > > diff --git a/block/file-posix.c b/block/file-posix.c > index e49b13d6ab..14278785b9 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -3607,7 +3607,7 @@ raw_co_zone_append(BlockDriverState *bs, > > if (*offset & zone_size_mask) { > error_report("sector offset %" PRId64 " is not aligned to zone size " > - "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512); > + "%" PRId64 "", *offset / 512, bs->bl.zone_size / 512); > return -EINVAL; > } > > diff --git a/block/qcow2.c b/block/qcow2.c > index 81fd299b4c..29eec33e34 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -73,6 +73,7 @@ typedef struct { > #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 > #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 > #define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 > +#define QCOW2_EXT_MAGIC_ZONED_FORMAT 0x007a6264 > > static int coroutine_fn > qcow2_co_preadv_compressed(BlockDriverState *bs, > @@ -194,6 +195,93 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char > *fmt, Error **errp) > return cryptoopts_qdict; > } > > +/* > + * Passing by the zoned device configurations by a zoned_header struct, check > + * if the zone device options are under constraints. Return false when some > + * option is invalid > + */
I have trouble parsing the first sentence. Maybe replace this doc
comment with "Returns true if zone_opt is valid, false otherwise"?
> +static bool
> +qcow2_check_zone_options(Qcow2ZonedHeaderExtension *zone_opt, Error **errp)
> +{
> + uint32_t sequential_zones;
> +
> + assert(zone_opt != NULL);
> +
> + if (zone_opt->zoned != QCOW2_Z_NONE && zone_opt->zoned != QCOW2_Z_HM) {
> + error_setg(errp, "Zoned extension header zoned field has unknown "
> + "value %" PRIu8, zone_opt->zoned);
> + return false;
> + }
> +
> + if (!is_power_of_2(zone_opt->zone_size)) {
> + error_setg(errp, "Zoned extension header zone_size %" PRIu64
> + "B is not a power of 2", zone_opt->zone_size);
> + return false;
> + }
> +
> + if (zone_opt->nr_zones > UINT32_MAX / 8) {
> + error_setg(errp, "Zoned extension header nr_zones %" PRIu32
> + " exceeds maximum %u",
> + zone_opt->nr_zones, UINT32_MAX / 8);
> + return false;
> + }
> +
> + if (zone_opt->zone_capacity > zone_opt->zone_size) {
> + error_setg(errp, "zone capacity %" PRIu64 "B exceeds zone size "
> + "%" PRIu64 "B", zone_opt->zone_capacity,
> + zone_opt->zone_size);
> + return false;
> + }
> +
> + if (!QEMU_IS_ALIGNED(zone_opt->max_append_bytes, BDRV_SECTOR_SIZE)) {
> + error_setg(errp, "max append bytes %" PRIu32 "B is not aligned "
> + "to %" PRIu64, zone_opt->max_append_bytes,
> + (uint64_t)BDRV_SECTOR_SIZE);
> + return false;
> + }
> +
> + if (zone_opt->max_append_bytes + BDRV_SECTOR_SIZE >=
> + zone_opt->zone_capacity) {
> + error_setg(errp, "max append bytes %" PRIu32 "B exceeds zone "
> + "capacity %" PRIu64 "B by more than block size",
> + zone_opt->max_append_bytes, zone_opt->zone_capacity);
> + return false;
> + }
> +
> + if (zone_opt->conventional_zones >= zone_opt->nr_zones) {
> + error_setg(errp, "Conventional_zones %" PRIu32 " exceeds "
> + "nr_zones %" PRIu32 ".",
> + zone_opt->conventional_zones, zone_opt->nr_zones);
> + return false;
> + }
> +
> + if (zone_opt->max_active_zones > zone_opt->nr_zones) {
> + error_setg(errp, "Max_active_zones %" PRIu32 " exceeds "
> + "nr_zones %" PRIu32 ". Set it to nr_zones.",
> + zone_opt->max_active_zones, zone_opt->nr_zones);
> + zone_opt->max_active_zones = zone_opt->nr_zones;
> + }
errp is an error that must be handled by the caller, but the function
will return true. The caller may not notice the Error object and it may
be leaked.
Another issue with this approach is that error_setg(errp, "a");
error_setg(errp, "b"); is not allowed (see assert(*errp == NULL) in
error_setv()). So if two of these conditionals are taken then QEMU will
abort.
I think the intention is to print a warning rather than to set an error
here. Use warn_report() without touching errp to avoid these issues.
> +
> + sequential_zones = zone_opt->nr_zones - zone_opt->conventional_zones;
> + if (zone_opt->max_open_zones > sequential_zones) {
> + error_setg(errp, "Max_open_zones field can not be larger than"
> + "the number of SWR zones. Set it to number of SWR"
> + "zones %" PRIu32 ".", sequential_zones);
> + zone_opt->max_open_zones = sequential_zones;
> + }
> + if (zone_opt->max_active_zones != 0 &&
> + zone_opt->max_open_zones > zone_opt->max_active_zones) {
> + error_setg(errp, "Max_open_zones %" PRIu32 " exceeds "
> + "max_active_zones %" PRIu32 ". Set it to "
> + "max_active_zones.",
> + zone_opt->max_open_zones,
> + zone_opt->max_active_zones);
> + zone_opt->max_open_zones = zone_opt->max_active_zones;
> + }
> +
> + return true;
> +}
> +
> /*
> * read qcow2 extension and fill bs
> * start reading from start_offset
> @@ -211,6 +299,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t
> start_offset,
> uint64_t offset;
> int ret;
> Qcow2BitmapHeaderExt bitmaps_ext;
> + Qcow2ZonedHeaderExtension zoned_ext;
>
> if (need_update_header != NULL) {
> *need_update_header = false;
> @@ -432,6 +521,50 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t
> start_offset,
> break;
> }
>
> + case QCOW2_EXT_MAGIC_ZONED_FORMAT:
> + {
> + if (ext.len != sizeof(zoned_ext)) {
> + error_setg(errp, "zoned_ext: unexpected len=%" PRIu32 " "
> + "(expected %zu)", ext.len, sizeof(zoned_ext));
> + return -EINVAL;
> + }
> + ret = bdrv_pread(bs->file, offset, ext.len, &zoned_ext, 0);
> + if (ret < 0) {
> + error_setg_errno(errp, -ret, "zoned_ext: "
> + "Could not read ext header");
> + return ret;
> + }
> +
> + zoned_ext.zone_size = be64_to_cpu(zoned_ext.zone_size);
> + zoned_ext.zone_capacity = be64_to_cpu(zoned_ext.zone_capacity);
> + zoned_ext.conventional_zones =
> + be32_to_cpu(zoned_ext.conventional_zones);
> + zoned_ext.nr_zones = be32_to_cpu(zoned_ext.nr_zones);
> + zoned_ext.max_open_zones = be32_to_cpu(zoned_ext.max_open_zones);
> + zoned_ext.max_active_zones =
> + be32_to_cpu(zoned_ext.max_active_zones);
> + zoned_ext.max_append_bytes =
> + be32_to_cpu(zoned_ext.max_append_bytes);
> + s->zoned_header = zoned_ext;
> +
> + /* refuse to open broken images */
> + if (zoned_ext.nr_zones != DIV_ROUND_UP(bs->total_sectors *
> + BDRV_SECTOR_SIZE, zoned_ext.zone_size)) {
This is not safe when validating untrusted qcow2 files because
zoned_ext.zone_size could be 0 (division-by-zero kills the process) and
bs->total_sectors * BDRV_SECTOR_SIZE can integer overflow (unlikely
because the file would have to be extremely large).
Can you express this in a way that is safe? If necessary, split it into
multiple checks.
> + error_setg(errp, "Zoned extension header nr_zones field "
> + "is wrong");
> + return -EINVAL;
> + }
> + if (!qcow2_check_zone_options(&zoned_ext, errp)) {
> + return -EINVAL;
> + }
> +
> +#ifdef DEBUG_EXT
> + printf("Qcow2: Got zoned format extension: "
> + "offset=%" PRIu32 "\n", offset);
> +#endif
> + break;
> + }
> +
> default:
> /* unknown magic - save it in case we need to rewrite the header
> */
> /* If you add a new feature, make sure to also update the fast
> @@ -2068,6 +2201,25 @@ static void qcow2_refresh_limits(BlockDriverState *bs,
> Error **errp)
> }
> bs->bl.pwrite_zeroes_alignment = s->subcluster_size;
> bs->bl.pdiscard_alignment = s->cluster_size;
> +
> + switch (s->zoned_header.zoned) {
> + case QCOW2_Z_HM:
> + bs->bl.zoned = BLK_Z_HM;
> + break;
> + case QCOW2_Z_NONE:
> + default:
> + bs->bl.zoned = BLK_Z_NONE;
> + break;
> + }
> +
> + bs->bl.nr_zones = s->zoned_header.nr_zones;
> + bs->bl.max_append_sectors = s->zoned_header.max_append_bytes
> + >> BDRV_SECTOR_BITS;
> + bs->bl.max_active_zones = s->zoned_header.max_active_zones;
> + bs->bl.max_open_zones = s->zoned_header.max_open_zones;
> + bs->bl.zone_size = s->zoned_header.zone_size;
> + bs->bl.zone_capacity = s->zoned_header.zone_capacity;
> + bs->bl.write_granularity = BDRV_SECTOR_SIZE;
> }
>
> static int GRAPH_UNLOCKED
> @@ -3170,6 +3322,11 @@ int qcow2_update_header(BlockDriverState *bs)
> .bit = QCOW2_INCOMPAT_EXTL2_BITNR,
> .name = "extended L2 entries",
> },
> + {
> + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
> + .bit = QCOW2_INCOMPAT_ZONED_FORMAT_BITNR,
> + .name = "zoned format",
> + },
> {
> .type = QCOW2_FEAT_TYPE_COMPATIBLE,
> .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
> @@ -3215,6 +3372,31 @@ int qcow2_update_header(BlockDriverState *bs)
> buflen -= ret;
> }
>
> + /* Zoned devices header extension */
> + if (s->zoned_header.zoned == QCOW2_Z_HM) {
> + Qcow2ZonedHeaderExtension zoned_header = {
> + .zoned = s->zoned_header.zoned,
> + .zone_size = cpu_to_be64(s->zoned_header.zone_size),
> + .zone_capacity = cpu_to_be64(s->zoned_header.zone_capacity),
> + .conventional_zones =
> + cpu_to_be32(s->zoned_header.conventional_zones),
> + .nr_zones = cpu_to_be32(s->zoned_header.nr_zones),
> + .max_open_zones =
> cpu_to_be32(s->zoned_header.max_open_zones),
> + .max_active_zones =
> + cpu_to_be32(s->zoned_header.max_active_zones),
> + .max_append_bytes =
> + cpu_to_be32(s->zoned_header.max_append_bytes)
> + };
> + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_ZONED_FORMAT,
> + &zoned_header, sizeof(zoned_header),
> + buflen);
> + if (ret < 0) {
> + goto fail;
> + }
> + buf += ret;
> + buflen -= ret;
> + }
> +
> /* Keep unknown header extensions */
> QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
> ret = header_ext_add(buf, uext->magic, uext->data, uext->len,
> buflen);
> @@ -3589,6 +3771,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options,
> Error **errp)
> ERRP_GUARD();
> BlockdevCreateOptionsQcow2 *qcow2_opts;
> QDict *options;
> + Qcow2ZoneCreateOptions *zone_struct;
> + Qcow2ZoneHostManaged *zone_host_managed;
>
> /*
> * Open the image file and write a minimal qcow2 header.
> @@ -3615,6 +3799,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options,
> Error **errp)
>
> assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
> qcow2_opts = &create_options->u.qcow2;
> + zone_struct = create_options->u.qcow2.zone;
A slightly more concise way of writing this:
zone_struct = qcow2_opts->zone;
> + zone_host_managed = &create_options->u.qcow2.zone->u.host_managed;
This must be moved down to avoid deferencing a NULL
create_options->u.qcow2.zone pointer:
if (zone_struct && zone_struct->mode == QCOW2_ZONE_MODEL_HOST_MANAGED) {
Qcow2ZoneHostManaged *zone_host_managed = &zone_struct->u.host_managed;
signature.asc
Description: PGP signature
