Re: [PATCH v6 3/4] qcow2: add zoned emulation capability

2023-11-26 Thread Damien Le Moal
On 11/27/23 13:37, Sam Li wrote:
> By adding zone operations and zoned metadata, the zoned emulation
> capability enables full emulation of a zoned device using
> a qcow2 file. The zoned device metadata includes zone type,
> zoned device state and write pointer of each zone, which are stored
> in an array of unsigned integers.
> 
> Each zone of a zoned device makes state transitions following
> the zone state machine. The zone state machine mainly describes
> five states: IMPLICIT OPEN, EXPLICIT OPEN, FULL, EMPTY and CLOSED.
> The READ ONLY and OFFLINE states are generally caused by
> device-internal events. Operations on zones cause the corresponding
> state changes.
> 
> Zoned devices have a limit on zone resources, which puts constraints on
> write operations into zones. The limit is managed by active zone lists
> following an LRU policy.
> 
> Signed-off-by: Sam Li 
> ---
>  block/qcow2.c| 741 ++-
>  block/trace-events   |   2 +
>  include/qemu/queue.h |   1 +
>  3 files changed, 742 insertions(+), 2 deletions(-)
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 9a92cd242c..26f2bb4a87 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -195,6 +195,179 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char 
> *fmt, Error **errp)
>  return cryptoopts_qdict;
>  }
>  
> +#define QCOW2_ZT_IS_CONV(wp)(wp & 1ULL << 59)
> +
> +/*
> + * To emulate a real zoned device, closed, empty and full states are
> + * preserved after a power cycle. Open states are in-memory and will
> + * be lost after closing the device. Read-only and offline states are
> + * device-internal events, which are not considered for simplicity.
> + */
> +static inline BlockZoneState qcow2_get_zone_state(BlockDriverState *bs,
> +  uint32_t index)
> +{
> +BDRVQcow2State *s = bs->opaque;
> +Qcow2ZoneListEntry *zone_entry = &s->zone_list_entries[index];
> +uint64_t zone_wp = bs->wps->wp[index];
> +uint64_t zone_start;
> +
> +if (QCOW2_ZT_IS_CONV(zone_wp)) {
> +return BLK_ZS_NOT_WP;
> +}
> +
> +if (QLIST_IS_INSERTED(zone_entry, exp_open_zone_entry)) {
> +return BLK_ZS_EOPEN;
> +}
> +if (QLIST_IS_INSERTED(zone_entry, imp_open_zone_entry)) {
> +return BLK_ZS_IOPEN;
> +}
> +
> +zone_start = index * bs->bl.zone_size;
> +if (zone_wp == zone_start) {
> +return BLK_ZS_EMPTY;
> +}
> +if (zone_wp >= zone_start + bs->bl.zone_capacity) {
> +return BLK_ZS_FULL;
> +}
> +if (zone_wp > zone_start) {
> +return BLK_ZS_CLOSED;
> +}
> +return BLK_ZS_NOT_WP;
> +}
> +
> +/*
> + * Write the new wp value to the dedicated location of the image file.
> + */
> +static int qcow2_write_wp_at(BlockDriverState *bs, uint64_t *wp,
> + uint32_t index) {
> +BDRVQcow2State *s = bs->opaque;
> +uint64_t wpv = *wp;
> +int ret;
> +
> +ret = bdrv_pwrite(bs->file, s->zoned_header.zonedmeta_offset
> ++ sizeof(uint64_t) * index, sizeof(uint64_t), wp, 0);
> +if (ret < 0) {
> +goto exit;
> +}
> +trace_qcow2_wp_tracking(index, *wp >> BDRV_SECTOR_BITS);
> +return ret;
> +
> +exit:
> +*wp = wpv;
> +error_report("Failed to write metadata with file");
> +return ret;
> +}
> +
> +static bool qcow2_can_activate_zone(BlockDriverState *bs)
> +{
> +BDRVQcow2State *s = bs->opaque;

A blank line here after the declaration would be nice.

> +/* When the max active zone is zero, there is no limit on active zones */
> +if (!s->zoned_header.max_active_zones) {
> +return true;
> +}
> +
> +/* The active zones are zones with the states of open and closed */

/* Active zones are zones that are open or closed */

> +if (s->nr_zones_exp_open + s->nr_zones_imp_open + s->nr_zones_closed
> +< s->zoned_header.max_active_zones) {

return s->nr_zones_exp_open + s->nr_zones_imp_open +
s->nr_zones_closed < s->zoned_header.max_active_zones;

> +return true;
> +}
> +
> +return false;
> +}
> +
> +/*
> + * This function manages open zones under active zones limit. It checks
> + * if a zone can transition to open state while maintaining max open and
> + * active zone limits.
> + */
> +static bool qcow2_can_open_zone(BlockDriverState *bs)
> +{
> +BDRVQcow2State *s = bs->opaque;
> +Qcow2ZoneListEntry *zone_entry;
> +
> +/* When the max open zone is zero, there is no limit on open zones */
> +if (!s->zoned_header.max_open_zones) {
> +return true;
> +}
> +
> +/*
> + * The open zones are zones with the states of explicitly and
> + * implicitly open.
> + */
> +if (s->nr_zones_imp_open + s->nr_zones_exp_open <
> +s->zoned_header.max_open_zones) {
> +return true;
> +}
> +
> +/*
> + * Zones are managed once at a time. Thus, the number of implicitly open


[PATCH v6 3/4] qcow2: add zoned emulation capability

2023-11-26 Thread Sam Li
By adding zone operations and zoned metadata, the zoned emulation
capability enables full emulation of a zoned device using
a qcow2 file. The zoned device metadata includes zone type,
zoned device state and write pointer of each zone, which are stored
in an array of unsigned integers.

Each zone of a zoned device makes state transitions following
the zone state machine. The zone state machine mainly describes
five states: IMPLICIT OPEN, EXPLICIT OPEN, FULL, EMPTY and CLOSED.
The READ ONLY and OFFLINE states are generally caused by
device-internal events. Operations on zones cause the corresponding
state changes.

Zoned devices have a limit on zone resources, which puts constraints on
write operations into zones. The limit is managed by active zone lists
following an LRU policy.

Signed-off-by: Sam Li 
---
 block/qcow2.c| 741 ++-
 block/trace-events   |   2 +
 include/qemu/queue.h |   1 +
 3 files changed, 742 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 9a92cd242c..26f2bb4a87 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -195,6 +195,179 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char 
*fmt, Error **errp)
 return cryptoopts_qdict;
 }
 
+#define QCOW2_ZT_IS_CONV(wp)(wp & 1ULL << 59)
+
+/*
+ * To emulate a real zoned device, closed, empty and full states are
+ * preserved after a power cycle. Open states are in-memory and will
+ * be lost after closing the device. Read-only and offline states are
+ * device-internal events, which are not considered for simplicity.
+ */
+static inline BlockZoneState qcow2_get_zone_state(BlockDriverState *bs,
+  uint32_t index)
+{
+BDRVQcow2State *s = bs->opaque;
+Qcow2ZoneListEntry *zone_entry = &s->zone_list_entries[index];
+uint64_t zone_wp = bs->wps->wp[index];
+uint64_t zone_start;
+
+if (QCOW2_ZT_IS_CONV(zone_wp)) {
+return BLK_ZS_NOT_WP;
+}
+
+if (QLIST_IS_INSERTED(zone_entry, exp_open_zone_entry)) {
+return BLK_ZS_EOPEN;
+}
+if (QLIST_IS_INSERTED(zone_entry, imp_open_zone_entry)) {
+return BLK_ZS_IOPEN;
+}
+
+zone_start = index * bs->bl.zone_size;
+if (zone_wp == zone_start) {
+return BLK_ZS_EMPTY;
+}
+if (zone_wp >= zone_start + bs->bl.zone_capacity) {
+return BLK_ZS_FULL;
+}
+if (zone_wp > zone_start) {
+return BLK_ZS_CLOSED;
+}
+return BLK_ZS_NOT_WP;
+}
+
+/*
+ * Write the new wp value to the dedicated location of the image file.
+ */
+static int qcow2_write_wp_at(BlockDriverState *bs, uint64_t *wp,
+ uint32_t index) {
+BDRVQcow2State *s = bs->opaque;
+uint64_t wpv = *wp;
+int ret;
+
+ret = bdrv_pwrite(bs->file, s->zoned_header.zonedmeta_offset
++ sizeof(uint64_t) * index, sizeof(uint64_t), wp, 0);
+if (ret < 0) {
+goto exit;
+}
+trace_qcow2_wp_tracking(index, *wp >> BDRV_SECTOR_BITS);
+return ret;
+
+exit:
+*wp = wpv;
+error_report("Failed to write metadata with file");
+return ret;
+}
+
+static bool qcow2_can_activate_zone(BlockDriverState *bs)
+{
+BDRVQcow2State *s = bs->opaque;
+/* When the max active zone is zero, there is no limit on active zones */
+if (!s->zoned_header.max_active_zones) {
+return true;
+}
+
+/* The active zones are zones with the states of open and closed */
+if (s->nr_zones_exp_open + s->nr_zones_imp_open + s->nr_zones_closed
+< s->zoned_header.max_active_zones) {
+return true;
+}
+
+return false;
+}
+
+/*
+ * This function manages open zones under active zones limit. It checks
+ * if a zone can transition to open state while maintaining max open and
+ * active zone limits.
+ */
+static bool qcow2_can_open_zone(BlockDriverState *bs)
+{
+BDRVQcow2State *s = bs->opaque;
+Qcow2ZoneListEntry *zone_entry;
+
+/* When the max open zone is zero, there is no limit on open zones */
+if (!s->zoned_header.max_open_zones) {
+return true;
+}
+
+/*
+ * The open zones are zones with the states of explicitly and
+ * implicitly open.
+ */
+if (s->nr_zones_imp_open + s->nr_zones_exp_open <
+s->zoned_header.max_open_zones) {
+return true;
+}
+
+/*
+ * Zones are managed one at a time. Thus, the number of implicitly open
+ * zones can never exceed the open zone limit. When the active zone limit
+ * is not reached, close only one implicitly open zone.
+ */
+if (qcow2_can_activate_zone(bs)) {
+/*
+ * The LRU policy is used for handling active zone lists. When
+ * removing a random zone entry, we discard the least recently used
+ * list item. The list item at the last is the least recently used
+ * one. The zone list maintained this property by removing the last
+ * entry and inserting before the first entry.
+ */
+