All zones of a zoned block device should be reset before the device is
written. Support this by handling zone reset as a special case of block
discard, and by making block zeroing zone-aware. Note that only zones
accepting random writes can be zeroed; other zones are skipped.

Signed-off-by: Naohiro Aota <naohiro.a...@wdc.com>
---
 utils.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 88 insertions(+), 6 deletions(-)

diff --git a/utils.c b/utils.c
index a26fe7a5743c..c375b32953f7 100644
--- a/utils.c
+++ b/utils.c
@@ -123,6 +123,37 @@ static int discard_range(int fd, u64 start, u64 len)
        return 0;
 }
 
+/*
+ * Discard all zones of a zoned block device, one zone at a time.
+ * Conventional zones go through discard_range() (best effort, result
+ * ignored); any other non-empty zone is reset with BLKRESETZONE.
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int discard_zones(int fd, struct btrfs_zone_info *zinfo)
+{
+#ifdef BTRFS_ZONED
+       unsigned int i;
+
+       /* The shifts by 9 convert between 512-byte sectors and bytes */
+       for (i = 0; i < zinfo->nr_zones; i++) {
+               if (zinfo->zones[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
+                       discard_range(fd, zinfo->zones[i].start << 9,
+                                     zinfo->zone_size);
+               } else if (zinfo->zones[i].cond != BLK_ZONE_COND_EMPTY) {
+                       struct blk_zone_range range = {
+                               .sector = zinfo->zones[i].start,
+                               .nr_sectors = zinfo->zone_size >> 9 };
+                       if (ioctl(fd, BLKRESETZONE, &range) < 0)
+                               return -errno;
+               }
+       }
+
+       return 0;
+#else
+       return -EIO;
+#endif
+}
+
 /*
  * Discard blocks in the given range in 1G chunks, the process is interruptible
  */
@@ -205,8 +236,38 @@ static int zero_blocks(int fd, off_t start, size_t len)
 
 #define ZERO_DEV_BYTES SZ_2M
 
+/*
+ * Zero [start, start + len) on a zoned device one zone at a time, leaving
+ * out zones for which zone_is_random_write() is false.
+ */
+static int zero_zone_blocks(int fd, struct btrfs_zone_info *zinfo,
+                           off_t start, size_t len)
+{
+       const size_t zone_bytes = zinfo->zone_size;
+       off_t pos = start;
+       size_t chunk;
+       int ret;
+
+       for (; len > 0; len -= chunk, pos += chunk) {
+               /* Clamp the chunk so it ends at the next zone boundary */
+               chunk = min_t(size_t, len, zone_bytes);
+               chunk = min_t(size_t, chunk,
+                             zone_bytes - (pos & (zone_bytes - 1)));
+
+               /* zero_blocks() must never touch a sequential zone */
+               if (!zone_is_random_write(zinfo, pos))
+                       continue;
+               ret = zero_blocks(fd, pos, chunk);
+               if (ret != 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
 /* don't write outside the device by clamping the region to the device size */
-static int zero_dev_clamped(int fd, off_t start, ssize_t len, u64 dev_size)
+static int zero_dev_clamped(int fd, struct btrfs_zone_info *zinfo,
+                           off_t start, ssize_t len, u64 dev_size)
 {
        off_t end = max(start, start + len);
 
@@ -219,6 +280,9 @@ static int zero_dev_clamped(int fd, off_t start, ssize_t len, u64 dev_size)
        start = min_t(u64, start, dev_size);
        end = min_t(u64, end, dev_size);
 
+       if (zinfo->model != ZONED_NONE)
+               return zero_zone_blocks(fd, zinfo, start, end - start);
+
        return zero_blocks(fd, start, end - start);
 }
 
@@ -566,6 +630,7 @@ int btrfs_get_zone_info(int fd, const char *file, int hmzoned,
 int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
                u64 max_block_count, unsigned opflags)
 {
+       struct btrfs_zone_info zinfo;
        u64 block_count;
        struct stat st;
        int i, ret;
@@ -584,13 +649,30 @@ int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
        if (max_block_count)
                block_count = min(block_count, max_block_count);
 
+       ret = btrfs_get_zone_info(fd, file, opflags & PREP_DEVICE_HMZONED,
+                                 &zinfo);
+       if (ret < 0)
+               return 1;
+
        if (opflags & PREP_DEVICE_DISCARD) {
                /*
                 * We intentionally ignore errors from the discard ioctl.  It
                 * is not necessary for the mkfs functionality but just an
-                * optimization.
+                * optimization. However, we cannot ignore zone discard (reset)
+                * errors for a zoned block device as this could result in the
+                * inability to write to non-empty sequential zones of the
+                * device.
                 */
-               if (discard_range(fd, 0, 0) == 0) {
+               if (zinfo.model != ZONED_NONE) {
+                       printf("Resetting device zones %s (%u zones) ...\n",
+                               file, zinfo.nr_zones);
+                       if (discard_zones(fd, &zinfo)) {
+                               fprintf(stderr,
+                                       "ERROR: failed to reset device '%s' zones\n",
+                                       file);
+                               return 1;
+                       }
+               } else if (discard_range(fd, 0, 0) == 0) {
                        if (opflags & PREP_DEVICE_VERBOSE)
                                printf("Performing full device TRIM %s (%s) ...\n",
                                                file, pretty_size(block_count));
@@ -598,12 +680,12 @@ int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
                }
        }
 
-       ret = zero_dev_clamped(fd, 0, ZERO_DEV_BYTES, block_count);
+       ret = zero_dev_clamped(fd, &zinfo, 0, ZERO_DEV_BYTES, block_count);
        for (i = 0 ; !ret && i < BTRFS_SUPER_MIRROR_MAX; i++)
-               ret = zero_dev_clamped(fd, btrfs_sb_offset(i),
+               ret = zero_dev_clamped(fd, &zinfo, btrfs_sb_offset(i),
                                       BTRFS_SUPER_INFO_SIZE, block_count);
        if (!ret && (opflags & PREP_DEVICE_ZERO_END))
-               ret = zero_dev_clamped(fd, block_count - ZERO_DEV_BYTES,
+               ret = zero_dev_clamped(fd, &zinfo, block_count - ZERO_DEV_BYTES,
                                       ZERO_DEV_BYTES, block_count);
 
        if (ret < 0) {
-- 
2.21.0

Reply via email to