Previous patches mentioned how the blkdebug filter driver demonstrates
a bug present in our NBD server; the bug is also present with the raw
format driver when probing occurs. Basically, if we specify a
particular alignment > 1, but defer the actual block status to the
underlying file, and the underlying file has a smaller alignment, then
the use of BDRV_BLOCK_RAW to defer to the underlying file can end up
with status split at an alignment unacceptable to our layer.  Many
callers don't care, but NBD does - it is a violation of the NBD
protocol to send status or read results split on an unaligned boundary
(we've taught our client to be tolerant of such violations because of
qemu 3.1; but we still need to fix our server for the sake of other
stricter clients).

This patch lays the groundwork - it adjusts bdrv_block_status to
ensure that any time one layer defers to another via BDRV_BLOCK_RAW,
the deferral is either truncated down to an aligned boundary, or
multiple sub-aligned requests are coalesced into a single
representative answer (using an implicit hole beyond EOF as
needed). Iotest 241 exposes the effect (when format probing occurred,
we don't want block status to subdivide the initial sector, and thus
not any other sector either). Similarly, iotest 221 is a good
candidate to amend to specifically track the effects; a change to a
hole at EOF is not visible if an upper layer does not want alignment
smaller than 512. However, this patch alone is not a complete fix - it
is still possible to have an active layer with large alignment
constraints backed by another layer with smaller constraints; so the
next patch will complete the task.

Signed-off-by: Eric Blake <ebl...@redhat.com>
---
 block/io.c                 | 108 +++++++++++++++++++++++++++++++++++--
 tests/qemu-iotests/221     |  10 ++++
 tests/qemu-iotests/221.out |   6 +++
 tests/qemu-iotests/241.out |   3 +-
 4 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index dfc153b8d82..936877d3745 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2021,6 +2021,97 @@ int coroutine_fn 
bdrv_co_block_status_from_backing(BlockDriverState *bs,
     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 }

+static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
+                                             bool want_zero,
+                                             int64_t offset, int64_t bytes,
+                                             int64_t *pnum, int64_t *map,
+                                             BlockDriverState **file);
+
+/*
+ * Returns an aligned allocation status of the specified sectors.
+ * Wrapper around bdrv_co_block_status() which requires the initial
+ * offset and count to be aligned, and guarantees the result will also
+ * be aligned (even if it requires piecing together multiple
+ * sub-aligned queries into an appropriate coalesced answer).
+ */
+static int coroutine_fn bdrv_co_block_status_aligned(BlockDriverState *bs,
+                                                     uint32_t align,
+                                                     bool want_zero,
+                                                     int64_t offset,
+                                                     int64_t bytes,
+                                                     int64_t *pnum,
+                                                     int64_t *map,
+                                                     BlockDriverState **file)
+{
+    int ret;
+
+    assert(is_power_of_2(align) && QEMU_IS_ALIGNED(offset | bytes, align));
+    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+    if (ret < 0) {
+        return ret;
+    }
+    if (!*pnum) {
+        assert(!bytes || ret & BDRV_BLOCK_EOF);
+        return ret;
+    }
+    if (*pnum >= align) {
+        if (!QEMU_IS_ALIGNED(*pnum, align)) {
+            ret &= ~BDRV_BLOCK_EOF;
+            *pnum = QEMU_ALIGN_DOWN(*pnum, align);
+        }
+        return ret;
+    }
+    /* Merge in status for the rest of align */
+    while (*pnum < align) {
+        int ret2;
+        int64_t pnum2;
+        int64_t map2;
+        BlockDriverState *file2;
+
+        ret2 = bdrv_co_block_status(bs, want_zero, offset + *pnum,
+                                    align - *pnum, &pnum2,
+                                    map ? &map2 : NULL, file ? &file2 : NULL);
+        if (ret2 < 0) {
+            return ret2;
+        }
+        if (ret2 & BDRV_BLOCK_EOF) {
+            ret |= BDRV_BLOCK_EOF;
+            if (!pnum2) {
+                pnum2 = align - *pnum;
+                ret2 |= BDRV_BLOCK_ZERO;
+            }
+        } else {
+            assert(pnum2);
+        }
+        if (ret2 & BDRV_BLOCK_DATA) {
+            ret |= BDRV_BLOCK_DATA;
+        }
+        if (!(ret2 & BDRV_BLOCK_ZERO)) {
+            ret &= ~BDRV_BLOCK_ZERO;
+        }
+        if ((ret & BDRV_BLOCK_OFFSET_VALID) &&
+            (!(ret2 & BDRV_BLOCK_OFFSET_VALID) ||
+             (map && *map + *pnum != map2) || (file && *file != file2))) {
+            ret &= ~BDRV_BLOCK_OFFSET_VALID;
+            if (map) {
+                *map = 0;
+            }
+            if (file) {
+                *file = NULL;
+            }
+        }
+        if (ret2 & BDRV_BLOCK_ALLOCATED) {
+            ret |= BDRV_BLOCK_ALLOCATED;
+        }
+        if (ret2 & BDRV_BLOCK_RAW) {
+            assert(ret & BDRV_BLOCK_RAW);
+            assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        }
+        *pnum += pnum2;
+    }
+    return ret;
+}
+
 /*
  * Returns the allocation status of the specified sectors.
  * Drivers not implementing the functionality are assumed to not support
@@ -2121,6 +2212,19 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
      */
     assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
            align > offset - aligned_offset);
+
+    if (ret & BDRV_BLOCK_RAW) {
+        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
+        ret = bdrv_co_block_status_aligned(local_file, align, want_zero,
+                                           local_map, *pnum, pnum, &local_map,
+                                           &local_file);
+        if (ret < 0) {
+            goto out;
+        }
+        assert(!(ret & BDRV_BLOCK_RAW));
+        ret |= BDRV_BLOCK_RAW;
+    }
+
     *pnum -= offset - aligned_offset;
     if (*pnum > bytes) {
         *pnum = bytes;
@@ -2130,9 +2234,7 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
     }

     if (ret & BDRV_BLOCK_RAW) {
-        assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
-        ret = bdrv_co_block_status(local_file, want_zero, local_map,
-                                   *pnum, pnum, &local_map, &local_file);
+        ret &= ~BDRV_BLOCK_RAW;
         goto out;
     }

diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221
index 808cd9a289c..3f60fd0fdc8 100755
--- a/tests/qemu-iotests/221
+++ b/tests/qemu-iotests/221
@@ -41,17 +41,27 @@ echo
 echo "=== Check mapping of unaligned raw image ==="
 echo

+# Note that when we enable format probing by omitting -f, the raw
+# layer forces 512-byte alignment and the bytes past EOF take on the
+# same status as the rest of the sector; otherwise, we can see the
+# implicit hole visible past EOF thanks to the block layer rounding
+# sizes up.
+
 _make_test_img 43009 # qemu-img create rounds size up
 $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IMG map -f $IMGFMT --output=json "$TEST_IMG" | _filter_qemu_img_map

 truncate --size=43009 "$TEST_IMG" # so we resize it and check again
 $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IMG map -f $IMGFMT --output=json "$TEST_IMG" | _filter_qemu_img_map

 $QEMU_IO -c 'w 43008 1' "$TEST_IMG" | _filter_qemu_io # writing also rounds up
 $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IMG map -f $IMGFMT --output=json "$TEST_IMG" | _filter_qemu_img_map

 truncate --size=43009 "$TEST_IMG" # so we resize it and check again
 $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IMG map -f $IMGFMT --output=json "$TEST_IMG" | _filter_qemu_img_map

 # success, all done
 echo '*** done'
diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out
index a9c0190aadc..723e94037fe 100644
--- a/tests/qemu-iotests/221.out
+++ b/tests/qemu-iotests/221.out
@@ -5,12 +5,18 @@ QA output created by 221
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=43009
 [{ "start": 0, "length": 43520, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET}]
 [{ "start": 0, "length": 43520, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET}]
+[{ "start": 0, "length": 43520, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET}]
+[{ "start": 0, "length": 43520, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET}]
 wrote 1/1 bytes at offset 43008
 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 40960, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET},
+{ "start": 40960, "length": 2560, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}]
+[{ "start": 0, "length": 40960, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET},
 { "start": 40960, "length": 2049, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 43009, "length": 511, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET}]
 [{ "start": 0, "length": 40960, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET},
+{ "start": 40960, "length": 2560, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}]
+[{ "start": 0, "length": 40960, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET},
 { "start": 40960, "length": 2049, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 43009, "length": 511, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET}]
 *** done
diff --git a/tests/qemu-iotests/241.out b/tests/qemu-iotests/241.out
index ef7de1205d2..fd856bb0c8d 100644
--- a/tests/qemu-iotests/241.out
+++ b/tests/qemu-iotests/241.out
@@ -22,8 +22,7 @@ WARNING: Image format was not specified for 
'/home/eblake/qemu/tests/qemu-iotest

   size:  1024
   min block: 1
-[{ "start": 0, "length": 1000, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
-{ "start": 1000, "length": 24, "depth": 0, "zero": true, "data": true, 
"offset": OFFSET}]
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}]
 1 KiB (0x400) bytes     allocated at offset 0 bytes (0x0)

 === Encrypted qcow2 file backed by unaligned raw image  ===
-- 
2.20.1


Reply via email to