This allows arbitrarily large block sizes to be used for I/O
requests and matches the behavior of most other block drivers.

This commit is mostly based on the one from the Spirent fork of OSv -
https://github.com/SpirentOrion/osv/commit/45306415040521ac875ec7f6ba0ad6671ea8ad11.

However, it differs slightly by handling the situation when the 
VIRTIO_BLK_F_SEG_MAX
capability is not detected. In this case the driver behaves effectively
as if there was no limit on the size of the request which was the
original behavior.

It is worth noting that we already had a check on the size of the
request in runtime and would return EIO error (see make_request()
method), but this change makes the driver correctly chunk the request
using the multiplexing strategy if the request is larger than the
maximum imposed by the hypervisor.

In practical terms this commit can be tested using the misc-bdev-rw
test which has been adjusted to request reads and writes as large as 2
MB which are higher that the limit on QEMU (the VIRTIO_BLK_F_SEG_MAX is
equal to 254 = eq 1MB - 8K). Before the commit this test would hang
with 'i' equal to 255 and now it behaves correctly.

Finally some hypervisors like Firecracker do not provide the 
VIRTIO_BLK_F_SEG_MAX
capability, in which case our driver behaves as if there was no limit
which is also the old behavior.

Co-authored-by: "Timmons C. Player" <timmons.pla...@spirent.com>
Co-authored-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 drivers/virtio-blk.cc |  8 ++++++--
 tests/misc-bdev-rw.cc | 15 +++++++++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio-blk.cc b/drivers/virtio-blk.cc
index e03d41f7..643ca275 100644
--- a/drivers/virtio-blk.cc
+++ b/drivers/virtio-blk.cc
@@ -54,6 +54,7 @@ int blk::_instance = 0;
 
 
 struct blk_priv {
+    devop_strategy_t strategy;
     blk* drv;
 };
 
@@ -63,7 +64,6 @@ blk_strategy(struct bio *bio)
     struct blk_priv *prv = reinterpret_cast<struct 
blk_priv*>(bio->bio_dev->private_data);
 
     trace_virtio_blk_strategy(bio);
-    bio->bio_offset += bio->bio_dev->offset;
     prv->drv->make_request(bio);
 }
 
@@ -90,7 +90,7 @@ static struct devops blk_devops {
     blk_write,
     no_ioctl,
     no_devctl,
-    blk_strategy,
+    multiplex_strategy,
 };
 
 struct driver blk_driver = {
@@ -180,8 +180,10 @@ blk::blk(virtio_device& virtio_dev)
 
     dev = device_create(&blk_driver, dev_name.c_str(), D_BLK);
     prv = reinterpret_cast<struct blk_priv*>(dev->private_data);
+    prv->strategy = blk_strategy;
     prv->drv = this;
     dev->size = prv->drv->size();
+    dev->max_io_size = _config.seg_max ? (_config.seg_max - 1) * 
mmu::page_size : UINT_MAX;
     read_partition_table(dev);
 
     debugf("virtio-blk: Add blk device instances %d as %s, devsize=%lld\n", 
_id, dev_name.c_str(), dev->size);
@@ -208,6 +210,8 @@ void blk::read_config()
     if (get_guest_feature_bit(VIRTIO_BLK_F_SEG_MAX)) {
         READ_CONFIGURATION_FIELD(blk_config,seg_max,_config.seg_max)
         trace_virtio_blk_read_config_seg_max(_config.seg_max);
+    } else {
+        _config.seg_max = 0;
     }
     if (get_guest_feature_bit(VIRTIO_BLK_F_GEOMETRY)) {
         READ_CONFIGURATION_FIELD(blk_config,geometry,_config.geometry)
diff --git a/tests/misc-bdev-rw.cc b/tests/misc-bdev-rw.cc
index ae8eaf64..ce81c1fa 100644
--- a/tests/misc-bdev-rw.cc
+++ b/tests/misc-bdev-rw.cc
@@ -8,6 +8,17 @@
 
 #define MB (1024*1024)
 
+/*
+This test requires a standalone block device (not a one
+actively used by given filesystem) and can be created and
+mounted like so:
+
+dd if=/dev/zero of=/tmp/test1.raw bs=1M count=512
+qemu-img convert -O qcow2 /tmp/test1.raw /tmp/test1.img
+
+./scripts/run.py -e '/tests/misc-bdev-rw.so vblk1' --cloud-init-image 
/tmp/test1.img
+*/
+
 using namespace std;
 
 atomic<int> bio_inflights(0);
@@ -89,7 +100,7 @@ int main(int argc, char const *argv[])
     long written = 0;
 
     //Do all writes
-    for(auto i = 1; i < 32; i++)
+    for(auto i = 1; i < 511; i++)
     {
         const size_t buff_size = i * memory::page_size;
 
@@ -142,4 +153,4 @@ int main(int argc, char const *argv[])
          << "Test " << (test_failed.load() ? "FAILED" : "PASSED") << endl;
 
     return test_failed.load() ? 1 : 0;
-}
\ No newline at end of file
+}
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220628154711.108483-1-jwkozaczuk%40gmail.com.

Reply via email to