Re: [RFC PATCH v3] hw/nvme:Adding Support for namespace management
On Wed, Nov 10, 2021 at 04:56:29PM +0530, Naveen wrote: > From: Naveen Nagar > > This patch supports namespace management : create and delete operations > This patch has been tested with the following command and size of image > file for unallocated namespaces is taken as 0GB. ns_create will look into > the list of unallocated namespaces and it will initialize the same and > return the nsid of the same. A new mandatory field has been added called > tnvmcap and we have ensured that the total capacity of namespace created > does not exceed tnvmcap > > -device nvme-subsys,id=subsys0,tnvmcap=8 > -device nvme,serial=foo,id=nvme0,subsys=subsys0 > -device nvme,serial=bar,id=nvme1,subsys=subsys0 > > -drive id=ns1,file=ns1.img,if=none > -device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=true > -drive id=ns2,file=ns2.img,if=none > -device nvme-ns,drive=ns2,bus=nvme0,nsid=2,zoned=false,shared=true > -drive id=ns3,file=ns3.img,if=none > -device nvme-ns,drive=ns3,bus=nvme0,nsid=3,zoned=false,shared=true > -drive id=ns4,file=ns4.img,if=none > -device nvme-ns,drive=ns4,bus=nvme0,nsid=4,zoned=false,shared=true > > Please review and suggest if any changes are required. > > Signed-off-by: Naveen Nagar > > Since v2: > -Lukasz Maniak found a bug in namespace attachment and proposed > solution is added > Hi Naveen, The current implementation is inconsistent and thus has a bug related to unvmcap support. Namespaces are pre-allocated after boot, and the initial namespace size is the size of the associated blockdev. If the blockdevs are non-zero sized then the first deletion of the namespaces associated with them will increment unvmcap by their size. This will make unvmcap greater than tnvmcap. While the easiest way would be to prohibit the use of non-zero sized blockdev with namespace management, doing so would limit the functionality of the namespaces itself, which we would like to avoid. This fix below addresses issues related to unvmcap and non-zero block devices. The unvmcap value will be properly updated on both the first and subsequent controllers added to the subsystem regardless of the order in which nvme-ns is defined on the command line before or after the controller definition. Additionally, if the block device size of any namespace causes the unvmcap to be exceeded, an error will be returned at the namespace definition point. The fix is based on v3 based on v6.1.0, as v3 does not apply to master. --- hw/nvme/ctrl.c | 7 +++ hw/nvme/ns.c | 23 --- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 63ea2fcb14..dc0ad4155b 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6594,6 +6594,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) NvmeIdCtrl *id = >id_ctrl; uint8_t *pci_conf = pci_dev->config; uint64_t cap = ldq_le_p(>bar.cap); +int i; id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -6672,6 +6673,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cmic |= NVME_CMIC_MULTI_CTRL; id->tnvmcap = n->subsys->params.tnvmcap * GiB; id->unvmcap = n->subsys->params.tnvmcap * GiB; + +for (i = 0; i < ARRAY_SIZE(n->subsys->namespaces); i++) { +if (n->subsys->namespaces[i]) { +id->unvmcap -= le64_to_cpu(n->subsys->namespaces[i]->size); +} +} } NVME_CAP_SET_MQES(cap, 0x7ff); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index f62a695132..c87d7f5bd6 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -140,9 +140,12 @@ lbaf_found: return 0; } -static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) +static int nvme_ns_init_blk(NvmeNamespace *ns, NvmeSubsystem *subsys, +Error **errp) { bool read_only; +NvmeCtrl *ctrl; +int i; if (!blkconf_blocksizes(>blkconf, errp)) { return -1; @@ -164,6 +167,21 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) return -1; } +if (subsys) { +for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { +ctrl = nvme_subsys_ctrl(subsys, i); + +if (ctrl) { +if (ctrl->id_ctrl.unvmcap < le64_to_cpu(ns->size)) { +error_setg(errp, "blockdev size %ld exceeds subsystem's " + "unallocated capacity", ns->size); +} else { +ctrl->id_ctrl.unvmcap -= le64_to_cpu(ns->size); +} +} +} +} + return 0; } @@ -480,7 +498,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) } } -if (nvme_ns_init_blk(ns, errp)) { +if (nvme_ns_init_blk(ns, subsys, errp)) { return; } @@ -527,7 +545,6 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
Re: [PULL 0/7] Python patches
On 11/23/21 3:37 AM, John Snow wrote: The following changes since commit 89d2f9e4c63799f7f03e9180c63b7dc45fc2a04a: Merge tag 'pull-target-arm-20211122' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2021-11-22 16:35:54 +0100) are available in the Git repository at: https://gitlab.com/jsnow/qemu.git tags/python-pull-request for you to fetch changes up to a57cb3e23d5ac918a69d0aab918470ff0b429ff9: python/aqmp: fix send_fd_scm for python 3.6.x (2021-11-22 18:41:21 -0500) Python testing fixes for 6.2 A few more fixes to help eliminate race conditions from device-crash-test, along with a fix that allows the SCM_RIGHTS functionality to work on hosts that only have Python 3.6. If this is too much this late in the RC process, I'd advocate for at least patch 7/7 by itself. John Snow (7): python/machine: add @sock_dir property python/machine: remove _remove_monitor_sockfile property python/machine: add instance disambiguator to default nickname python/machine: move more variable initializations to _pre_launch python/machine: handle "fast" QEMU terminations scripts/device-crash-test: Use a QMP timeout python/aqmp: fix send_fd_scm for python 3.6.x python/qemu/aqmp/qmp_client.py | 9 -- python/qemu/machine/machine.py | 59 -- scripts/device-crash-test | 2 +- 3 files changed, 42 insertions(+), 28 deletions(-) Applied, thanks. r~
Re: [PATCH] block vvfat.c fix leak when failure occurs
On 19.11.21 12:25, Daniella Lee wrote: Based on your suggestions. I made a new patch which contians: 1.format detection 2.replace calloc with g_malloc0 in enable_write_target function 3.use g_free without null pointer detection in vvfat_open function 4.delete line "ret = 0", use return ret directly in vvfat_open function Signed-off-by: Daniella Lee --- block/vvfat.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) Thanks, looks good to me! Two remarks for the next time: When you send a new version of a patch, you should mark it as “v2” (and then “v3” and so on) in the subject, e.g. “[PATCH v2]”. This can be done e.g. with `git format-patch -v2`. Second, for a new iteration, you should generally keep the commit message from the previous one and not replace it with a change log. Having a change log is very nice, don’t get me wrong, but it shouldn’t be part of the commit message. Once you’ve formatted the patch with `git format-patch`, you can edit the file and put the change log below the “---” line, e.g. like it was done here: https://lists.nongnu.org/archive/html/qemu-block/2021-09/msg00453.html No need for you to respin this patch, though, I’ve just replaced the commit message with the one from v1 and applied it to my block branch: https://gitlab.com/hreitz/qemu/-/commits/block/ Thanks again, Hanna
[PULL 0/3] Block patches
The following changes since commit 73e0f70e097b7c92a5ce16ee35b53afe119b20d7: Merge tag 'pull-lu-20211123' of https://gitlab.com/rth7680/qemu into staging (2021-11-23 11:33:14 +0100) are available in the Git repository at: https://gitlab.com/hreitz/qemu.git tags/pull-block-2021-11-23 for you to fetch changes up to 4dd218fd0717ed3cddb69c01eeb9da630107d89d: iotests/149: Skip on unsupported ciphers (2021-11-23 15:39:12 +0100) Block patches for 6.2-rc2: - Fix memory leak in vvfat when vvfat_open() fails - iotest fixes for the gnutls crypto backend Daniella Lee (1): block/vvfat.c fix leak when failure occurs Hanna Reitz (2): iotests: Use aes-128-cbc iotests/149: Skip on unsupported ciphers block/vvfat.c | 16 tests/qemu-iotests/149 | 23 ++- tests/qemu-iotests/206 | 4 ++-- tests/qemu-iotests/206.out | 6 +++--- tests/qemu-iotests/210 | 4 ++-- tests/qemu-iotests/210.out | 6 +++--- 6 files changed, 40 insertions(+), 19 deletions(-) -- 2.33.1
[PULL 1/3] block/vvfat.c fix leak when failure occurs
From: Daniella Lee Function vvfat_open called function enable_write_target and init_directories, and these functions malloc new memory for BDRVVVFATState::qcow_filename, BDRVVVFATState::used_clusters, and BDRVVVFATState::cluster_buff. When the specified folder does not exist ,it may contains memory leak. After init_directories function is executed, the vvfat_open return -EIO, and bdrv_open_driver goto label open_failed, the program use g_free(bs->opaque) to release BDRVVVFATState struct without members mentioned. command line: qemu-system-x86_64 -hdb -usb -device usb-storage,drive=fat16 -drive file=fat:rw:fat-type=16:"", id=fat16,format=raw,if=none enable_write_target called: (gdb) bt at ../block/vvfat.c:3114 flags=155650, errp=0x7fffd780) at ../block/vvfat.c:1236 node_name=0x0, options=0x56fa45d0, open_flags=155650, errp=0x7fffd890) at ../block.c:1558 errp=0x7fffd890) at ../block.c:1852 reference=0x0, options=0x56fa45d0, flags=40962, parent=0x56f98cd0, child_class=0x56b1d6a0 , child_role=19, errp=0x7fffda90) at ../block.c:3779 options=0x56f9cfc0, bdref_key=0x56239bb8 "file", parent=0x56f98cd0, child_class=0x56b1d6a0 , child_role=19, allow_none=true, errp=0x7fffda90) at ../block.c:3419 reference=0x0, options=0x56f9cfc0, flags=8194, parent=0x0, child_class=0x0, child_role=0, errp=0x56c98c40 ) at ../block.c:3726 options=0x56f757b0, flags=0, errp=0x56c98c40 ) at ../block.c:3872 options=0x56f757b0, flags=0, errp=0x56c98c40 ) at ../block/block-backend.c:436 bs_opts=0x56f757b0, errp=0x56c98c40 ) at ../blockdev.c:608 errp=0x56c98c40 ) at ../blockdev.c:992 .. Signed-off-by: Daniella Lee Message-Id: <2029112553.35-1-daniellalee...@gmail.com> [hreitz: Took commit message from v1] Signed-off-by: Hanna Reitz --- block/vvfat.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 05e78e3c27..5dacc6cfac 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1279,8 +1279,18 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(>lock); -ret = 0; +qemu_opts_del(opts); + +return 0; + fail: +g_free(s->qcow_filename); +s->qcow_filename = NULL; +g_free(s->cluster_buffer); +s->cluster_buffer = NULL; +g_free(s->used_clusters); +s->used_clusters = NULL; + qemu_opts_del(opts); return ret; } @@ -3118,7 +3128,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) int size = sector2cluster(s, s->sector_count); QDict *options; -s->used_clusters = calloc(size, 1); +s->used_clusters = g_malloc0(size); array_init(&(s->commits), sizeof(commit_t)); @@ -3166,8 +3176,6 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) return 0; err: -g_free(s->qcow_filename); -s->qcow_filename = NULL; return ret; } -- 2.33.1
[PULL 3/3] iotests/149: Skip on unsupported ciphers
Whenever qemu-img or qemu-io report that some cipher is unsupported, skip the whole test, because that is probably because qemu has been configured with the gnutls crypto backend. We could taylor the algorithm list to what gnutls supports, but this is a test that is run rather rarely anyway (because it requires password-less sudo), and so it seems better and easier to skip it. When this test is intentionally run to check LUKS compatibility, it seems better not to limit the algorithms but keep the list extensive. Signed-off-by: Hanna Reitz Message-Id: <2027151707.52549-3-hre...@redhat.com> Reviewed-by: Daniel P. Berrangé --- tests/qemu-iotests/149 | 23 ++- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/149 b/tests/qemu-iotests/149 index 328fd05a4c..d49646ca60 100755 --- a/tests/qemu-iotests/149 +++ b/tests/qemu-iotests/149 @@ -230,6 +230,18 @@ def create_image(config, size_mb): fn.truncate(size_mb * 1024 * 1024) +def check_cipher_support(config, output): +"""Check the output of qemu-img or qemu-io for mention of the respective +cipher algorithm being unsupported, and if so, skip this test. +(Returns `output` for convenience.)""" + +if 'Unsupported cipher algorithm' in output: +iotests.notrun('Unsupported cipher algorithm ' + f'{config.cipher}-{config.keylen}-{config.mode}; ' + 'consider configuring qemu with a different crypto ' + 'backend') +return output + def qemu_img_create(config, size_mb): """Create and format a disk image with LUKS using qemu-img""" @@ -253,7 +265,8 @@ def qemu_img_create(config, size_mb): "%dM" % size_mb] iotests.log("qemu-img " + " ".join(args), filters=[iotests.filter_test_dir]) -iotests.log(iotests.qemu_img_pipe(*args), filters=[iotests.filter_test_dir]) +iotests.log(check_cipher_support(config, iotests.qemu_img_pipe(*args)), +filters=[iotests.filter_test_dir]) def qemu_io_image_args(config, dev=False): """Get the args for access an image or device with qemu-io""" @@ -279,8 +292,8 @@ def qemu_io_write_pattern(config, pattern, offset_mb, size_mb, dev=False): args = ["-c", "write -P 0x%x %dM %dM" % (pattern, offset_mb, size_mb)] args.extend(qemu_io_image_args(config, dev)) iotests.log("qemu-io " + " ".join(args), filters=[iotests.filter_test_dir]) -iotests.log(iotests.qemu_io(*args), filters=[iotests.filter_test_dir, - iotests.filter_qemu_io]) +iotests.log(check_cipher_support(config, iotests.qemu_io(*args)), +filters=[iotests.filter_test_dir, iotests.filter_qemu_io]) def qemu_io_read_pattern(config, pattern, offset_mb, size_mb, dev=False): @@ -291,8 +304,8 @@ def qemu_io_read_pattern(config, pattern, offset_mb, size_mb, dev=False): args = ["-c", "read -P 0x%x %dM %dM" % (pattern, offset_mb, size_mb)] args.extend(qemu_io_image_args(config, dev)) iotests.log("qemu-io " + " ".join(args), filters=[iotests.filter_test_dir]) -iotests.log(iotests.qemu_io(*args), filters=[iotests.filter_test_dir, - iotests.filter_qemu_io]) +iotests.log(check_cipher_support(config, iotests.qemu_io(*args)), +filters=[iotests.filter_test_dir, iotests.filter_qemu_io]) def test_once(config, qemu_img=False): -- 2.33.1
Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196
On 211123 1449, Philippe Mathieu-Daudé wrote: > On 11/23/21 14:42, Hanna Reitz wrote: > > On 18.11.21 13:06, Philippe Mathieu-Daudé wrote: > >> From: Alexander Bulekov > >> > >> Without the previous commit, when running 'make check-qtest-i386' > >> with QEMU configured with '--enable-sanitizers' we get: > >> > >> AddressSanitizer:DEADLYSIGNAL > >> = > >> ==287878==ERROR: AddressSanitizer: SEGV on unknown address > >> 0x0344 > >> ==287878==The signal is caused by a WRITE memory access. > >> ==287878==Hint: address points to the zero page. > >> #0 0x564b2e5bac27 in blk_inc_in_flight > >> block/block-backend.c:1346:5 > >> #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5 > >> #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11 > >> #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17 > >> #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9 > >> #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9 > >> > >> Add the reproducer for CVE-2021-20196. > >> > >> Signed-off-by: Alexander Bulekov > >> Message-Id: <20210319050906.14875-2-alx...@bu.edu> > >> [PMD: Rebased, use global test_image] > >> Reviewed-by: Darren Kenny > >> Signed-off-by: Philippe Mathieu-Daudé > >> --- > >> tests/qtest/fdc-test.c | 21 + > >> 1 file changed, 21 insertions(+) > > > > Not sure if I’m doing something wrong, but: > > > > Using the global test_image brings a problem, namely that this test > > fails unconditionally (for me at least...?), with the reason being that > > the global QEMU instance (launched by qtest_start(), quit by > > qtest_end()) still has that image open, so by launching a second > > instance concurrently, I get this: > > > > qemu-system-x86_64: Failed to get "write" lock > > Is another process using the image [/tmp/qtest.xV4IxX]? > > Hmm I had too many odd problems running qtests in parallel so I > switched to 'make check-qtest -j1' more than 1 year ago, which > is probably why I haven't noticed that issue. > > Using another 'test_image' seems against code reuse principle, > but at this point in release, duplicating it is simpler. Someone > will clean that later =) Maybe it makes sense to run this with -snasphot ? > > > So either we need to use a different image file, or we need to quit the > > global instance before using it (e.g. putting a qtest_end() at the > > beginning of test_cve_*()), although the latter just seems wrong. > > > > Second, I can’t make this test fail. When I apply this patch first (to > > master) and run it, I don’t get a SIGSEGV. > > Is your QEMU built with --enable-sanitizers ? >
Re: [PATCH-for-6.2 v3 1/2] hw/block/fdc: Kludge missing floppy drive to fix CVE-2021-20196
On 18.11.21 13:06, Philippe Mathieu-Daudé wrote: Guest might select another drive on the bus by setting the DRIVE_SEL bit of the DIGITAL OUTPUT REGISTER (DOR). The current controller model doesn't expect a BlockBackend to be NULL. A simple way to fix CVE-2021-20196 is to create an empty BlockBackend when it is missing. All further accesses will be safely handled, and the controller state machines keep behaving correctly. Cc: qemu-sta...@nongnu.org Fixes: CVE-2021-20196 Reported-by: Gaoning Pan (Ant Security Light-Year Lab) BugLink: https://bugs.launchpad.net/qemu/+bug/1912780 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/338 Reviewed-by: Darren Kenny Signed-off-by: Philippe Mathieu-Daudé --- hw/block/fdc.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index d21b717b7d6..6f94b6a6daa 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -1161,7 +1161,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit) static FDrive *get_cur_drv(FDCtrl *fdctrl) { -return get_drv(fdctrl, fdctrl->cur_drv); +FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv); + +if (!cur_drv->blk) { +/* + * Kludge: empty drive line selected. Create an anonymous + * BlockBackend to avoid NULL deref with various BlockBackend + * API calls within this model (CVE-2021-20196). + * Due to the controller QOM model limitations, we don't + * attach the created to the controller device. + */ +cur_drv->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); So to me this looks basically like a mini version of floppy_drive_realize(), and I was wondering what else we might want to use from that function. fd_init() and fd_revalidate() look interesting, but it appears that fdctrl_realize_common() already did that for all drives so we should be good. Then again, fd_revalidate() behaves differently for the initial drv->blk == NULL (drv->drive is set to TYPE_NONE, and last_sect and max_track are set to 0) and for then later !blk_is_inserted() (drv->drive not changed (so I guess it stays TYPE_NONE?), but last_sect and max_track are set to 0xff). Not sure if that’s a problem. Probably not, given that I think drv->disk and drv->drive both stay TYPE_NONE. Reviewed-by: Hanna Reitz +} +return cur_drv; } /* Status A register : 0x00 (read-only) */
Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196
On 18.11.21 13:06, Philippe Mathieu-Daudé wrote: From: Alexander Bulekov Without the previous commit, when running 'make check-qtest-i386' with QEMU configured with '--enable-sanitizers' we get: AddressSanitizer:DEADLYSIGNAL = ==287878==ERROR: AddressSanitizer: SEGV on unknown address 0x0344 ==287878==The signal is caused by a WRITE memory access. ==287878==Hint: address points to the zero page. #0 0x564b2e5bac27 in blk_inc_in_flight block/block-backend.c:1346:5 #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5 #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11 #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17 #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9 #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9 Add the reproducer for CVE-2021-20196. Signed-off-by: Alexander Bulekov Message-Id: <20210319050906.14875-2-alx...@bu.edu> [PMD: Rebased, use global test_image] Reviewed-by: Darren Kenny Signed-off-by: Philippe Mathieu-Daudé --- tests/qtest/fdc-test.c | 21 + 1 file changed, 21 insertions(+) Not sure if I’m doing something wrong, but: Using the global test_image brings a problem, namely that this test fails unconditionally (for me at least...?), with the reason being that the global QEMU instance (launched by qtest_start(), quit by qtest_end()) still has that image open, so by launching a second instance concurrently, I get this: qemu-system-x86_64: Failed to get "write" lock Is another process using the image [/tmp/qtest.xV4IxX]? So either we need to use a different image file, or we need to quit the global instance before using it (e.g. putting a qtest_end() at the beginning of test_cve_*()), although the latter just seems wrong. Second, I can’t make this test fail. When I apply this patch first (to master) and run it, I don’t get a SIGSEGV. Hanna
Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196
On 23.11.21 14:49, Philippe Mathieu-Daudé wrote: On 11/23/21 14:42, Hanna Reitz wrote: On 18.11.21 13:06, Philippe Mathieu-Daudé wrote: From: Alexander Bulekov Without the previous commit, when running 'make check-qtest-i386' with QEMU configured with '--enable-sanitizers' we get: AddressSanitizer:DEADLYSIGNAL = ==287878==ERROR: AddressSanitizer: SEGV on unknown address 0x0344 ==287878==The signal is caused by a WRITE memory access. ==287878==Hint: address points to the zero page. #0 0x564b2e5bac27 in blk_inc_in_flight block/block-backend.c:1346:5 #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5 #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11 #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17 #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9 #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9 Add the reproducer for CVE-2021-20196. Signed-off-by: Alexander Bulekov Message-Id: <20210319050906.14875-2-alx...@bu.edu> [PMD: Rebased, use global test_image] Reviewed-by: Darren Kenny Signed-off-by: Philippe Mathieu-Daudé --- tests/qtest/fdc-test.c | 21 + 1 file changed, 21 insertions(+) Not sure if I’m doing something wrong, but: Using the global test_image brings a problem, namely that this test fails unconditionally (for me at least...?), with the reason being that the global QEMU instance (launched by qtest_start(), quit by qtest_end()) still has that image open, so by launching a second instance concurrently, I get this: qemu-system-x86_64: Failed to get "write" lock Is another process using the image [/tmp/qtest.xV4IxX]? Hmm I had too many odd problems running qtests in parallel so I switched to 'make check-qtest -j1' more than 1 year ago, which is probably why I haven't noticed that issue. I’ve run the test with QTEST_QEMU_BINARY=$PWD/qemu-system-x86_64 tests/qtest/fdc-test so there definitely weren’t any other tests running at the same time. I don’t know why you don’t encounter this problem, but it’s caused by the concurrent QEMU instance launched in the very same test (qtest_start() in main(), and cleaned up by qtest_end() after g_test_run()). Using another 'test_image' seems against code reuse principle, but at this point in release, duplicating it is simpler. Someone will clean that later =) So either we need to use a different image file, or we need to quit the global instance before using it (e.g. putting a qtest_end() at the beginning of test_cve_*()), although the latter just seems wrong. Second, I can’t make this test fail. When I apply this patch first (to master) and run it, I don’t get a SIGSEGV. Is your QEMU built with --enable-sanitizers ? Uh, no. I had (wrongly) assumed there’d be no need, given that the SIGSEGV access address is below 4 kB... I now did configure and compile it with --enable-sanitizers but still can’t reproduce it. Hanna
[PULL 2/3] iotests: Use aes-128-cbc
Our gnutls crypto backend (which is the default as of 8bd0931f6) supports neither twofish-128 nor the CTR mode. CBC and aes-128 are supported by all of our backends (as far as I can tell), so use aes-128-cbc in our iotests. (We could also use e.g. aes-256-cbc, but the different key sizes would lead to different key slot offsets and so change the reference output more, which is why I went with aes-128.) Signed-off-by: Hanna Reitz Message-Id: <2027151707.52549-2-hre...@redhat.com> Reviewed-by: Daniel P. Berrangé Tested-by: Thomas Huth --- tests/qemu-iotests/206 | 4 ++-- tests/qemu-iotests/206.out | 6 +++--- tests/qemu-iotests/210 | 4 ++-- tests/qemu-iotests/210.out | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/qemu-iotests/206 b/tests/qemu-iotests/206 index c3cdad4ce4..10eff343f7 100755 --- a/tests/qemu-iotests/206 +++ b/tests/qemu-iotests/206 @@ -162,8 +162,8 @@ with iotests.FilePath('t.qcow2') as disk_path, \ 'encrypt': { 'format': 'luks', 'key-secret': 'keysec0', - 'cipher-alg': 'twofish-128', - 'cipher-mode': 'ctr', + 'cipher-alg': 'aes-128', + 'cipher-mode': 'cbc', 'ivgen-alg': 'plain64', 'ivgen-hash-alg': 'md5', 'hash-alg': 'sha1', diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out index 3593e8e9c2..80cd274223 100644 --- a/tests/qemu-iotests/206.out +++ b/tests/qemu-iotests/206.out @@ -97,7 +97,7 @@ Format specific information: === Successful image creation (encrypted) === -{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "encrypt": {"cipher-alg": "twofish-128", "cipher-mode": "ctr", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": {"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}} +{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "encrypt": {"cipher-alg": "aes-128", "cipher-mode": "cbc", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": {"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} @@ -115,10 +115,10 @@ Format specific information: encrypt: ivgen alg: plain64 hash alg: sha1 -cipher alg: twofish-128 +cipher alg: aes-128 uuid: ---- format: luks -cipher mode: ctr +cipher mode: cbc slots: [0]: active: true diff --git a/tests/qemu-iotests/210 b/tests/qemu-iotests/210 index 5a62ed4dd1..a4dcc5fe59 100755 --- a/tests/qemu-iotests/210 +++ b/tests/qemu-iotests/210 @@ -83,8 +83,8 @@ with iotests.FilePath('t.luks') as disk_path, \ }, 'size': size, 'key-secret': 'keysec0', - 'cipher-alg': 'twofish-128', - 'cipher-mode': 'ctr', + 'cipher-alg': 'aes-128', + 'cipher-mode': 'cbc', 'ivgen-alg': 'plain64', 'ivgen-hash-alg': 'md5', 'hash-alg': 'sha1', diff --git a/tests/qemu-iotests/210.out b/tests/qemu-iotests/210.out index 55c0844370..96d9f749dd 100644 --- a/tests/qemu-iotests/210.out +++ b/tests/qemu-iotests/210.out @@ -59,7 +59,7 @@ Format specific information: {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} -{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"cipher-alg": "twofish-128", "cipher-mode": "ctr", "driver": "luks", "file": {"driver": "file", "filename": "TEST_DIR/PID-t.luks"}, "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0", "size": 67108864}}} +{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"cipher-alg": "aes-128", "cipher-mode": "cbc", "driver": "luks", "file": {"driver": "file", "filename": "TEST_DIR/PID-t.luks"}, "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0", "size": 67108864}}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} @@ -71,9 +71,9 @@ encrypted: yes Format specific information: ivgen alg: plain64 hash alg: sha1 -cipher alg: twofish-128 +cipher alg: aes-128 uuid: ---- -cipher mode: ctr +cipher mode: cbc slots:
Re: [PATCH-for-6.2 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-3507
On 18.11.21 12:57, Philippe Mathieu-Daudé wrote: Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 Without the previous commit, when running 'make check-qtest-i386' with QEMU configured with '--enable-sanitizers' we get: ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x61962a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 READ of size 786432 at 0x61962a00 thread T0 #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 0x61962a00 is located 0 bytes to the right of 512-byte region [0x61962800,0x61962a00) allocated by thread T0 here: #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy Shadow bytes around the buggy address: 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Heap left redzone: fa Freed heap region: fd ==4028352==ABORTING Reported-by: Alexander Bulekov Signed-off-by: Philippe Mathieu-Daudé --- tests/qtest/fdc-test.c | 20 1 file changed, 20 insertions(+) diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c index 26b69f7c5cd..f164d972d10 100644 --- a/tests/qtest/fdc-test.c +++ b/tests/qtest/fdc-test.c @@ -546,6 +546,25 @@ static void fuzz_registers(void) } } +static void test_cve_2021_3507(void) +{ +QTestState *s; + +s = qtest_initf("-nographic -m 32M -nodefaults " +"-drive file=%s,format=raw,if=floppy", test_image); +qtest_outl(s, 0x9, 0x0a0206); +qtest_outw(s, 0x3f4, 0x1600); +qtest_outw(s, 0x3f4, 0x); +qtest_outw(s, 0x3f4, 0x); +qtest_outw(s, 0x3f4, 0x); +qtest_outw(s, 0x3f4, 0x0200); +qtest_outw(s, 0x3f4, 0x0200); +qtest_outw(s, 0x3f4, 0x); +qtest_outw(s, 0x3f4, 0x); +qtest_outw(s, 0x3f4, 0x); No idea what this does (looks like a 256-byte sector read read from sector 2 with EOT=0), but hits the problem and reproduces for me. Unfortunately, I have the exact same problem with test_image as I did in the other series. Hanna +qtest_quit(s); +} + int main(int argc, char **argv) { int fd; @@ -576,6 +595,7 @@ int main(int argc, char **argv) qtest_add_func("/fdc/read_no_dma_18", test_read_no_dma_18); qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); qtest_add_func("/fdc/fuzz-registers", fuzz_registers); +qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); ret = g_test_run();
Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196
On 11/23/21 14:42, Hanna Reitz wrote: > On 18.11.21 13:06, Philippe Mathieu-Daudé wrote: >> From: Alexander Bulekov >> >> Without the previous commit, when running 'make check-qtest-i386' >> with QEMU configured with '--enable-sanitizers' we get: >> >> AddressSanitizer:DEADLYSIGNAL >> = >> ==287878==ERROR: AddressSanitizer: SEGV on unknown address >> 0x0344 >> ==287878==The signal is caused by a WRITE memory access. >> ==287878==Hint: address points to the zero page. >> #0 0x564b2e5bac27 in blk_inc_in_flight >> block/block-backend.c:1346:5 >> #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5 >> #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11 >> #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17 >> #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9 >> #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9 >> >> Add the reproducer for CVE-2021-20196. >> >> Signed-off-by: Alexander Bulekov >> Message-Id: <20210319050906.14875-2-alx...@bu.edu> >> [PMD: Rebased, use global test_image] >> Reviewed-by: Darren Kenny >> Signed-off-by: Philippe Mathieu-Daudé >> --- >> tests/qtest/fdc-test.c | 21 + >> 1 file changed, 21 insertions(+) > > Not sure if I’m doing something wrong, but: > > Using the global test_image brings a problem, namely that this test > fails unconditionally (for me at least...?), with the reason being that > the global QEMU instance (launched by qtest_start(), quit by > qtest_end()) still has that image open, so by launching a second > instance concurrently, I get this: > > qemu-system-x86_64: Failed to get "write" lock > Is another process using the image [/tmp/qtest.xV4IxX]? Hmm I had too many odd problems running qtests in parallel so I switched to 'make check-qtest -j1' more than 1 year ago, which is probably why I haven't noticed that issue. Using another 'test_image' seems against code reuse principle, but at this point in release, duplicating it is simpler. Someone will clean that later =) > So either we need to use a different image file, or we need to quit the > global instance before using it (e.g. putting a qtest_end() at the > beginning of test_cve_*()), although the latter just seems wrong. > > Second, I can’t make this test fail. When I apply this patch first (to > master) and run it, I don’t get a SIGSEGV. Is your QEMU built with --enable-sanitizers ?
Re: [PATCH-for-6.2 v3 1/2] hw/block/fdc: Kludge missing floppy drive to fix CVE-2021-20196
On 11/23/21 14:33, Hanna Reitz wrote: > On 18.11.21 13:06, Philippe Mathieu-Daudé wrote: >> Guest might select another drive on the bus by setting the >> DRIVE_SEL bit of the DIGITAL OUTPUT REGISTER (DOR). >> The current controller model doesn't expect a BlockBackend >> to be NULL. A simple way to fix CVE-2021-20196 is to create >> an empty BlockBackend when it is missing. All further >> accesses will be safely handled, and the controller state >> machines keep behaving correctly. >> >> Cc: qemu-sta...@nongnu.org >> Fixes: CVE-2021-20196 >> Reported-by: Gaoning Pan (Ant Security Light-Year Lab) >> BugLink: https://bugs.launchpad.net/qemu/+bug/1912780 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/338 >> Reviewed-by: Darren Kenny >> Signed-off-by: Philippe Mathieu-Daudé >> --- >> hw/block/fdc.c | 14 +- >> 1 file changed, 13 insertions(+), 1 deletion(-) >> >> diff --git a/hw/block/fdc.c b/hw/block/fdc.c >> index d21b717b7d6..6f94b6a6daa 100644 >> --- a/hw/block/fdc.c >> +++ b/hw/block/fdc.c >> @@ -1161,7 +1161,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit) >> static FDrive *get_cur_drv(FDCtrl *fdctrl) >> { >> - return get_drv(fdctrl, fdctrl->cur_drv); >> + FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv); >> + >> + if (!cur_drv->blk) { >> + /* >> + * Kludge: empty drive line selected. Create an anonymous >> + * BlockBackend to avoid NULL deref with various BlockBackend >> + * API calls within this model (CVE-2021-20196). >> + * Due to the controller QOM model limitations, we don't >> + * attach the created to the controller device. >> + */ >> + cur_drv->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); > > So to me this looks basically like a mini version of > floppy_drive_realize(), and I was wondering what else we might want to > use from that function. fd_init() and fd_revalidate() look interesting, > but it appears that fdctrl_realize_common() already did that for all > drives so we should be good. How the controller / bus / floppy / drive are connected is a bit confusing. Did you ever try to hot-remove the magnetic medium from the floppy plastic enclosure while the controller rotates it? > Then again, fd_revalidate() behaves differently for the initial drv->blk > == NULL (drv->drive is set to TYPE_NONE, and last_sect and max_track are > set to 0) and for then later !blk_is_inserted() (drv->drive not changed > (so I guess it stays TYPE_NONE?), but last_sect and max_track are set to > 0xff). Not sure if that’s a problem. Probably not, given that I think > drv->disk and drv->drive both stay TYPE_NONE. I'm not sure about the future plans for this device model... > Reviewed-by: Hanna Reitz Thanks!
Re: [PATCH-for-6.2 1/2] hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
On 18.11.21 12:57, Philippe Mathieu-Daudé wrote: Per the 82078 datasheet, if the end-of-track (EOT byte in the FIFO) is more than the number of sectors per side, the command is terminated unsuccessfully: Patch looks OK to me (can’t believe I’ve looked into the spec...), just one question (note from later self: I really believed this, and now I ended up with this wall of text...): Isn’t the problem that the EOT is smaller than the start sector index? AFAIU fifo[6] is the EOT, ks (fifo[4]) is the start sector number, and so the problem occurs if fifo[6] < fifo[4] - 1 (EOT < start sector index). I don’t think there is any problem if the EOT exceeds the number of sectors per anything (e.g. sectors per track or per cylinder). Well, the spec might say it should cause an error, but in that case tmp (and thus data_len) just become very large. AFAIU fd_seek() checks that the start sector is in bounds, so in fact passing an EOT that is larger than cur_drv->last_sect would never be negative, and so never trigger the tmp < 0 condition. The explanation below however seems to be precisely for the case where EOT would be larger than last_sect (say 18), and so e.g. sector 19 can’t be found (“second occurrence of a pulse on the INDX# pin”). Contrarily, the spec doesn’t seem to say anything for the case where EOT is in bounds but less than the start sector index. It doesn’t even seem to say how the EOT is evaluated; is it a “read until sector >= EOT”? (I.e. EOT < start sector would yield a zero-byte read) Or is it a “read until sector == EOT”? (I.e. EOT < start sector would yield an error because that condition is never reached, and we go beyond last_sect, yielding the same error as if EOT were out of bounds.) Now that raises another question to me. Say EOT is out of bounds; wouldn’t the FDC still read all sectors until the end of the track/cylinder? And only then raise an error that the sector beyond doesn’t exist? I have no idea if any of that made sense and even if it did, whether I’m right. But even if I am, I don’t think it’s a problem. Our problem at hand is the `tmp` underflow, and it’s good to handle that by returning some form of error to the guest instead of crashing. It’s just that I’m not sure this solution is actually what the spec requires (because I have no idea what the spec requires in that case), and the explanation given in the commit message to me seems to define an error for a very different case (where tmp would not be negative) that we just execute without an error. When I look into this I see many more things that look like they aren’t according to spec (like not checking the sector size, or that we honor fifo[0] & 0x80 for READ TRACK even though the spec says no multi-track operations for READ TRACK), so I don’t personally care. It’s good that this patch handles the `tmp` underflow, because that’s important, compliance with the spec isn’t. (Yes, the entire reason for this wall of text is that I wonder why the commit message goes into so much detail quoting the spec, while I can’t find it applies.) Reviewed-by: Hanna Reitz * 5.2.5 DATA TRANSFER TERMINATION The 82078 supports terminal count explicitly through the TC pin and implicitly through the underrun/over- run and end-of-track (EOT) functions. For full sector transfers, the EOT parameter can define the last sector to be transferred in a single or multisector transfer. If the last sector to be transferred is a par- tial sector, the host can stop transferring the data in mid-sector, and the 82078 will continue to complete the sector as if a hardware TC was received. The only difference between these implicit functions and TC is that they return "abnormal termination" result status. Such status indications can be ignored if they were expected. * 6.1.3 READ TRACK This command terminates when the EOT specified number of sectors have been read. If the 82078 does not find an I D Address Mark on the diskette after the second· occurrence of a pulse on the INDX# pin, then it sets the IC code in Status Regis- ter 0 to "01" (Abnormal termination), sets the MA bit in Status Register 1 to "1", and terminates the com- mand. * 6.1.6 VERIFY Refer to Table 6-6 and Table 6-7 for information concerning the values of MT and EC versus SC and EOT value. * Table 6·6. Result Phase Table * Table 6-7. Verify Command Result Phase Table Fix by aborting the transfer when EOT > # Sectors Per Side. Cc: qemu-sta...@nongnu.org Cc: Hervé Poussineau Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") Reported-by: Alexander Bulekov Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 Signed-off-by: Philippe Mathieu-Daudé --- hw/block/fdc.c | 8 1 file changed, 8 insertions(+) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index fa933cd3263..d21b717b7d6 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c
Re: [PATCH-for-6.2 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-3507
On 28 1257, Philippe Mathieu-Daudé wrote: > Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 > > Without the previous commit, when running 'make check-qtest-i386' > with QEMU configured with '--enable-sanitizers' we get: > > ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address > 0x61962a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 > READ of size 786432 at 0x61962a00 thread T0 > #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) > #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 > #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 > #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 > #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 > #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 > #6 0x5626d0bd5649 in cpu_physical_memory_write > include/exec/cpu-common.h:82:5 > #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 > #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 > #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 > #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 > #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 > #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 > > 0x61962a00 is located 0 bytes to the right of 512-byte region > [0x61962800,0x61962a00) > allocated by thread T0 here: > #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) > #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 > #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 > #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 > #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 > #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 > > SUMMARY: AddressSanitizer: heap-buffer-overflow > (qemu-system-i386+0x1e65919) in __asan_memcpy > Shadow bytes around the buggy address: > 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa > 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa > 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa > 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa > 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa > 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa > 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd > Shadow byte legend (one shadow byte represents 8 application bytes): > Addressable: 00 > Heap left redzone: fa > Freed heap region: fd > ==4028352==ABORTING > > Reported-by: Alexander Bulekov > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/qtest/fdc-test.c | 20 > 1 file changed, 20 insertions(+) > > diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c > index 26b69f7c5cd..f164d972d10 100644 > --- a/tests/qtest/fdc-test.c > +++ b/tests/qtest/fdc-test.c > @@ -546,6 +546,25 @@ static void fuzz_registers(void) > } > } > > +static void test_cve_2021_3507(void) > +{ > +QTestState *s; > + > +s = qtest_initf("-nographic -m 32M -nodefaults " > +"-drive file=%s,format=raw,if=floppy", test_image); Does it make sense to run this with -snapshot ? Reviewed-by: Alexander Bulekov > +qtest_outl(s, 0x9, 0x0a0206); > +qtest_outw(s, 0x3f4, 0x1600); > +qtest_outw(s, 0x3f4, 0x); > +qtest_outw(s, 0x3f4, 0x); > +qtest_outw(s, 0x3f4, 0x); > +qtest_outw(s, 0x3f4, 0x0200); > +qtest_outw(s, 0x3f4, 0x0200); > +qtest_outw(s, 0x3f4, 0x); > +qtest_outw(s, 0x3f4, 0x); > +qtest_outw(s, 0x3f4, 0x); > +qtest_quit(s); > +} > + > int main(int argc, char **argv) > { > int fd; > @@ -576,6 +595,7 @@ int main(int argc, char **argv) > qtest_add_func("/fdc/read_no_dma_18", test_read_no_dma_18); > qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); > qtest_add_func("/fdc/fuzz-registers", fuzz_registers); > +qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); > > ret = g_test_run(); > > -- > 2.31.1 >
Re: [PULL 0/3] Block patches
On 11/23/21 4:59 PM, Hanna Reitz wrote: The following changes since commit 73e0f70e097b7c92a5ce16ee35b53afe119b20d7: Merge tag 'pull-lu-20211123' of https://gitlab.com/rth7680/qemu into staging (2021-11-23 11:33:14 +0100) are available in the Git repository at: https://gitlab.com/hreitz/qemu.git tags/pull-block-2021-11-23 for you to fetch changes up to 4dd218fd0717ed3cddb69c01eeb9da630107d89d: iotests/149: Skip on unsupported ciphers (2021-11-23 15:39:12 +0100) Block patches for 6.2-rc2: - Fix memory leak in vvfat when vvfat_open() fails - iotest fixes for the gnutls crypto backend Daniella Lee (1): block/vvfat.c fix leak when failure occurs Hanna Reitz (2): iotests: Use aes-128-cbc iotests/149: Skip on unsupported ciphers block/vvfat.c | 16 tests/qemu-iotests/149 | 23 ++- tests/qemu-iotests/206 | 4 ++-- tests/qemu-iotests/206.out | 6 +++--- tests/qemu-iotests/210 | 4 ++-- tests/qemu-iotests/210.out | 6 +++--- 6 files changed, 40 insertions(+), 19 deletions(-) Applied, thanks. r~
[PATCH 1/4] block_int: make bdrv_backing_overridden static
bdrv_backing_overridden is only used in block.c, so there is no need to leave it in block_int.h Signed-off-by: Emanuele Giuseppe Esposito --- include/block/block_int.h | 3 --- block.c | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/block/block_int.h b/include/block/block_int.h index f4c75e8ba9..27008cfb22 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1122,9 +1122,6 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, QDict *options); -bool bdrv_backing_overridden(BlockDriverState *bs); - - /** * bdrv_add_aio_context_notifier: * diff --git a/block.c b/block.c index 0ac5b163d2..10346b5011 100644 --- a/block.c +++ b/block.c @@ -103,6 +103,8 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, static void bdrv_reopen_commit(BDRVReopenState *reopen_state); static void bdrv_reopen_abort(BDRVReopenState *reopen_state); +static bool bdrv_backing_overridden(BlockDriverState *bs); + /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -7475,7 +7477,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) /* Note: This function may return false positives; it may return true * even if opening the backing file specified by bs's image header * would result in exactly bs->backing. */ -bool bdrv_backing_overridden(BlockDriverState *bs) +static bool bdrv_backing_overridden(BlockDriverState *bs) { if (bs->backing) { return strcmp(bs->auto_backing_file, -- 2.27.0
[PATCH 3/4] include/sysemu/blockdev.h: move drive_add and inline drive_def
drive_add is only used in softmmu/vl.c, so it can be a static function there, and drive_def is only a particular use case of qemu_opts_parse_noisily, so it can be inlined. Also remove drive_mark_claimed_by_board, as it is only defined but not implemented (nor used) anywhere. Signed-off-by: Emanuele Giuseppe Esposito --- include/sysemu/blockdev.h | 6 ++ block/monitor/block-hmp-cmds.c | 2 +- blockdev.c | 27 +-- softmmu/vl.c | 25 - 4 files changed, 28 insertions(+), 32 deletions(-) diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h index 32c2d6023c..aacc587a33 100644 --- a/include/sysemu/blockdev.h +++ b/include/sysemu/blockdev.h @@ -27,6 +27,8 @@ typedef enum { IF_COUNT } BlockInterfaceType; +extern const char *const block_if_name[]; + struct DriveInfo { BlockInterfaceType type; int bus; @@ -45,16 +47,12 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); void override_max_devs(BlockInterfaceType type, int max_devs); DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit); -void drive_mark_claimed_by_board(void); void drive_check_orphaned(void); DriveInfo *drive_get_by_index(BlockInterfaceType type, int index); int drive_get_max_bus(BlockInterfaceType type); int drive_get_max_devs(BlockInterfaceType type); DriveInfo *drive_get_next(BlockInterfaceType type); -QemuOpts *drive_def(const char *optstr); -QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, -const char *optstr); DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type, Error **errp); diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index 2ac4aedfff..bfb3c043a0 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -101,7 +101,7 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict) return; } -opts = drive_def(optstr); +opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); if (!opts) return; diff --git a/blockdev.c b/blockdev.c index 1b6ffbbc73..928bb743a1 100644 --- a/blockdev.c +++ b/blockdev.c @@ -71,7 +71,7 @@ void bdrv_set_monitor_owned(BlockDriverState *bs) QTAILQ_INSERT_TAIL(_bdrv_states, bs, monitor_list); } -static const char *const block_if_name[IF_COUNT] = { +const char *const block_if_name[IF_COUNT] = { [IF_NONE] = "none", [IF_IDE] = "ide", [IF_SCSI] = "scsi", @@ -197,31 +197,6 @@ static int drive_index_to_unit_id(BlockInterfaceType type, int index) return max_devs ? index % max_devs : index; } -QemuOpts *drive_def(const char *optstr) -{ -return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); -} - -QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, -const char *optstr) -{ -QemuOpts *opts; - -opts = drive_def(optstr); -if (!opts) { -return NULL; -} -if (type != IF_DEFAULT) { -qemu_opt_set(opts, "if", block_if_name[type], _abort); -} -if (index >= 0) { -qemu_opt_set_number(opts, "index", index, _abort); -} -if (file) -qemu_opt_set(opts, "file", file, _abort); -return opts; -} - DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit) { BlockBackend *blk; diff --git a/softmmu/vl.c b/softmmu/vl.c index 1159a64bce..f47827926c 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -650,6 +650,27 @@ static int drive_enable_snapshot(void *opaque, QemuOpts *opts, Error **errp) return 0; } +static QemuOpts *drive_add(BlockInterfaceType type, int index, + const char *file, const char *optstr) +{ +QemuOpts *opts; + +opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); +if (!opts) { +return NULL; +} +if (type != IF_DEFAULT) { +qemu_opt_set(opts, "if", block_if_name[type], _abort); +} +if (index >= 0) { +qemu_opt_set_number(opts, "index", index, _abort); +} +if (file) { +qemu_opt_set(opts, "file", file, _abort); +} +return opts; +} + static void default_drive(int enable, int snapshot, BlockInterfaceType type, int index, const char *optstr) { @@ -2884,7 +2905,9 @@ void qemu_init(int argc, char **argv, char **envp) break; } case QEMU_OPTION_drive: -if (drive_def(optarg) == NULL) { +opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), + optarg, false); +if (opts == NULL) { exit(1); } break; -- 2.27.0
[PATCH v5 01/31] main-loop.h: introduce qemu_in_main_thread()
When invoked from the main loop, this function is the same as qemu_mutex_iothread_locked, and returns true if the BQL is held. When invoked from iothreads or tests, it returns true only if the current AioContext is the Main Loop. This essentially just extends qemu_mutex_iothread_locked to work also in unit tests or other users like storage-daemon, that run in the Main Loop but end up using the implementation in stubs/iothread-lock.c. Using qemu_mutex_iothread_locked in unit tests defaults to false because they use the implementation in stubs/iothread-lock, making all assertions added in next patches fail despite the AioContext is still the main loop. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi --- include/qemu/main-loop.h | 13 + softmmu/cpus.c | 5 + stubs/iothread-lock.c| 5 + 3 files changed, 23 insertions(+) diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 8dbc6fcb89..6b8fa57c5d 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -245,6 +245,19 @@ AioContext *iohandler_get_aio_context(void); */ bool qemu_mutex_iothread_locked(void); +/** + * qemu_in_main_thread: same as qemu_mutex_iothread_locked when + * softmmu/cpus.c implementation is linked. Otherwise this function + * checks that the current AioContext is the global AioContext + * (main loop). + * + * This is useful when checking that the BQL is held, to avoid + * returning false when invoked by unit tests or other users like + * storage-daemon that end up using stubs/iothread-lock.c + * implementation. + */ +bool qemu_in_main_thread(void); + /** * qemu_mutex_lock_iothread: Lock the main loop mutex. * diff --git a/softmmu/cpus.c b/softmmu/cpus.c index 071085f840..3f61a3c31d 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -481,6 +481,11 @@ bool qemu_mutex_iothread_locked(void) return iothread_locked; } +bool qemu_in_main_thread(void) +{ +return qemu_mutex_iothread_locked(); +} + /* * The BQL is taken from so many places that it is worth profiling the * callers directly, instead of funneling them all through a single function. diff --git a/stubs/iothread-lock.c b/stubs/iothread-lock.c index 5b45b7fc8b..ff7386e42c 100644 --- a/stubs/iothread-lock.c +++ b/stubs/iothread-lock.c @@ -6,6 +6,11 @@ bool qemu_mutex_iothread_locked(void) return false; } +bool qemu_in_main_thread(void) +{ +return qemu_get_current_aio_context() == qemu_get_aio_context(); +} + void qemu_mutex_lock_iothread_impl(const char *file, int line) { } -- 2.27.0
[PATCH v5 02/31] include/block/block: split header into I/O and global state API
block.h currently contains a mix of functions: some of them run under the BQL and modify the block layer graph, others are instead thread-safe and perform I/O in iothreads. It is not easy to understand which function is part of which group (I/O vs GS), and this patch aims to clarify it. The "GS" functions need the BQL, and often use aio_context_acquire/release and/or drain to be sure they can modify the graph safely. The I/O function are instead thread safe, and can run in any AioContext. By splitting the header in two files, block-io.h and block-global-state.h we have a clearer view on what needs what kind of protection. block-common.h contains common structures shared by both headers. block.h is left there for legacy and to avoid changing all includes in all c files that use the block APIs. Assertions are added in the next patch. Signed-off-by: Emanuele Giuseppe Esposito --- include/block/block-common.h | 380 + include/block/block-global-state.h | 271 + include/block/block-io.h | 322 +++ include/block/block.h | 878 + block.c| 3 + block/meson.build | 7 +- 6 files changed, 1004 insertions(+), 857 deletions(-) create mode 100644 include/block/block-common.h create mode 100644 include/block/block-global-state.h create mode 100644 include/block/block-io.h diff --git a/include/block/block-common.h b/include/block/block-common.h new file mode 100644 index 00..dfaa73ac70 --- /dev/null +++ b/include/block/block-common.h @@ -0,0 +1,380 @@ +#ifndef BLOCK_COMMON_H +#define BLOCK_COMMON_H + +#include "block/aio.h" +#include "block/aio-wait.h" +#include "qemu/iov.h" +#include "qemu/coroutine.h" +#include "block/accounting.h" +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" +#include "qemu/hbitmap.h" +#include "qemu/transactions.h" + +/* + * generated_co_wrapper + * + * Function specifier, which does nothing but mark functions to be + * generated by scripts/block-coroutine-wrapper.py + * + * Read more in docs/devel/block-coroutine-wrapper.rst + */ +#define generated_co_wrapper + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BdrvChild BdrvChild; +typedef struct BdrvChildClass BdrvChildClass; + +typedef struct BlockDriverInfo { +/* in bytes, 0 if irrelevant */ +int cluster_size; +/* offset at which the VM state can be saved (0 if not possible) */ +int64_t vm_state_offset; +bool is_dirty; +/* + * True if this block driver only supports compressed writes + */ +bool needs_compressed_writes; +} BlockDriverInfo; + +typedef struct BlockFragInfo { +uint64_t allocated_clusters; +uint64_t total_clusters; +uint64_t fragmented_clusters; +uint64_t compressed_clusters; +} BlockFragInfo; + +typedef enum { +BDRV_REQ_COPY_ON_READ = 0x1, +BDRV_REQ_ZERO_WRITE = 0x2, + +/* + * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate + * that the block driver should unmap (discard) blocks if it is guaranteed + * that the result will read back as zeroes. The flag is only passed to the + * driver if the block device is opened with BDRV_O_UNMAP. + */ +BDRV_REQ_MAY_UNMAP = 0x4, + +BDRV_REQ_FUA= 0x10, +BDRV_REQ_WRITE_COMPRESSED = 0x20, + +/* + * Signifies that this write request will not change the visible disk + * content. + */ +BDRV_REQ_WRITE_UNCHANGED= 0x40, + +/* + * Forces request serialisation. Use only with write requests. + */ +BDRV_REQ_SERIALISING= 0x80, + +/* + * Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. + */ +BDRV_REQ_NO_FALLBACK= 0x100, + +/* + * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read + * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR + * filter is involved), in which case it signals that the COR operation + * need not read the data into memory (qiov) but only ensure they are + * copied to the top layer (i.e., that COR operation is done). + */ +BDRV_REQ_PREFETCH = 0x200, + +/* + * If we need to wait for other requests, just fail immediately. Used + * only together with BDRV_REQ_SERIALISING. + */ +BDRV_REQ_NO_WAIT = 0x400, + +/* Mask of valid flags */ +BDRV_REQ_MASK = 0x7ff, +} BdrvRequestFlags; + +#define BDRV_O_NO_SHARE0x0001 /* don't share permissions */ +#define BDRV_O_RDWR0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ +#define BDRV_O_SNAPSHOT0x0008 /* open the file read only and save + writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
[PATCH v5 07/31] include/block/block_int: split header into I/O and global state API
Similarly to the previous patch, split block_int.h in block_int-io.h and block_int-global-state.h block_int-common.h contains the structures shared between the two headers, and the functions that can't be categorized as I/O or global state. Assertions are added in the next patch. Signed-off-by: Emanuele Giuseppe Esposito --- include/block/block_int-common.h | 1161 +++ include/block/block_int-global-state.h | 315 + include/block/block_int-io.h | 167 +++ include/block/block_int.h | 1475 +--- blockdev.c |5 + 5 files changed, 1651 insertions(+), 1472 deletions(-) create mode 100644 include/block/block_int-common.h create mode 100644 include/block/block_int-global-state.h create mode 100644 include/block/block_int-io.h diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h new file mode 100644 index 00..70534f94ae --- /dev/null +++ b/include/block/block_int-common.h @@ -0,0 +1,1161 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_COMMON_H +#define BLOCK_INT_COMMON_H + +#include "block/accounting.h" +#include "block/block.h" +#include "block/aio-wait.h" +#include "qemu/queue.h" +#include "qemu/coroutine.h" +#include "qemu/stats64.h" +#include "qemu/timer.h" +#include "qemu/hbitmap.h" +#include "block/snapshot.h" +#include "qemu/throttle.h" +#include "qemu/rcu.h" + +#define BLOCK_FLAG_LAZY_REFCOUNTS 8 + +#define BLOCK_OPT_SIZE "size" +#define BLOCK_OPT_ENCRYPT "encryption" +#define BLOCK_OPT_ENCRYPT_FORMAT"encrypt.format" +#define BLOCK_OPT_COMPAT6 "compat6" +#define BLOCK_OPT_HWVERSION "hwversion" +#define BLOCK_OPT_BACKING_FILE "backing_file" +#define BLOCK_OPT_BACKING_FMT "backing_fmt" +#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" +#define BLOCK_OPT_TABLE_SIZE"table_size" +#define BLOCK_OPT_PREALLOC "preallocation" +#define BLOCK_OPT_SUBFMT"subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" +#define BLOCK_OPT_LAZY_REFCOUNTS"lazy_refcounts" +#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_REDUNDANCY"redundancy" +#define BLOCK_OPT_NOCOW "nocow" +#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" +#define BLOCK_OPT_OBJECT_SIZE "object_size" +#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" +#define BLOCK_OPT_DATA_FILE "data_file" +#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" +#define BLOCK_OPT_COMPRESSION_TYPE "compression_type" +#define BLOCK_OPT_EXTL2 "extended_l2" + +#define BLOCK_PROBE_BUF_SIZE512 + +enum BdrvTrackedRequestType { +BDRV_TRACKED_READ, +BDRV_TRACKED_WRITE, +BDRV_TRACKED_DISCARD, +BDRV_TRACKED_TRUNCATE, +}; + +/* + * That is not quite good that BdrvTrackedRequest structure is public, + * as block/io.c is very careful about incoming offset/bytes being + * correct. Be sure to assert bdrv_check_request() succeeded after any + * modification of BdrvTrackedRequest object out of block/io.c + */ +typedef struct BdrvTrackedRequest { +BlockDriverState *bs; +int64_t offset; +int64_t bytes; +enum BdrvTrackedRequestType type; + +bool serialising; +int64_t overlap_offset; +int64_t overlap_bytes; + +QLIST_ENTRY(BdrvTrackedRequest) list; +Coroutine *co; /* owner, used for deadlock detection */ +CoQueue wait_queue; /* coroutines blocked on this request */ + +struct BdrvTrackedRequest *waiting_for; +} BdrvTrackedRequest; + + +struct BlockDriver { +const char *format_name; +int instance_size; + +/* + * Set to true if the BlockDriver is a block filter. Block filters pass + * certain callbacks that refer to data (see block.c) to their bs->file + * or
[PATCH v5 13/31] block.c: add assertions to static functions
Following the assertion derived from the API split, propagate the assertion also in the static functions. Signed-off-by: Emanuele Giuseppe Esposito --- block.c | 45 - 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 5516c84ec4..b77ab0a104 100644 --- a/block.c +++ b/block.c @@ -434,6 +434,7 @@ BlockDriverState *bdrv_new(void) static BlockDriver *bdrv_do_find_format(const char *format_name) { BlockDriver *drv1; +assert(qemu_in_main_thread()); QLIST_FOREACH(drv1, _drivers, list) { if (!strcmp(drv1->format_name, format_name)) { @@ -593,6 +594,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, int64_t size; int ret; +assert(qemu_in_main_thread()); + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, _err); if (ret < 0 && ret != -ENOTSUP) { @@ -631,6 +634,8 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, int64_t bytes_to_clear; int ret; +assert(qemu_in_main_thread()); + bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); if (bytes_to_clear) { ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); @@ -891,6 +896,7 @@ static BlockDriver *find_hdev_driver(const char *filename) { int score_max = 0, score; BlockDriver *drv = NULL, *d; +assert(qemu_in_main_thread()); QLIST_FOREACH(d, _drivers, list) { if (d->bdrv_probe_device) { @@ -908,6 +914,7 @@ static BlockDriver *find_hdev_driver(const char *filename) static BlockDriver *bdrv_do_find_protocol(const char *protocol) { BlockDriver *drv1; +assert(qemu_in_main_thread()); QLIST_FOREACH(drv1, _drivers, list) { if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { @@ -1016,6 +1023,8 @@ static int find_image_format(BlockBackend *file, const char *filename, uint8_t buf[BLOCK_PROBE_BUF_SIZE]; int ret = 0; +assert(qemu_in_main_thread()); + /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { *pdrv = _raw; @@ -1097,6 +1106,7 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, BlockdevDetectZeroesOptions detect_zeroes = qapi_enum_parse(_lookup, value, BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, _err); +assert(qemu_in_main_thread()); g_free(value); if (local_err) { error_propagate(errp, local_err); @@ -1214,6 +1224,7 @@ static void bdrv_child_cb_drained_end(BdrvChild *child, static int bdrv_child_cb_inactivate(BdrvChild *child) { BlockDriverState *bs = child->opaque; +assert(qemu_in_main_thread()); assert(bs->open_flags & BDRV_O_INACTIVE); return 0; } @@ -1241,6 +1252,7 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; +assert(qemu_in_main_thread()); /* For temporary files, unconditional cache=unsafe is fine */ qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); @@ -1260,6 +1272,7 @@ static void bdrv_backing_attach(BdrvChild *c) BlockDriverState *parent = c->opaque; BlockDriverState *backing_hd = c->bs; +assert(qemu_in_main_thread()); assert(!parent->backing_blocker); error_setg(>backing_blocker, "node is used as backing hd of '%s'", @@ -1298,6 +1311,7 @@ static void bdrv_backing_detach(BdrvChild *c) { BlockDriverState *parent = c->opaque; +assert(qemu_in_main_thread()); assert(parent->backing_blocker); bdrv_op_unblock_all(c->bs, parent->backing_blocker); error_free(parent->backing_blocker); @@ -1310,6 +1324,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, BlockDriverState *parent = c->opaque; bool read_only = bdrv_is_read_only(parent); int ret; +assert(qemu_in_main_thread()); if (read_only) { ret = bdrv_reopen_set_read_only(parent, false, errp); @@ -1341,6 +1356,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, int parent_flags, QDict *parent_options) { int flags = parent_flags; +assert(qemu_in_main_thread()); /* * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. @@ -1479,6 +1495,7 @@ AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags; +assert(qemu_in_main_thread()); /* * Clear flags that are internal to the block layer before opening the @@ -1491,6 +1508,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
[PATCH v5 11/31] include/block/blockjob_int.h: split header into I/O and GS API
Since the I/O functions are not many, keep a single file. Also split the function pointers in BlockJobDriver. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi --- include/block/blockjob_int.h | 28 1 file changed, 28 insertions(+) diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 6633d83da2..718d7b92d2 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -38,6 +38,13 @@ struct BlockJobDriver { /** Generic JobDriver callbacks and settings */ JobDriver job_driver; +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + /* * Returns whether the job has pending requests for the child or will * submit new requests before the next pause point. This callback is polled @@ -46,6 +53,13 @@ struct BlockJobDriver { */ bool (*drained_poll)(BlockJob *job); +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /* * If the callback is not NULL, it will be invoked before the job is * resumed in a new AioContext. This is the place to move any resources @@ -56,6 +70,13 @@ struct BlockJobDriver { void (*set_speed)(BlockJob *job, int64_t speed); }; +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * block_job_create: * @job_id: The id of the newly-created job, or %NULL to have one @@ -98,6 +119,13 @@ void block_job_free(Job *job); */ void block_job_user_resume(Job *job); +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + /** * block_job_ratelimit_get_delay: * -- 2.27.0
[PATCH v5 15/31] assertions for blockjob.h global state API
Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi --- blockjob.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/blockjob.c b/blockjob.c index 10c807413e..74476af473 100644 --- a/blockjob.c +++ b/blockjob.c @@ -62,6 +62,7 @@ static bool is_block_job(Job *job) BlockJob *block_job_next(BlockJob *bjob) { Job *job = bjob ? >job : NULL; +assert(qemu_in_main_thread()); do { job = job_next(job); @@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob) BlockJob *block_job_get(const char *id) { Job *job = job_get(id); +assert(qemu_in_main_thread()); if (job && is_block_job(job)) { return container_of(job, BlockJob, job); @@ -185,6 +187,7 @@ static const BdrvChildClass child_job = { void block_job_remove_all_bdrv(BlockJob *job) { +assert(qemu_in_main_thread()); /* * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), * which will also traverse job->nodes, so consume the list one by @@ -207,6 +210,7 @@ void block_job_remove_all_bdrv(BlockJob *job) bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) { GSList *el; +assert(qemu_in_main_thread()); for (el = job->nodes; el; el = el->next) { BdrvChild *c = el->data; @@ -223,6 +227,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, { BdrvChild *c; bool need_context_ops; +assert(qemu_in_main_thread()); bdrv_ref(bs); @@ -272,6 +277,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) const BlockJobDriver *drv = block_job_driver(job); int64_t old_speed = job->speed; +assert(qemu_in_main_thread()); + if (job_apply_verb(>job, JOB_VERB_SET_SPEED, errp) < 0) { return false; } @@ -309,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp) BlockJobInfo *info; uint64_t progress_current, progress_total; +assert(qemu_in_main_thread()); + if (block_job_is_internal(job)) { error_setg(errp, "Cannot query QEMU internal jobs"); return NULL; @@ -498,6 +507,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, void block_job_iostatus_reset(BlockJob *job) { +assert(qemu_in_main_thread()); if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { return; } -- 2.27.0
[PATCH v5 24/31] block_int-common.h: assertions in the callers of BdrvChildClass function pointers
Signed-off-by: Emanuele Giuseppe Esposito --- block.c | 9 + 1 file changed, 9 insertions(+) diff --git a/block.c b/block.c index 180884b8c0..a0309f827d 100644 --- a/block.c +++ b/block.c @@ -1491,6 +1491,7 @@ const BdrvChildClass child_of_bds = { AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) { +assert(qemu_in_main_thread()); return c->klass->get_parent_aio_context(c); } @@ -2120,6 +2121,7 @@ bool bdrv_is_writable(BlockDriverState *bs) static char *bdrv_child_user_desc(BdrvChild *c) { +assert(qemu_in_main_thread()); return c->klass->get_parent_desc(c); } @@ -2832,6 +2834,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, assert(!child->frozen); assert(old_bs != new_bs); +assert(qemu_in_main_thread()); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); @@ -2928,6 +2931,7 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; +assert(qemu_in_main_thread()); /* * Pass free_empty_child=false, because we still need the child * for the AioContext operations on the parent below; those @@ -3296,6 +3300,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) { BdrvChild *c; +assert(qemu_in_main_thread()); QLIST_FOREACH(c, >parents, next_parent) { if (c->klass->change_media) { c->klass->change_media(c, load); @@ -3792,6 +3797,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, assert(!child_class || !flags); assert(!child_class == !parent); +assert(qemu_in_main_thread()); if (reference) { bool options_non_empty = options ? qdict_size(options) : false; @@ -4178,6 +4184,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, * important to avoid graph changes between the recursive queuing here and * bdrv_reopen_multiple(). */ assert(bs->quiesce_counter > 0); +assert(qemu_in_main_thread()); if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); @@ -7271,6 +7278,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, BdrvChild *child, *parent; g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +assert(qemu_in_main_thread()); if (old_context == new_context) { return; @@ -7343,6 +7351,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { +assert(qemu_in_main_thread()); if (g_slist_find(*ignore, c)) { return true; } -- 2.27.0
[PATCH v5 09/31] block: introduce assert_bdrv_graph_writable
We want to be sure that the functions that write the child and parent list of a bs are under BQL and drain. BQL prevents from concurrent writings from the GS API, while drains protect from I/O. TODO: drains are missing in some functions using this assert. Therefore a proper assertion will fail. Because adding drains requires additional discussions, they will be added in future series. Signed-off-by: Emanuele Giuseppe Esposito --- include/block/block_int-global-state.h | 10 +- block.c| 4 block/io.c | 11 +++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index a1b7d0579d..fa96e8b449 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -312,4 +312,12 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, */ void bdrv_drain_all_end_quiesce(BlockDriverState *bs); -#endif /* BLOCK_INT_GLOBAL_STATE*/ +/** + * Make sure that the function is running under both drain and BQL. + * The latter protects from concurrent writings + * from the GS API, while the former prevents concurrent reads + * from I/O. + */ +void assert_bdrv_graph_writable(BlockDriverState *bs); + +#endif /* BLOCK_INT_GLOBAL_STATE */ diff --git a/block.c b/block.c index 198ec636ff..522a273140 100644 --- a/block.c +++ b/block.c @@ -1416,6 +1416,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; +assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(>children, child, next); if (child->role & BDRV_CHILD_COW) { @@ -1435,6 +1436,7 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_unapply_subtree_drain(child, bs); +assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); } @@ -2818,6 +2820,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, if (child->klass->detach) { child->klass->detach(child); } +assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } @@ -2827,6 +2830,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } if (new_bs) { +assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(_bs->parents, child, next_parent); /* diff --git a/block/io.c b/block/io.c index cb095deeec..3be08cad29 100644 --- a/block/io.c +++ b/block/io.c @@ -734,6 +734,17 @@ void bdrv_drain_all(void) bdrv_drain_all_end(); } +void assert_bdrv_graph_writable(BlockDriverState *bs) +{ +/* + * TODO: this function is incomplete. Because the users of this + * assert lack the necessary drains, check only for BQL. + * Once the necessary drains are added, + * assert also for qatomic_read(>quiesce_counter) > 0 + */ +assert(qemu_in_main_thread()); +} + /** * Remove an active request from the tracked requests list * -- 2.27.0
[PATCH v5 26/31] job.h: split function pointers in JobDriver
The job API will be handled separately in another serie. Signed-off-by: Emanuele Giuseppe Esposito --- include/qemu/job.h | 22 ++ 1 file changed, 22 insertions(+) diff --git a/include/qemu/job.h b/include/qemu/job.h index 6e67b6977f..4ea7a4a0cd 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -169,6 +169,12 @@ typedef struct Job { * Callbacks and other information about a Job driver. */ struct JobDriver { + +/* + * These fields are initialized when this object is created, + * and are never changed afterwards + */ + /** Derived Job struct size */ size_t instance_size; @@ -184,9 +190,18 @@ struct JobDriver { * aborted. If it returns zero, the job moves into the WAITING state. If it * is the last job to complete in its transaction, all jobs in the * transaction move from WAITING to PENDING. + * + * This callback must be run in the job's context. */ int coroutine_fn (*run)(Job *job, Error **errp); +/* + * Functions run without regard to the BQL that may run in any + * arbitrary thread. These functions do not need to be thread-safe + * because the caller ensures that they are invoked from one + * thread at time. + */ + /** * If the callback is not NULL, it will be invoked when the job transitions * into the paused state. Paused jobs must not perform any asynchronous @@ -201,6 +216,13 @@ struct JobDriver { */ void coroutine_fn (*resume)(Job *job); +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * Called when the job is resumed by the user (i.e. user_paused becomes * false). .user_resume is called before .resume. -- 2.27.0
[PATCH v5 29/31] jobs: introduce pre_run function in JobDriver
.pre_run takes care of doing some initial job setup just before the job is run inside the coroutine. Doing so can be useful to invoke GS functions that require the BQL held. Signed-off-by: Emanuele Giuseppe Esposito --- include/qemu/job.h | 9 + job.c | 13 + 2 files changed, 22 insertions(+) diff --git a/include/qemu/job.h b/include/qemu/job.h index 4ea7a4a0cd..915ceff425 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -223,6 +223,15 @@ struct JobDriver { * the GS API. */ +/** + * Called in the Main Loop context, before the job coroutine + * is started in the job aiocontext. Useful to perform operations + * that needs to be done under BQL (like permissions setup). + * On success, it returns 0. Otherwise the job failed to initialize + * and won't start. + */ +int (*pre_run)(Job *job, Error **errp); + /** * Called when the job is resumed by the user (i.e. user_paused becomes * false). .user_resume is called before .resume. diff --git a/job.c b/job.c index bb57ec6887..e048037099 100644 --- a/job.c +++ b/job.c @@ -967,11 +967,24 @@ static void coroutine_fn job_co_entry(void *opaque) aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); } +static int job_pre_run(Job *job) +{ +assert(qemu_in_main_thread()); +if (job->driver->pre_run) { +return job->driver->pre_run(job, >err); +} + +return 0; +} + void job_start(Job *job) { assert(job && !job_started(job) && job->paused && job->driver && job->driver->run); job->co = qemu_coroutine_create(job_co_entry, job); +if (job_pre_run(job)) { +return; +} job->pause_count--; job->busy = true; job->paused = false; -- 2.27.0
[PATCH v5 17/31] assertions for blockdev.h global state API
Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi --- block/block-backend.c | 3 +++ blockdev.c| 15 +++ 2 files changed, 18 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 56670a774f..aad6d9a4c3 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -794,6 +794,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) */ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) { +assert(qemu_in_main_thread()); return blk->legacy_dinfo; } @@ -805,6 +806,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) { assert(!blk->legacy_dinfo); +assert(qemu_in_main_thread()); return blk->legacy_dinfo = dinfo; } @@ -815,6 +817,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) { BlockBackend *blk = NULL; +assert(qemu_in_main_thread()); while ((blk = blk_next(blk)) != NULL) { if (blk->legacy_dinfo == dinfo) { diff --git a/blockdev.c b/blockdev.c index 7706919410..eebd9317c2 100644 --- a/blockdev.c +++ b/blockdev.c @@ -113,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs) BlockBackend *blk; DriveInfo *dinfo; +assert(qemu_in_main_thread()); + if (max_devs <= 0) { return; } @@ -142,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk) DriveInfo *dinfo = blk_legacy_dinfo(blk); BlockJob *job; +assert(qemu_in_main_thread()); + if (!dinfo) { return; } @@ -163,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk) void blockdev_auto_del(BlockBackend *blk) { DriveInfo *dinfo = blk_legacy_dinfo(blk); +assert(qemu_in_main_thread()); if (dinfo && dinfo->auto_del) { monitor_remove_blk(blk); @@ -187,6 +192,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit) BlockBackend *blk; DriveInfo *dinfo; +assert(qemu_in_main_thread()); + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); if (dinfo && dinfo->type == type @@ -209,6 +216,8 @@ void drive_check_orphaned(void) Location loc; bool orphans = false; +assert(qemu_in_main_thread()); + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); /* @@ -242,6 +251,7 @@ void drive_check_orphaned(void) DriveInfo *drive_get_by_index(BlockInterfaceType type, int index) { +assert(qemu_in_main_thread()); return drive_get(type, drive_index_to_bus_id(type, index), drive_index_to_unit_id(type, index)); @@ -253,6 +263,8 @@ int drive_get_max_bus(BlockInterfaceType type) BlockBackend *blk; DriveInfo *dinfo; +assert(qemu_in_main_thread()); + max_bus = -1; for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); @@ -269,6 +281,7 @@ int drive_get_max_bus(BlockInterfaceType type) DriveInfo *drive_get_next(BlockInterfaceType type) { static int next_block_unit[IF_COUNT]; +assert(qemu_in_main_thread()); return drive_get(type, 0, next_block_unit[type]++); } @@ -749,6 +762,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, const char *filename; int i; +assert(qemu_in_main_thread()); + /* Change legacy command line options into QMP ones */ static const struct { const char *from; -- 2.27.0
[PATCH v5 18/31] include/block/snapshot: global state API + assertions
Snapshots run also under the BQL lock, so they all are in the global state API. The aiocontext lock that they hold is currently an overkill and in future could be removed. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi --- include/block/snapshot.h | 13 +++-- block/snapshot.c | 28 migration/savevm.c | 2 ++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 940345692f..cda82c1ba1 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo { uint64_t icount; /* record/replay step */ } QEMUSnapshotInfo; +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, const char *name); bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, @@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, Error **errp); -/* Group operations. All block drivers are involved. +/* + * Group operations. All block drivers are involved. * These functions will properly handle dataplane (take aio_context_acquire - * when appropriate for appropriate block drivers */ + * when appropriate for appropriate block drivers + */ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, Error **errp); diff --git a/block/snapshot.c b/block/snapshot.c index ccacda8bd5..3dd3c9d3bd 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, QEMUSnapshotInfo *sn_tab, *sn; int nb_sns, i, ret; +assert(qemu_in_main_thread()); + ret = -ENOENT; nb_sns = bdrv_snapshot_list(bs, _tab); if (nb_sns < 0) { @@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, bool ret = false; assert(id || name); +assert(qemu_in_main_thread()); nb_sns = bdrv_snapshot_list(bs, _tab); if (nb_sns < 0) { @@ -200,6 +203,7 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) int bdrv_can_snapshot(BlockDriverState *bs) { BlockDriver *drv = bs->drv; +assert(qemu_in_main_thread()); if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { return 0; } @@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs, { BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + +assert(qemu_in_main_thread()); + if (!drv) { return -ENOMEDIUM; } @@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs, BdrvChild **fallback_ptr; int ret, open_ret; +assert(qemu_in_main_thread()); + if (!drv) { error_setg(errp, "Block driver is closed"); return -ENOMEDIUM; @@ -348,6 +357,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); int ret; +assert(qemu_in_main_thread()); + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs, { BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + +assert(qemu_in_main_thread()); if (!drv) { return -ENOMEDIUM; } @@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, { BlockDriver *drv = bs->drv; +assert(qemu_in_main_thread()); + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, int ret; Error *local_err = NULL; +assert(qemu_in_main_thread()); + ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, _err); if (ret == -ENOENT || ret == -EINVAL) { error_free(local_err); @@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; +assert(qemu_in_main_thread()); + if (bdrv_all_get_snapshot_devices(has_devices, devices, , errp) < 0) { return false; } @@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; +assert(qemu_in_main_thread()); + if (bdrv_all_get_snapshot_devices(has_devices, devices, , errp) < 0) { return -1; } @@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; +assert(qemu_in_main_thread()); + if
[PATCH v5 31/31] block.c: assertions to the block layer permissions API
Now that we "covered" the three main cases where the permission API was being used under BQL (fuse, amend and invalidate_cache), we can safely assert for the permission functions implemented in block.c Signed-off-by: Emanuele Giuseppe Esposito --- block.c | 12 1 file changed, 12 insertions(+) diff --git a/block.c b/block.c index 805974676b..6056ec4bc5 100644 --- a/block.c +++ b/block.c @@ -2138,6 +2138,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) assert(a->bs); assert(a->bs == b->bs); +assert(qemu_in_main_thread()); if ((b->perm & a->shared_perm) == b->perm) { return true; @@ -2161,6 +2162,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) { BdrvChild *a, *b; +assert(qemu_in_main_thread()); /* * During the loop we'll look at each pair twice. That's correct because @@ -2245,6 +2247,8 @@ static void bdrv_child_set_perm_abort(void *opaque) { BdrvChildSetPermState *s = opaque; +assert(qemu_in_main_thread()); + s->child->perm = s->old_perm; s->child->shared_perm = s->old_shared_perm; } @@ -2258,6 +2262,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Transaction *tran) { BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); +assert(qemu_in_main_thread()); *s = (BdrvChildSetPermState) { .child = c, @@ -2442,6 +2447,7 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, BdrvChild *c; int ret; uint64_t cumulative_perms, cumulative_shared_perms; +assert(qemu_in_main_thread()); bdrv_get_cumulative_perm(bs, _perms, _shared_perms); @@ -2510,6 +2516,7 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, { int ret; BlockDriverState *bs; +assert(qemu_in_main_thread()); for ( ; list; list = list->next) { bs = list->data; @@ -2582,6 +2589,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) int ret; Transaction *tran = tran_new(); g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); +assert(qemu_in_main_thread()); ret = bdrv_list_refresh_perms(list, NULL, tran, errp); tran_finalize(tran, ret); @@ -2648,6 +2656,7 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { +assert(qemu_in_main_thread()); *nperm = perm & DEFAULT_PERM_PASSTHROUGH; *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; } @@ -2659,6 +2668,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared) { assert(role & BDRV_CHILD_COW); +assert(qemu_in_main_thread()); /* * We want consistent read from backing files if the parent needs it. @@ -2696,6 +2706,7 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, { int flags; +assert(qemu_in_main_thread()); assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); flags = bdrv_reopen_get_flags(reopen_queue, bs); @@ -6094,6 +6105,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, { BlockPermission qapi_perm; XDbgBlockGraphEdge *edge; +assert(qemu_in_main_thread()); edge = g_new0(XDbgBlockGraphEdge, 1); -- 2.27.0
[PATCH v5 27/31] job.h: assertions in the callers of JobDriver funcion pointers
Also assert that job->run() callback is called in the job aiocontext. Signed-off-by: Emanuele Giuseppe Esposito --- job.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/job.c b/job.c index dbfa67bb0a..bb57ec6887 100644 --- a/job.c +++ b/job.c @@ -380,6 +380,8 @@ void job_ref(Job *job) void job_unref(Job *job) { +assert(qemu_in_main_thread()); + if (--job->refcnt == 0) { assert(job->status == JOB_STATUS_NULL); assert(!timer_pending(>sleep_timer)); @@ -601,6 +603,7 @@ bool job_user_paused(Job *job) void job_user_resume(Job *job, Error **errp) { assert(job); +assert(qemu_in_main_thread()); if (!job->user_paused || job->pause_count <= 0) { error_setg(errp, "Can't resume a job that was not paused"); return; @@ -671,6 +674,7 @@ static void job_update_rc(Job *job) static void job_commit(Job *job) { assert(!job->ret); +assert(qemu_in_main_thread()); if (job->driver->commit) { job->driver->commit(job); } @@ -679,6 +683,7 @@ static void job_commit(Job *job) static void job_abort(Job *job) { assert(job->ret); +assert(qemu_in_main_thread()); if (job->driver->abort) { job->driver->abort(job); } @@ -686,6 +691,7 @@ static void job_abort(Job *job) static void job_clean(Job *job) { +assert(qemu_in_main_thread()); if (job->driver->clean) { job->driver->clean(job); } @@ -725,6 +731,7 @@ static int job_finalize_single(Job *job) static void job_cancel_async(Job *job, bool force) { +assert(qemu_in_main_thread()); if (job->driver->cancel) { force = job->driver->cancel(job, force); } else { @@ -824,6 +831,7 @@ static void job_completed_txn_abort(Job *job) static int job_prepare(Job *job) { +assert(qemu_in_main_thread()); if (job->ret == 0 && job->driver->prepare) { job->ret = job->driver->prepare(job); job_update_rc(job); @@ -950,6 +958,7 @@ static void coroutine_fn job_co_entry(void *opaque) { Job *job = opaque; +assert(job->aio_context == qemu_get_current_aio_context()); assert(job && job->driver && job->driver->run); job_pause_point(job); job->ret = job->driver->run(job, >err); @@ -1053,6 +1062,7 @@ void job_complete(Job *job, Error **errp) { /* Should not be reachable via external interface for internal jobs */ assert(job->id); +assert(qemu_in_main_thread()); if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { return; } -- 2.27.0
Re: [PATCH 2/4] include/sysemu/blockdev.h: rename if_name in block_if_name
On 11/24/21 07:36, Emanuele Giuseppe Esposito wrote: > In preparation to next patch, where we export it to be used > also in softmmu/vlc.c "vl.c"? :) > > Signed-off-by: Emanuele Giuseppe Esposito > --- > blockdev.c | 14 +++--- > 1 file changed, 7 insertions(+), 7 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
[PATCH 0/4] block: minor refactoring in preparation to the block layer API split
These patches are taken from my old patches and feedback of my series "block layer: split block APIs in global state and I/O". The reason for a separate series is that the original one is already too long, and these patches are just refactoring the code, mainly deleting or moving functions in blockdev.h and block_int.h. Signed-off-by: Emanuele Giuseppe Esposito Emanuele Giuseppe Esposito (4): block_int: make bdrv_backing_overridden static include/sysemu/blockdev.h: rename if_name in block_if_name include/sysemu/blockdev.h: move drive_add and inline drive_def include/sysemu/blockdev.h: remove drive_get_max_devs include/block/block_int.h | 3 -- include/sysemu/blockdev.h | 7 ++--- block.c| 4 ++- block/monitor/block-hmp-cmds.c | 2 +- blockdev.c | 54 -- softmmu/vl.c | 25 +++- 6 files changed, 36 insertions(+), 59 deletions(-) -- 2.27.0
[PATCH 4/4] include/sysemu/blockdev.h: remove drive_get_max_devs
Remove drive_get_max_devs, as it is not used by anyone. Signed-off-by: Emanuele Giuseppe Esposito --- include/sysemu/blockdev.h | 1 - blockdev.c| 17 - 2 files changed, 18 deletions(-) diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h index aacc587a33..c4b7b8b54e 100644 --- a/include/sysemu/blockdev.h +++ b/include/sysemu/blockdev.h @@ -50,7 +50,6 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit); void drive_check_orphaned(void); DriveInfo *drive_get_by_index(BlockInterfaceType type, int index); int drive_get_max_bus(BlockInterfaceType type); -int drive_get_max_devs(BlockInterfaceType type); DriveInfo *drive_get_next(BlockInterfaceType type); DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type, diff --git a/blockdev.c b/blockdev.c index 928bb743a1..84897f9d70 100644 --- a/blockdev.c +++ b/blockdev.c @@ -168,23 +168,6 @@ void blockdev_auto_del(BlockBackend *blk) } } -/** - * Returns the current mapping of how many units per bus - * a particular interface can support. - * - * A positive integer indicates n units per bus. - * 0 implies the mapping has not been established. - * -1 indicates an invalid BlockInterfaceType was given. - */ -int drive_get_max_devs(BlockInterfaceType type) -{ -if (type >= IF_IDE && type < IF_COUNT) { -return if_max_devs[type]; -} - -return -1; -} - static int drive_index_to_bus_id(BlockInterfaceType type, int index) { int max_devs = if_max_devs[type]; -- 2.27.0
[PATCH 2/4] include/sysemu/blockdev.h: rename if_name in block_if_name
In preparation to next patch, where we export it to be used also in softmmu/vlc.c Signed-off-by: Emanuele Giuseppe Esposito --- blockdev.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/blockdev.c b/blockdev.c index b35072644e..1b6ffbbc73 100644 --- a/blockdev.c +++ b/blockdev.c @@ -71,7 +71,7 @@ void bdrv_set_monitor_owned(BlockDriverState *bs) QTAILQ_INSERT_TAIL(_bdrv_states, bs, monitor_list); } -static const char *const if_name[IF_COUNT] = { +static const char *const block_if_name[IF_COUNT] = { [IF_NONE] = "none", [IF_IDE] = "ide", [IF_SCSI] = "scsi", @@ -120,7 +120,7 @@ void override_max_devs(BlockInterfaceType type, int max_devs) if (dinfo->type == type) { fprintf(stderr, "Cannot override units-per-bus property of" " the %s interface, because a drive of that type has" -" already been added.\n", if_name[type]); +" already been added.\n", block_if_name[type]); g_assert_not_reached(); } } @@ -212,7 +212,7 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, return NULL; } if (type != IF_DEFAULT) { -qemu_opt_set(opts, "if", if_name[type], _abort); +qemu_opt_set(opts, "if", block_if_name[type], _abort); } if (index >= 0) { qemu_opt_set_number(opts, "index", index, _abort); @@ -269,7 +269,7 @@ void drive_check_orphaned(void) qemu_opts_loc_restore(dinfo->opts); error_report("machine type does not support" " if=%s,bus=%d,unit=%d", - if_name[dinfo->type], dinfo->bus, dinfo->unit); + block_if_name[dinfo->type], dinfo->bus, dinfo->unit); loc_pop(); orphans = true; } @@ -887,7 +887,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, value = qemu_opt_get(legacy_opts, "if"); if (value) { for (type = 0; - type < IF_COUNT && strcmp(value, if_name[type]); + type < IF_COUNT && strcmp(value, block_if_name[type]); type++) { } if (type == IF_COUNT) { @@ -945,10 +945,10 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd"; } if (max_devs) { -new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id, +new_id = g_strdup_printf("%s%i%s%i", block_if_name[type], bus_id, mediastr, unit_id); } else { -new_id = g_strdup_printf("%s%s%i", if_name[type], +new_id = g_strdup_printf("%s%s%i", block_if_name[type], mediastr, unit_id); } qdict_put_str(bs_opts, "id", new_id); -- 2.27.0
[PATCH v5 00/31] block layer: split block APIs in global state and I/O
Currently, block layer APIs like block.h contain a mix of functions that are either running in the main loop and under the BQL, or are thread-safe functions and run in iothreads performing I/O. The functions running under BQL also take care of modifying the block graph, by using drain and/or aio_context_acquire/release. This makes it very confusing to understand where each function runs, and what assumptions it provided with regards to thread safety. We call the functions running under BQL "global state (GS) API", and distinguish them from the thread-safe "I/O API". The aim of this series is to split the relevant block headers in global state and I/O sub-headers. The division will be done in this way: header.h will be split in header-global-state.h, header-io.h and header-common.h. The latter will just contain the data structures needed by header-global-state and header-io, and common helpers that are neither in GS nor in I/O. header.h will remain for legacy and to avoid changing all includes in all QEMU c files, but will only include the two new headers. No function shall be added in header.c . Once we split all relevant headers, it will be much easier to see what uses the AioContext lock and remove it, which is the overall main goal of this and other series that I posted/will post. In addition to splitting the relevant headers shown in this series, it is also very helpful splitting the function pointers in some block structures, to understand what runs under AioContext lock and what doesn't. This is what patches 21-27 do. Each function in the GS API will have an assertion, checking that it is always running under BQL. I/O functions are instead thread safe (or so should be), meaning that they *can* run under BQL, but also in an iothread in another AioContext. Therefore they do not provide any assertion, and need to be audited manually to verify the correctness. Adding assetions has helped finding 2 bugs already, as shown in my series "Migration: fix missing iothread locking". Tested this series by running unit tests, qemu-iotests and qtests (x86_64). Some functions in the GS API are used everywhere but not properly tested. Therefore their assertion is never actually run in the tests, so despite my very careful auditing, it is not impossible to exclude that some will trigger while actually using QEMU. Patch 1 introduces qemu_in_main_thread(), the function used in all assertions. This had to be introduced otherwise all unit tests would fail, since they run in the main loop but use the code in stubs/iothread.c Patches 2-27 (with the exception of patch 9-10, that are an additional assert) are all structured in the same way: first we split the header and in the next (or same, if small) patch we add assertions. Patch 28-31 take care instead of the block layer permission API, fixing some bugs where they are used in I/O functions. Next steps once this get reviewed: 1) audit the GS API and replace the AioContext lock with drains, or remove them when not necessary (requires further discussion). 2) [optional as it should be already the case] audit the I/O API and check that thread safety is guaranteed In order to keep this series a little bit smaller, move some refactoring patches in another series, "block: minor refactoring in preparation to the block layer API split". Based-on: <20211124063640.3118897-1-eespo...@redhat.com> Signed-off-by: Emanuele Giuseppe Esposito --- v5 -> v6: * In short, apply all Hanna's comments. More in details, the following functions in the following headers have been moved: block-backend: blk_replace_bs (to gs) blk_nb_sectors (to io) blk_name (to io) blk_set_perm (to io) blk_get_perm (to io) blk_drain (to io) blk_abort_aio_request (to io) blk_make_empty (to gs) blk_invalidate_cache (was in io, but had GS assertion) blk_aio_cancel (to gs) block: bdrv_replace_child_bs (to gs) bdrv_get_device_name (to io) bdrv_get_device_or_node_name (to io) bdrv_drained_end_no_poll (to io) bdrv_block_status (was in io, but had GS assertion) bdrv_drain (to io) bdrv_co_drain (to io) bdrv_make_zero (was in GS, but did not have the assertion) bdrv_save_vmstate (to io) bdrv_load_vmstate (to io) bdrv_aio_cancel_async (to io) block_int: bdrv_get_parent_name (to io) bdrv_apply_subtree_drain (to io) bdrv_unapply_subtree_drain (to io) bdrv_co_copy_range_from (was in io, but had GS assertion) bdrv_co_copy_range_to (was in io, but had GS assertion) ->bdrv_save_vmstate (to io) ->bdrv_load_vmstate (to io) coding style (assertion after definitions): bdrv_save_vmstate bdrv_load_vmstate block_job_next block_job_get new patches: block.c: modify .attach and .detach callbacks of child_of_bds introduce pre_run as JobDriver callback to handle bdrv_co_amend usage of permission function leave blk_set/get_perm as a TODO in fuse.c make sure
[PATCH v5 03/31] assertions for block global state API
All the global state (GS) API functions will check that qemu_in_main_thread() returns true. If not, it means that the safety of BQL cannot be guaranteed, and they need to be moved to I/O. Signed-off-by: Emanuele Giuseppe Esposito --- block.c| 135 - block/commit.c | 2 + block/io.c | 14 + blockdev.c | 1 + 4 files changed, 150 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 84de6867e6..49bee69e27 100644 --- a/block.c +++ b/block.c @@ -278,6 +278,8 @@ bool bdrv_is_read_only(BlockDriverState *bs) int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, bool ignore_allow_rdw, Error **errp) { +assert(qemu_in_main_thread()); + /* Do not set read_only if copy_on_read is enabled */ if (bs->copy_on_read && read_only) { error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", @@ -311,6 +313,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, Error **errp) { int ret = 0; +assert(qemu_in_main_thread()); if (!(bs->open_flags & BDRV_O_RDWR)) { return 0; @@ -393,6 +396,7 @@ char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) void bdrv_register(BlockDriver *bdrv) { assert(bdrv->format_name); +assert(qemu_in_main_thread()); QLIST_INSERT_HEAD(_drivers, bdrv, list); } @@ -401,6 +405,8 @@ BlockDriverState *bdrv_new(void) BlockDriverState *bs; int i; +assert(qemu_in_main_thread()); + bs = g_new0(BlockDriverState, 1); QLIST_INIT(>dirty_bitmaps); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { @@ -443,6 +449,8 @@ BlockDriver *bdrv_find_format(const char *format_name) BlockDriver *drv1; int i; +assert(qemu_in_main_thread()); + drv1 = bdrv_do_find_format(format_name); if (drv1) { return drv1; @@ -492,11 +500,13 @@ static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) { +assert(qemu_in_main_thread()); return bdrv_format_is_whitelisted(drv->format_name, read_only); } bool bdrv_uses_whitelist(void) { +assert(qemu_in_main_thread()); return use_bdrv_whitelist; } @@ -527,6 +537,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, { int ret; +assert(qemu_in_main_thread()); + Coroutine *co; CreateCo cco = { .drv = drv, @@ -702,6 +714,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) QDict *qdict; int ret; +assert(qemu_in_main_thread()); + drv = bdrv_find_protocol(filename, true, errp); if (drv == NULL) { return -ENOENT; @@ -799,6 +813,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { BlockDriver *drv = bs->drv; BlockDriverState *filtered = bdrv_filter_bs(bs); +assert(qemu_in_main_thread()); if (drv && drv->bdrv_probe_blocksizes) { return drv->bdrv_probe_blocksizes(bs, bsz); @@ -819,6 +834,7 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { BlockDriver *drv = bs->drv; BlockDriverState *filtered = bdrv_filter_bs(bs); +assert(qemu_in_main_thread()); if (drv && drv->bdrv_probe_geometry) { return drv->bdrv_probe_geometry(bs, geo); @@ -910,6 +926,7 @@ BlockDriver *bdrv_find_protocol(const char *filename, const char *p; int i; +assert(qemu_in_main_thread()); /* TODO Drivers without bdrv_file_open must be specified explicitly */ /* @@ -975,6 +992,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, { int score_max = 0, score; BlockDriver *drv = NULL, *d; +assert(qemu_in_main_thread()); QLIST_FOREACH(d, _drivers, list) { if (d->bdrv_probe) { @@ -1122,6 +1140,7 @@ int bdrv_parse_aio(const char *mode, int *flags) */ int bdrv_parse_discard_flags(const char *mode, int *flags) { +assert(qemu_in_main_thread()); *flags &= ~BDRV_O_UNMAP; if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { @@ -1142,6 +1161,7 @@ int bdrv_parse_discard_flags(const char *mode, int *flags) */ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) { +assert(qemu_in_main_thread()); *flags &= ~BDRV_O_CACHE_MASK; if (!strcmp(mode, "off") || !strcmp(mode, "none")) { @@ -1634,6 +1654,8 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, BlockDriverState *bs; int ret; +assert(qemu_in_main_thread()); + bs = bdrv_new(); bs->open_flags = flags; bs->options = options ?: qdict_new(); @@ -1659,6 +1681,7 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp) { +
[PATCH v5 05/31] block-backend: special comments for blk_set/get_perm due to fuse
Fuse logic can be classified as I/O, so there is no BQL held during its execution. And yet, it uses blk_{get/set}_perm functions, that are classified as BQL and clearly require the BQL lock. Since there is no easy solution for this, add a couple of TODOs and FIXME in the relevant sections of the code. Signed-off-by: Emanuele Giuseppe Esposito --- block/block-backend.c | 10 ++ block/export/fuse.c | 16 ++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 1f0bda578e..7a4b50a8f0 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -888,6 +888,11 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, Error **errp) { int ret; +/* + * FIXME: blk_{get/set}_perm should be always called under + * BQL, but it is not the case right now (see block/export/fuse.c) + */ +/* assert(qemu_in_main_thread()); */ if (blk->root && !blk->disable_perm) { ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); @@ -904,6 +909,11 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) { +/* + * FIXME: blk_{get/set}_perm should be always called under + * BQL, but it is not the case right now (see block/export/fuse.c) + */ +/* assert(qemu_in_main_thread()); */ *perm = blk->perm; *shared_perm = blk->shared_perm; } diff --git a/block/export/fuse.c b/block/export/fuse.c index 823c126d23..7ceb8d783b 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -89,7 +89,10 @@ static int fuse_export_create(BlockExport *blk_exp, /* For growable exports, take the RESIZE permission */ if (args->growable) { uint64_t blk_perm, blk_shared_perm; - +/* + * FIXME: blk_{get/set}_perm should not be here, as permissions + * should be modified only under BQL and here we are not! + */ blk_get_perm(exp->common.blk, _perm, _shared_perm); ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, @@ -400,6 +403,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, /* Growable exports have a permanent RESIZE permission */ if (!exp->growable) { + +/* + * FIXME: blk_{get/set}_perm should not be here, as permissions + * should be modified only under BQL and here we are not! + */ blk_get_perm(exp->common.blk, _perm, _shared_perm); ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, @@ -413,7 +421,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, truncate_flags, NULL); if (!exp->growable) { -/* Must succeed, because we are only giving up the RESIZE permission */ +/* + * Must succeed, because we are only giving up the RESIZE permission. + * FIXME: blk_{get/set}_perm should not be here, as permissions + * should be modified only under BQL and here we are not! + */ blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, _abort); } -- 2.27.0
[PATCH v5 04/31] include/sysemu/block-backend: split header into I/O and global state (GS) API
Similarly to the previous patches, split block-backend.h in block-backend-io.h and block-backend-global-state.h In addition, remove "block/block.h" include as it seems it is not necessary anymore, together with "qemu/iov.h" block-backend-common.h contains the structures shared between the two headers, and the functions that can't be categorized as I/O or global state. Assertions are added in the next patch. Signed-off-by: Emanuele Giuseppe Esposito --- include/sysemu/block-backend-common.h | 84 ++ include/sysemu/block-backend-global-state.h | 121 + include/sysemu/block-backend-io.h | 139 ++ include/sysemu/block-backend.h | 269 +--- block/block-backend.c | 9 +- 5 files changed, 353 insertions(+), 269 deletions(-) create mode 100644 include/sysemu/block-backend-common.h create mode 100644 include/sysemu/block-backend-global-state.h create mode 100644 include/sysemu/block-backend-io.h diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h new file mode 100644 index 00..6963bbf45a --- /dev/null +++ b/include/sysemu/block-backend-common.h @@ -0,0 +1,84 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster , + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_COMMON_H +#define BLOCK_BACKEND_COMMON_H + +#include "qemu/iov.h" +#include "block/throttle-groups.h" + +/* + * TODO Have to include block/block.h for a bunch of block layer + * types. Unfortunately, this pulls in the whole BlockDriverState + * API, which we don't want used by many BlockBackend users. Some of + * the types belong here, and the rest should be split into a common + * header and one for the BlockDriverState API. + */ +#include "block/block.h" + +/* Callbacks for block device models */ +typedef struct BlockDevOps { +/* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ +void (*change_media_cb)(void *opaque, bool load, Error **errp); +/* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ +void (*eject_request_cb)(void *opaque, bool force); +/* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ +bool (*is_tray_open)(void *opaque); +/* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ +bool (*is_medium_locked)(void *opaque); +/* + * Runs when the size changed (e.g. monitor command block_resize) + */ +void (*resize_cb)(void *opaque); +/* + * Runs when the backend receives a drain request. + */ +void (*drained_begin)(void *opaque); +/* + * Runs when the backend's last drain request ends. + */ +void (*drained_end)(void *opaque); +/* + * Is the device still busy? + */ +bool (*drained_poll)(void *opaque); +} BlockDevOps; + +/* + * This struct is embedded in (the private) BlockBackend struct and contains + * fields that must be public. This is in particular for QLIST_ENTRY() and + * friends so that BlockBackends can be kept in lists outside block-backend.c + */ +typedef struct BlockBackendPublic { +ThrottleGroupMember throttle_group_member; +} BlockBackendPublic; + +#endif /* BLOCK_BACKEND_COMMON_H */ diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h new file mode 100644 index 00..77009bf7a2 --- /dev/null +++ b/include/sysemu/block-backend-global-state.h @@ -0,0 +1,121 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster , + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_GS_H +#define BLOCK_BACKEND_GS_H + +#include "block-backend-common.h" + +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); +BlockBackend
[PATCH v5 08/31] assertions for block_int global state API
Signed-off-by: Emanuele Giuseppe Esposito --- block.c | 15 +++ block/backup.c | 1 + block/block-backend.c | 3 +++ block/commit.c | 2 ++ block/dirty-bitmap.c| 1 + block/io.c | 1 + block/mirror.c | 4 block/monitor/bitmap-qmp-cmds.c | 6 ++ block/stream.c | 2 ++ blockdev.c | 7 +++ 10 files changed, 42 insertions(+) diff --git a/block.c b/block.c index 49bee69e27..198ec636ff 100644 --- a/block.c +++ b/block.c @@ -662,6 +662,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, Error *local_err = NULL; int ret; +assert(qemu_in_main_thread()); + size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(_lookup, buf, @@ -2492,6 +2494,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, uint64_t cumulative_perms = 0; uint64_t cumulative_shared_perms = BLK_PERM_ALL; +assert(qemu_in_main_thread()); + QLIST_FOREACH(c, >parents, next_parent) { cumulative_perms |= c->perm; cumulative_shared_perms &= c->shared_perm; @@ -2552,6 +2556,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Transaction *tran = tran_new(); int ret; +assert(qemu_in_main_thread()); + bdrv_child_set_perm(c, perm, shared, tran); ret = bdrv_refresh_perms(c->bs, _err); @@ -2582,6 +2588,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) uint64_t parent_perms, parent_shared; uint64_t perms, shared; +assert(qemu_in_main_thread()); + bdrv_get_cumulative_perm(bs, _perms, _shared); bdrv_child_perm(bs, c->bs, c, c->role, NULL, parent_perms, parent_shared, , ); @@ -2724,6 +2732,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { +assert(qemu_in_main_thread()); if (role & BDRV_CHILD_FILTERED) { assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | BDRV_CHILD_COW))); @@ -3084,6 +3093,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *child = NULL; Transaction *tran = tran_new(); +assert(qemu_in_main_thread()); + ret = bdrv_attach_child_common(child_bs, child_name, child_class, child_role, perm, shared_perm, opaque, , tran, errp); @@ -7436,6 +7447,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs, { BlockDriverState *filtered; +assert(qemu_in_main_thread()); + if (!bs || !bs->drv) { return false; } @@ -7607,6 +7620,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) * would result in exactly bs->backing. */ static bool bdrv_backing_overridden(BlockDriverState *bs) { +assert(qemu_in_main_thread()); if (bs->backing) { return strcmp(bs->auto_backing_file, bs->backing->bs->filename); @@ -7995,6 +8009,7 @@ static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, */ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) { +assert(qemu_in_main_thread()); return bdrv_do_skip_filters(bs, true); } diff --git a/block/backup.c b/block/backup.c index 21d5983779..c53041772c 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, assert(bs); assert(target); +assert(qemu_in_main_thread()); /* QMP interface protects us from these cases */ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); diff --git a/block/block-backend.c b/block/block-backend.c index 11131ca68c..56670a774f 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1095,6 +1095,7 @@ static void blk_root_change_media(BdrvChild *child, bool load) */ bool blk_dev_has_removable_media(BlockBackend *blk) { +assert(qemu_in_main_thread()); return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb); } @@ -1112,6 +1113,7 @@ bool blk_dev_has_tray(BlockBackend *blk) */ void blk_dev_eject_request(BlockBackend *blk, bool force) { +assert(qemu_in_main_thread()); if (blk->dev_ops && blk->dev_ops->eject_request_cb) { blk->dev_ops->eject_request_cb(blk->dev_opaque, force); } @@ -1134,6 +1136,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk) */ bool blk_dev_is_medium_locked(BlockBackend *blk) { +assert(qemu_in_main_thread()); if (blk->dev_ops && blk->dev_ops->is_medium_locked) { return blk->dev_ops->is_medium_locked(blk->dev_opaque); } diff --git a/block/commit.c b/block/commit.c
[PATCH v5 10/31] block.c: modify .attach and .detach callbacks of child_of_bds
According to the assertions put in the previous patch, we should first drain and then modify the ->children list. In this way we prevent other iothreads to read the list while it is being updated. In this case, moving the drain won't cause any harm, because child is a parameter of the drain function so it will still be included in the operation, despite not being in the list. Signed-off-by: Emanuele Giuseppe Esposito --- block.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 522a273140..5516c84ec4 100644 --- a/block.c +++ b/block.c @@ -1416,6 +1416,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; +bdrv_apply_subtree_drain(child, bs); assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(>children, child, next); @@ -1423,7 +1424,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) bdrv_backing_attach(child); } -bdrv_apply_subtree_drain(child, bs); } static void bdrv_child_cb_detach(BdrvChild *child) @@ -1434,10 +1434,9 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_backing_detach(child); } -bdrv_unapply_subtree_drain(child, bs); - assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); +bdrv_unapply_subtree_drain(child, bs); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, -- 2.27.0
[PATCH v5 06/31] block/block-backend.c: assertions for block-backend
All the global state (GS) API functions will check that qemu_in_main_thread() returns true. If not, it means that the safety of BQL cannot be guaranteed, and they need to be moved to I/O. Signed-off-by: Emanuele Giuseppe Esposito --- block/block-backend.c | 83 ++ softmmu/qdev-monitor.c | 2 + 2 files changed, 85 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 7a4b50a8f0..11131ca68c 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -228,6 +228,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) void blk_set_force_allow_inactivate(BlockBackend *blk) { +assert(qemu_in_main_thread()); blk->force_allow_inactivate = true; } @@ -346,6 +347,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; +assert(qemu_in_main_thread()); + blk = g_new0(BlockBackend, 1); blk->refcnt = 1; blk->ctx = ctx; @@ -383,6 +386,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, { BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); +assert(qemu_in_main_thread()); + if (blk_insert_bs(blk, bs, errp) < 0) { blk_unref(blk); return NULL; @@ -411,6 +416,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, uint64_t perm = 0; uint64_t shared = BLK_PERM_ALL; +assert(qemu_in_main_thread()); + /* * blk_new_open() is mainly used in .bdrv_create implementations and the * tools where sharing isn't a major concern because the BDS stays private @@ -488,6 +495,7 @@ static void drive_info_del(DriveInfo *dinfo) int blk_get_refcnt(BlockBackend *blk) { +assert(qemu_in_main_thread()); return blk ? blk->refcnt : 0; } @@ -498,6 +506,7 @@ int blk_get_refcnt(BlockBackend *blk) void blk_ref(BlockBackend *blk) { assert(blk->refcnt > 0); +assert(qemu_in_main_thread()); blk->refcnt++; } @@ -508,6 +517,7 @@ void blk_ref(BlockBackend *blk) */ void blk_unref(BlockBackend *blk) { +assert(qemu_in_main_thread()); if (blk) { assert(blk->refcnt > 0); if (blk->refcnt > 1) { @@ -528,6 +538,7 @@ void blk_unref(BlockBackend *blk) */ BlockBackend *blk_all_next(BlockBackend *blk) { +assert(qemu_in_main_thread()); return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(_backends); } @@ -536,6 +547,8 @@ void blk_remove_all_bs(void) { BlockBackend *blk = NULL; +assert(qemu_in_main_thread()); + while ((blk = blk_all_next(blk)) != NULL) { AioContext *ctx = blk_get_aio_context(blk); @@ -559,6 +572,7 @@ void blk_remove_all_bs(void) */ BlockBackend *blk_next(BlockBackend *blk) { +assert(qemu_in_main_thread()); return blk ? QTAILQ_NEXT(blk, monitor_link) : QTAILQ_FIRST(_block_backends); } @@ -625,6 +639,7 @@ static void bdrv_next_reset(BdrvNextIterator *it) BlockDriverState *bdrv_first(BdrvNextIterator *it) { +assert(qemu_in_main_thread()); bdrv_next_reset(it); return bdrv_next(it); } @@ -662,6 +677,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) { assert(!blk->name); assert(name && name[0]); +assert(qemu_in_main_thread()); if (!id_wellformed(name)) { error_setg(errp, "Invalid device name"); @@ -689,6 +705,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) */ void monitor_remove_blk(BlockBackend *blk) { +assert(qemu_in_main_thread()); + if (!blk->name) { return; } @@ -715,6 +733,7 @@ BlockBackend *blk_by_name(const char *name) { BlockBackend *blk = NULL; +assert(qemu_in_main_thread()); assert(name); while ((blk = blk_next(blk)) != NULL) { if (!strcmp(name, blk->name)) { @@ -749,6 +768,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs) */ bool bdrv_has_blk(BlockDriverState *bs) { +assert(qemu_in_main_thread()); return bdrv_first_blk(bs) != NULL; } @@ -759,6 +779,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) { BdrvChild *c; +assert(qemu_in_main_thread()); QLIST_FOREACH(c, >parents, next_parent) { if (c->klass != _root) { return false; @@ -808,6 +829,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) */ BlockBackendPublic *blk_get_public(BlockBackend *blk) { +assert(qemu_in_main_thread()); return >public; } @@ -816,6 +838,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk) */ BlockBackend *blk_by_public(BlockBackendPublic *public) { +assert(qemu_in_main_thread()); return container_of(public, BlockBackend, public); } @@ -828,6 +851,8 @@ void blk_remove_bs(BlockBackend *blk) BlockDriverState *bs; BdrvChild *root; +assert(qemu_in_main_thread()); + notifier_list_notify(>remove_bs_notifiers, blk); if
[PATCH v5 12/31] assertions for blockjob_int.h
Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi --- blockjob.c | 4 1 file changed, 4 insertions(+) diff --git a/blockjob.c b/blockjob.c index 4bad1408cb..10c807413e 100644 --- a/blockjob.c +++ b/blockjob.c @@ -84,6 +84,7 @@ BlockJob *block_job_get(const char *id) void block_job_free(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); +assert(qemu_in_main_thread()); block_job_remove_all_bdrv(bjob); blk_unref(bjob->blk); @@ -436,6 +437,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, BlockBackend *blk; BlockJob *job; +assert(qemu_in_main_thread()); + if (job_id == NULL && !(flags & JOB_INTERNAL)) { job_id = bdrv_get_device_name(bs); } @@ -505,6 +508,7 @@ void block_job_iostatus_reset(BlockJob *job) void block_job_user_resume(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); +assert(qemu_in_main_thread()); block_job_iostatus_reset(bjob); } -- 2.27.0
[PATCH v5 14/31] include/block/blockjob.h: global state API
blockjob functions run always under the BQL lock. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi --- include/block/blockjob.h | 9 + 1 file changed, 9 insertions(+) diff --git a/include/block/blockjob.h b/include/block/blockjob.h index d200f33c10..fa0c3f7a47 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -77,6 +77,13 @@ typedef struct BlockJob { GSList *nodes; } BlockJob; +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * block_job_next: * @job: A block job, or %NULL. @@ -158,6 +165,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp); */ void block_job_iostatus_reset(BlockJob *job); +/* Common functions that are neither I/O nor Global State */ + /** * block_job_is_internal: * @job: The job to determine if it is user-visible or not. -- 2.27.0
[PATCH v5 16/31] include/sysemu/blockdev.h: global state API
blockdev functions run always under the BQL lock. Signed-off-by: Emanuele Giuseppe Esposito --- include/sysemu/blockdev.h | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h index c4b7b8b54e..e53eb91be6 100644 --- a/include/sysemu/blockdev.h +++ b/include/sysemu/blockdev.h @@ -13,9 +13,6 @@ #include "block/block.h" #include "qemu/queue.h" -void blockdev_mark_auto_del(BlockBackend *blk); -void blockdev_auto_del(BlockBackend *blk); - typedef enum { IF_DEFAULT = -1,/* for use with drive_add() only */ /* @@ -40,6 +37,16 @@ struct DriveInfo { QTAILQ_ENTRY(DriveInfo) next; }; +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +void blockdev_mark_auto_del(BlockBackend *blk); +void blockdev_auto_del(BlockBackend *blk); + DriveInfo *blk_legacy_dinfo(BlockBackend *blk); DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo); BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); -- 2.27.0
[PATCH v5 22/31] block_int-common.h: assertion in the callers of BlockDriver function pointers
Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi --- block.c| 18 ++ block/create.c | 10 ++ 2 files changed, 28 insertions(+) diff --git a/block.c b/block.c index b77ab0a104..180884b8c0 100644 --- a/block.c +++ b/block.c @@ -526,6 +526,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) CreateCo *cco = opaque; assert(cco->drv); +assert(qemu_in_main_thread()); ret = cco->drv->bdrv_co_create_opts(cco->drv, cco->filename, cco->opts, _err); @@ -1090,6 +1091,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint) static void bdrv_join_options(BlockDriverState *bs, QDict *options, QDict *old_options) { +assert(qemu_in_main_thread()); if (bs->drv && bs->drv->bdrv_join_options) { bs->drv->bdrv_join_options(options, old_options); } else { @@ -1598,6 +1600,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, { Error *local_err = NULL; int i, ret; +assert(qemu_in_main_thread()); bdrv_assign_node_name(bs, node_name, _err); if (local_err) { @@ -1989,6 +1992,8 @@ static int bdrv_fill_options(QDict **options, const char *filename, BlockDriver *drv = NULL; Error *local_err = NULL; +assert(qemu_in_main_thread()); + /* * Caution: while qdict_get_try_str() is fine, getting non-string * types would require more care. When @options come from @@ -2182,6 +2187,7 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, uint64_t *nperm, uint64_t *nshared) { assert(bs->drv && bs->drv->bdrv_child_perm); +assert(qemu_in_main_thread()); bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); @@ -2267,6 +2273,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) { BlockDriverState *bs = opaque; uint64_t cumulative_perms, cumulative_shared_perms; +assert(qemu_in_main_thread()); if (bs->drv->bdrv_set_perm) { bdrv_get_cumulative_perm(bs, _perms, @@ -2278,6 +2285,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) static void bdrv_drv_set_perm_abort(void *opaque) { BlockDriverState *bs = opaque; +assert(qemu_in_main_thread()); if (bs->drv->bdrv_abort_perm_update) { bs->drv->bdrv_abort_perm_update(bs); @@ -2293,6 +2301,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, Transaction *tran, Error **errp) { +assert(qemu_in_main_thread()); if (!bs->drv) { return 0; } @@ -4357,6 +4366,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bs_queue != NULL); +assert(qemu_in_main_thread()); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { ctx = bdrv_get_aio_context(bs_entry->state.bs); @@ -4622,6 +4632,7 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, assert(reopen_state != NULL); assert(reopen_state->bs->drv != NULL); +assert(qemu_in_main_thread()); drv = reopen_state->bs->drv; /* This function and each driver's bdrv_reopen_prepare() remove @@ -4832,6 +4843,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs = reopen_state->bs; drv = bs->drv; assert(drv != NULL); +assert(qemu_in_main_thread()); /* If there are any driver level actions to take */ if (drv->bdrv_reopen_commit) { @@ -4873,6 +4885,7 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state) assert(reopen_state != NULL); drv = reopen_state->bs->drv; assert(drv != NULL); +assert(qemu_in_main_thread()); if (drv->bdrv_reopen_abort) { drv->bdrv_reopen_abort(reopen_state); @@ -4886,6 +4899,7 @@ static void bdrv_close(BlockDriverState *bs) BdrvChild *child, *next; assert(!bs->refcnt); +assert(qemu_in_main_thread()); bdrv_drained_begin(bs); /* complete I/O */ bdrv_flush(bs); @@ -6671,6 +6685,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) int ret; uint64_t cumulative_perms, cumulative_shared_perms; +assert(qemu_in_main_thread()); + if (!bs->drv) { return -ENOMEDIUM; } @@ -7180,6 +7196,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) BdrvAioNotifier *baf, *baf_tmp; assert(!bs->walking_aio_notifiers); +assert(qemu_in_main_thread()); bs->walking_aio_notifiers = true; QLIST_FOREACH_SAFE(baf, >aio_notifiers, list, baf_tmp) { if (baf->deleted) { @@ -7207,6 +7224,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, AioContext *new_context) {
[PATCH v5 19/31] block/copy-before-write.h: global state API + assertions
copy-before-write functions always run under BQL lock. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi --- block/copy-before-write.h | 7 +++ block/copy-before-write.c | 2 ++ 2 files changed, 9 insertions(+) diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 51847e711a..9a45de2fce 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -29,6 +29,13 @@ #include "block/block_int.h" #include "block/block-copy.h" +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, diff --git a/block/copy-before-write.c b/block/copy-before-write.c index c30a5ff8de..36a8d7ba52 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -223,6 +223,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, QDict *opts; assert(source->total_sectors == target->total_sectors); +assert(qemu_in_main_thread()); opts = qdict_new(); qdict_put_str(opts, "driver", "copy-before-write"); @@ -245,6 +246,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, void bdrv_cbw_drop(BlockDriverState *bs) { +assert(qemu_in_main_thread()); bdrv_drop_filter(bs, _abort); bdrv_unref(bs); } -- 2.27.0
[PATCH v5 21/31] block_int-common.h: split function pointers in BlockDriver
Similar to the header split, also the function pointers in BlockDriver can be split in I/O and global state. Signed-off-by: Emanuele Giuseppe Esposito --- include/block/block_int-common.h | 440 +-- 1 file changed, 235 insertions(+), 205 deletions(-) diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 70534f94ae..0e63dc694f 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -96,6 +96,11 @@ typedef struct BdrvTrackedRequest { struct BlockDriver { +/* + * These fields are initialized when this object is created, + * and are never changed afterwards. + */ + const char *format_name; int instance_size; @@ -121,23 +126,7 @@ struct BlockDriver { * on those children. */ bool is_format; -/* - * Return true if @to_replace can be replaced by a BDS with the - * same data as @bs without it affecting @bs's behavior (that is, - * without it being visible to @bs's parents). - */ -bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, - BlockDriverState *to_replace); - -int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); -int (*bdrv_probe_device)(const char *filename); -/* - * Any driver implementing this callback is expected to be able to handle - * NULL file names in its .bdrv_open() implementation. - */ -void (*bdrv_parse_filename)(const char *filename, QDict *options, -Error **errp); /* * Drivers not implementing bdrv_parse_filename nor bdrv_open should have * this field set to true, except ones that are defined only by their @@ -159,7 +148,66 @@ struct BlockDriver { */ bool supports_backing; -/* For handling image reopen for split or non-split files */ +bool has_variable_length; + +/* + * Drivers setting this field must be able to work with just a plain + * filename with ':' as a prefix, and no other options. + * Options may be extracted from the filename by implementing + * bdrv_parse_filename. + */ +const char *protocol_name; + +/* List of options for creating images, terminated by name == NULL */ +QemuOptsList *create_opts; + +/* List of options for image amend */ +QemuOptsList *amend_opts; + +/* + * If this driver supports reopening images this contains a + * NULL-terminated list of the runtime options that can be + * modified. If an option in this list is unspecified during + * reopen then it _must_ be reset to its default value or return + * an error. + */ +const char *const *mutable_opts; + +/* + * Pointer to a NULL-terminated array of names of strong options + * that can be specified for bdrv_open(). A strong option is one + * that changes the data of a BDS. + * If this pointer is NULL, the array is considered empty. + * "filename" and "driver" are always considered strong. + */ +const char *const *strong_runtime_opts; + +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +/* + * Return true if @to_replace can be replaced by a BDS with the + * same data as @bs without it affecting @bs's behavior (that is, + * without it being visible to @bs's parents). + */ +bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, + BlockDriverState *to_replace); + +int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); +int (*bdrv_probe_device)(const char *filename); + +/* + * Any driver implementing this callback is expected to be able to handle + * NULL file names in its .bdrv_open() implementation. + */ +void (*bdrv_parse_filename)(const char *filename, QDict *options, +Error **errp); + +/* For handling image reopen for split or non-split files. */ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); @@ -175,7 +223,6 @@ struct BlockDriver { Error **errp); void (*bdrv_close)(BlockDriverState *bs); - int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, Error **errp); int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, @@ -183,11 +230,6 @@ struct BlockDriver { QemuOpts *opts, Error **errp); -int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, - BlockdevAmendOptions *opts, - bool force, -
[PATCH v5 25/31] block-backend-common.h: split function pointers in BlockDevOps
Assertions in the callers of the function pointrs are already added by previous patches. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé --- include/sysemu/block-backend-common.h | 28 ++- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h index 6963bbf45a..ae630c624c 100644 --- a/include/sysemu/block-backend-common.h +++ b/include/sysemu/block-backend-common.h @@ -27,6 +27,14 @@ /* Callbacks for block device models */ typedef struct BlockDevOps { + +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /* * Runs when virtual media changed (monitor commands eject, change) * Argument load is true on load and false on eject. @@ -44,16 +52,26 @@ typedef struct BlockDevOps { * true, even if they do not support eject requests. */ void (*eject_request_cb)(void *opaque, bool force); -/* - * Is the virtual tray open? - * Device models implement this only when the device has a tray. - */ -bool (*is_tray_open)(void *opaque); + /* * Is the virtual medium locked into the device? * Device models implement this only when device has such a lock. */ bool (*is_medium_locked)(void *opaque); + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + +/* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ +bool (*is_tray_open)(void *opaque); + /* * Runs when the size changed (e.g. monitor command block_resize) */ -- 2.27.0
[PATCH v5 20/31] block/coroutines: I/O API
block coroutines functions run in different aiocontext, and are not protected by the BQL. Therefore are I/O. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi --- block/coroutines.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/block/coroutines.h b/block/coroutines.h index c8c14a29c8..c61abd271a 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -29,6 +29,12 @@ /* For blk_bs() in generated block/block-gen.c */ #include "sysemu/block-backend.h" +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); -- 2.27.0
[PATCH v5 23/31] block_int-common.h: split function pointers in BdrvChildClass
Signed-off-by: Emanuele Giuseppe Esposito --- include/block/block_int-common.h | 67 +++- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 0e63dc694f..3ceb2365a8 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -817,12 +817,16 @@ struct BdrvChildClass { */ bool parent_is_bds; +/* + * Global state (GS) API. These functions run under the BQL lock. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ void (*inherit_options)(BdrvChildRole role, bool parent_is_format, int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); - void (*change_media)(BdrvChild *child, bool load); -void (*resize)(BdrvChild *child); /* * Returns a name that is supposedly more useful for human users than the @@ -839,6 +843,40 @@ struct BdrvChildClass { */ char *(*get_parent_desc)(BdrvChild *child); +/* + * Notifies the parent that the child has been activated/inactivated (e.g. + * when migration is completing) and it can start/stop requesting + * permissions and doing I/O on it. + */ +void (*activate)(BdrvChild *child, Error **errp); +int (*inactivate)(BdrvChild *child); + +void (*attach)(BdrvChild *child); +void (*detach)(BdrvChild *child); + +/* + * Notifies the parent that the filename of its child has changed (e.g. + * because the direct child was removed from the backing chain), so that it + * can update its reference. + */ +int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, + const char *filename, Error **errp); + +bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, +GSList **ignore, Error **errp); +void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); + +AioContext *(*get_parent_aio_context)(BdrvChild *child); + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + +void (*resize)(BdrvChild *child); + /* * If this pair of functions is implemented, the parent doesn't issue new * requests after returning from .drained_begin() until .drained_end() is @@ -863,31 +901,6 @@ struct BdrvChildClass { * activity on the child has stopped. */ bool (*drained_poll)(BdrvChild *child); - -/* - * Notifies the parent that the child has been activated/inactivated (e.g. - * when migration is completing) and it can start/stop requesting - * permissions and doing I/O on it. - */ -void (*activate)(BdrvChild *child, Error **errp); -int (*inactivate)(BdrvChild *child); - -void (*attach)(BdrvChild *child); -void (*detach)(BdrvChild *child); - -/* - * Notifies the parent that the filename of its child has changed (e.g. - * because the direct child was removed from the backing chain), so that it - * can update its reference. - */ -int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, - const char *filename, Error **errp); - -bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, -GSList **ignore, Error **errp); -void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); - -AioContext *(*get_parent_aio_context)(BdrvChild *child); }; extern const BdrvChildClass child_of_bds; -- 2.27.0
[PATCH v5 28/31] block.c: assert BQL lock held in bdrv_co_invalidate_cache
bdrv_co_invalidate_cache is special: it is an I/O function, but uses the block layer permission API, which is GS. Because of this, we can assert that either the function is being called with BQL held, and thus can use the permission API, or make sure that the permission API is not used, by ensuring that bs (and parents) .open_flags does not contain BDRV_O_INACTIVE. Signed-off-by: Emanuele Giuseppe Esposito --- block.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/block.c b/block.c index a0309f827d..805974676b 100644 --- a/block.c +++ b/block.c @@ -6574,6 +6574,26 @@ void bdrv_init_with_whitelist(void) bdrv_init(); } +static bool bdrv_is_active(BlockDriverState *bs) +{ +BdrvChild *parent; + +if (bs->open_flags & BDRV_O_INACTIVE) { +return false; +} + +QLIST_FOREACH(parent, >parents, next_parent) { +if (parent->klass->parent_is_bds) { +BlockDriverState *parent_bs = parent->opaque; +if (!bdrv_is_active(parent_bs)) { +return false; +} +} +} + + return true; +} + int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) { BdrvChild *child, *parent; @@ -6585,6 +6605,12 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) return -ENOMEDIUM; } +/* + * No need to muck with permissions if bs is active. + * TODO: should activation be a separate function? + */ +assert(qemu_in_main_thread() || bdrv_is_active(bs)); + QLIST_FOREACH(child, >children, next) { bdrv_co_invalidate_cache(child->bs, _err); if (local_err) { -- 2.27.0
[PATCH v5 30/31] crypto: delegate permission functions to JobDriver .pre_run
block_crypto_amend_options_generic_luks uses the block layer permission API, therefore it should be called with the BQL held. However, the same function is being called ib two BlockDriver callbacks: bdrv_amend_options (under BQL) and bdrv_co_amend (I/O). The latter is I/O because it is invoked by block/amend.c's blockdev_amend_run(), a .run callback of the amend JobDriver Therefore we want to 1) change block_crypto_amend_options_generic_luks to use the permission API only when the BQL is held, and 2) use the .pre_run JobDriver callback to check for permissions before switching to the job aiocontext. This has also the benefit of applying the same permission operation to all amend implementations, not only luks. Signed-off-by: Emanuele Giuseppe Esposito --- block/amend.c | 20 block/crypto.c | 18 -- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/block/amend.c b/block/amend.c index 392df9ef83..fba6add51a 100644 --- a/block/amend.c +++ b/block/amend.c @@ -53,10 +53,30 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) return ret; } +static int blockdev_amend_refresh_perms(Job *job, Error **errp) +{ +BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); + +return bdrv_child_refresh_perms(s->bs, s->bs->file, errp); +} + +static int blockdev_amend_pre_run(Job *job, Error **errp) +{ +return blockdev_amend_refresh_perms(job, errp); +} + +static void blockdev_amend_clean(Job *job) +{ +Error *errp; +blockdev_amend_refresh_perms(job, ); +} + static const JobDriver blockdev_amend_job_driver = { .instance_size = sizeof(BlockdevAmendJob), .job_type = JOB_TYPE_AMEND, .run = blockdev_amend_run, +.pre_run = blockdev_amend_pre_run, +.clean = blockdev_amend_clean, }; void qmp_x_blockdev_amend(const char *job_id, diff --git a/block/crypto.c b/block/crypto.c index c8ba4681e2..82f154516c 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -780,6 +780,7 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp) static int block_crypto_amend_options_generic_luks(BlockDriverState *bs, QCryptoBlockAmendOptions *amend_options, +bool under_bql, bool force, Error **errp) { @@ -791,9 +792,12 @@ block_crypto_amend_options_generic_luks(BlockDriverState *bs, /* apply for exclusive read/write permissions to the underlying file*/ crypto->updating_keys = true; -ret = bdrv_child_refresh_perms(bs, bs->file, errp); -if (ret) { -goto cleanup; + +if (under_bql) { +ret = bdrv_child_refresh_perms(bs, bs->file, errp); +if (ret) { +goto cleanup; +} } ret = qcrypto_block_amend_options(crypto->block, @@ -806,7 +810,9 @@ block_crypto_amend_options_generic_luks(BlockDriverState *bs, cleanup: /* release exclusive read/write permissions to the underlying file*/ crypto->updating_keys = false; -bdrv_child_refresh_perms(bs, bs->file, errp); +if (under_bql) { +bdrv_child_refresh_perms(bs, bs->file, errp); +} return ret; } @@ -834,7 +840,7 @@ block_crypto_amend_options_luks(BlockDriverState *bs, goto cleanup; } ret = block_crypto_amend_options_generic_luks(bs, amend_options, - force, errp); + true, force, errp); cleanup: qapi_free_QCryptoBlockAmendOptions(amend_options); return ret; @@ -853,7 +859,7 @@ coroutine_fn block_crypto_co_amend_luks(BlockDriverState *bs, .u.luks = *qapi_BlockdevAmendOptionsLUKS_base(>u.luks), }; return block_crypto_amend_options_generic_luks(bs, _opts, - force, errp); + false, force, errp); } static void -- 2.27.0