Re: [RFC PATCH v3] hw/nvme:Adding Support for namespace management

2021-11-23 Thread Lukasz Maniak
On Wed, Nov 10, 2021 at 04:56:29PM +0530, Naveen wrote:
> From: Naveen Nagar 
> 
> This patch supports namespace management : create and delete operations
> This patch has been tested with the following command and size of image
> file for unallocated namespaces is taken as 0GB. ns_create will look into
> the list of unallocated namespaces and it will initialize the same and 
> return the nsid of the same. A new mandatory field has been added called
> tnvmcap and we have ensured that the total capacity of namespace created
> does not exceed tnvmcap
> 
> -device nvme-subsys,id=subsys0,tnvmcap=8
> -device nvme,serial=foo,id=nvme0,subsys=subsys0
> -device nvme,serial=bar,id=nvme1,subsys=subsys0
> 
> -drive id=ns1,file=ns1.img,if=none
> -device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=true
> -drive id=ns2,file=ns2.img,if=none
> -device nvme-ns,drive=ns2,bus=nvme0,nsid=2,zoned=false,shared=true
> -drive id=ns3,file=ns3.img,if=none
> -device nvme-ns,drive=ns3,bus=nvme0,nsid=3,zoned=false,shared=true
> -drive id=ns4,file=ns4.img,if=none
> -device nvme-ns,drive=ns4,bus=nvme0,nsid=4,zoned=false,shared=true
> 
> Please review and suggest if any changes are required.
> 
> Signed-off-by: Naveen Nagar 
> 
> Since v2:
> -Lukasz Maniak found a bug in namespace attachment and proposed 
>  solution is added
> 

Hi Naveen,

The current implementation is inconsistent and thus has a bug related to
unvmcap support.

Namespaces are pre-allocated after boot, and the initial namespace size
is the size of the associated blockdev. If the blockdevs are non-zero
sized then the first deletion of the namespaces associated with them
will increment unvmcap by their size. This will make unvmcap greater
than tnvmcap.

While the easiest way would be to prohibit the use of non-zero sized
blockdev with namespace management, doing so would limit the
functionality of the namespaces itself, which we would like to avoid.

This fix below addresses issues related to unvmcap and non-zero block
devices. The unvmcap value will be properly updated on both the first
and subsequent controllers added to the subsystem regardless of the
order in which nvme-ns is defined on the command line before or after
the controller definition. Additionally, if the block device size of any
namespace causes the unvmcap to be exceeded, an error will be returned
at the namespace definition point.

The fix is based on v3 based on v6.1.0, as v3 does not apply to master.

---
 hw/nvme/ctrl.c |  7 +++
 hw/nvme/ns.c   | 23 ---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 63ea2fcb14..dc0ad4155b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6594,6 +6594,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 NvmeIdCtrl *id = >id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
 uint64_t cap = ldq_le_p(>bar.cap);
+int i;
 
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6672,6 +6673,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->cmic |= NVME_CMIC_MULTI_CTRL;
 id->tnvmcap = n->subsys->params.tnvmcap * GiB;
 id->unvmcap = n->subsys->params.tnvmcap * GiB;
+
+for (i = 0; i < ARRAY_SIZE(n->subsys->namespaces); i++) {
+if (n->subsys->namespaces[i]) {
+id->unvmcap -= le64_to_cpu(n->subsys->namespaces[i]->size);
+}
+}
 }
 
 NVME_CAP_SET_MQES(cap, 0x7ff);
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index f62a695132..c87d7f5bd6 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -140,9 +140,12 @@ lbaf_found:
 return 0;
 }
 
-static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
+static int nvme_ns_init_blk(NvmeNamespace *ns, NvmeSubsystem *subsys,
+Error **errp)
 {
 bool read_only;
+NvmeCtrl *ctrl;
+int i;
 
 if (!blkconf_blocksizes(>blkconf, errp)) {
 return -1;
@@ -164,6 +167,21 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error 
**errp)
 return -1;
 }
 
+if (subsys) {
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+ctrl = nvme_subsys_ctrl(subsys, i);
+
+if (ctrl) {
+if (ctrl->id_ctrl.unvmcap < le64_to_cpu(ns->size)) {
+error_setg(errp, "blockdev size %ld exceeds subsystem's "
+ "unallocated capacity", ns->size);
+} else {
+ctrl->id_ctrl.unvmcap -= le64_to_cpu(ns->size);
+}
+}
+}
+}
+
 return 0;
 }
 
@@ -480,7 +498,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 }
 }
 
-if (nvme_ns_init_blk(ns, errp)) {
+if (nvme_ns_init_blk(ns, subsys, errp)) {
 return;
 }
 
@@ -527,7 +545,6 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 
 

Re: [PULL 0/7] Python patches

2021-11-23 Thread Richard Henderson

On 11/23/21 3:37 AM, John Snow wrote:

The following changes since commit 89d2f9e4c63799f7f03e9180c63b7dc45fc2a04a:

   Merge tag 'pull-target-arm-20211122' of 
https://git.linaro.org/people/pmaydell/qemu-arm into staging (2021-11-22 
16:35:54 +0100)

are available in the Git repository at:

   https://gitlab.com/jsnow/qemu.git tags/python-pull-request

for you to fetch changes up to a57cb3e23d5ac918a69d0aab918470ff0b429ff9:

   python/aqmp: fix send_fd_scm for python 3.6.x (2021-11-22 18:41:21 -0500)


Python testing fixes for 6.2

A few more fixes to help eliminate race conditions from
device-crash-test, along with a fix that allows the SCM_RIGHTS
functionality to work on hosts that only have Python 3.6.

If this is too much this late in the RC process, I'd advocate for at
least patch 7/7 by itself.



John Snow (7):
   python/machine: add @sock_dir property
   python/machine: remove _remove_monitor_sockfile property
   python/machine: add instance disambiguator to default nickname
   python/machine: move more variable initializations to _pre_launch
   python/machine: handle "fast" QEMU terminations
   scripts/device-crash-test: Use a QMP timeout
   python/aqmp: fix send_fd_scm for python 3.6.x

  python/qemu/aqmp/qmp_client.py |  9 --
  python/qemu/machine/machine.py | 59 --
  scripts/device-crash-test  |  2 +-
  3 files changed, 42 insertions(+), 28 deletions(-)


Applied, thanks.

r~



Re: [PATCH] block vvfat.c fix leak when failure occurs

2021-11-23 Thread Hanna Reitz

On 19.11.21 12:25, Daniella Lee wrote:

Based on your suggestions. I made a new patch which contians:
1.format detection
2.replace calloc with g_malloc0 in enable_write_target function
3.use g_free without null pointer detection in vvfat_open function
4.delete line "ret = 0", use return ret directly in vvfat_open function


Signed-off-by: Daniella Lee 
---
  block/vvfat.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)


Thanks, looks good to me!

Two remarks for the next time: When you send a new version of a patch, 
you should mark it as “v2” (and then “v3” and so on) in the subject, 
e.g. “[PATCH v2]”.  This can be done e.g. with `git format-patch -v2`.


Second, for a new iteration, you should generally keep the commit 
message from the previous one and not replace it with a change log. 
Having a change log is very nice, don’t get me wrong, but it shouldn’t 
be part of the commit message.  Once you’ve formatted the patch with 
`git format-patch`, you can edit the file and put the change log below 
the “---” line, e.g. like it was done here: 
https://lists.nongnu.org/archive/html/qemu-block/2021-09/msg00453.html


No need for you to respin this patch, though, I’ve just replaced the 
commit message with the one from v1 and applied it to my block branch:


https://gitlab.com/hreitz/qemu/-/commits/block/

Thanks again,

Hanna




[PULL 0/3] Block patches

2021-11-23 Thread Hanna Reitz
The following changes since commit 73e0f70e097b7c92a5ce16ee35b53afe119b20d7:

  Merge tag 'pull-lu-20211123' of https://gitlab.com/rth7680/qemu into staging 
(2021-11-23 11:33:14 +0100)

are available in the Git repository at:

  https://gitlab.com/hreitz/qemu.git tags/pull-block-2021-11-23

for you to fetch changes up to 4dd218fd0717ed3cddb69c01eeb9da630107d89d:

  iotests/149: Skip on unsupported ciphers (2021-11-23 15:39:12 +0100)


Block patches for 6.2-rc2:
- Fix memory leak in vvfat when vvfat_open() fails
- iotest fixes for the gnutls crypto backend


Daniella Lee (1):
  block/vvfat.c fix leak when failure occurs

Hanna Reitz (2):
  iotests: Use aes-128-cbc
  iotests/149: Skip on unsupported ciphers

 block/vvfat.c  | 16 
 tests/qemu-iotests/149 | 23 ++-
 tests/qemu-iotests/206 |  4 ++--
 tests/qemu-iotests/206.out |  6 +++---
 tests/qemu-iotests/210 |  4 ++--
 tests/qemu-iotests/210.out |  6 +++---
 6 files changed, 40 insertions(+), 19 deletions(-)

-- 
2.33.1




[PULL 1/3] block/vvfat.c fix leak when failure occurs

2021-11-23 Thread Hanna Reitz
From: Daniella Lee 

Function vvfat_open called function enable_write_target and init_directories,
and these functions malloc new memory for BDRVVVFATState::qcow_filename,
BDRVVVFATState::used_clusters, and BDRVVVFATState::cluster_buff.

When the specified folder does not exist ,it may contains memory leak.
After init_directories function is executed, the vvfat_open return -EIO,
and bdrv_open_driver goto label open_failed,
the program use g_free(bs->opaque) to release BDRVVVFATState struct
without members mentioned.

command line:
qemu-system-x86_64 -hdb   -usb -device usb-storage,drive=fat16
-drive file=fat:rw:fat-type=16:"",
id=fat16,format=raw,if=none

enable_write_target called:
(gdb) bt
at ../block/vvfat.c:3114
flags=155650, errp=0x7fffd780) at ../block/vvfat.c:1236
node_name=0x0, options=0x56fa45d0, open_flags=155650,
errp=0x7fffd890) at ../block.c:1558
errp=0x7fffd890) at ../block.c:1852
reference=0x0, options=0x56fa45d0, flags=40962, parent=0x56f98cd0,
child_class=0x56b1d6a0 , child_role=19,
errp=0x7fffda90) at ../block.c:3779
options=0x56f9cfc0, bdref_key=0x56239bb8 "file",
parent=0x56f98cd0, child_class=0x56b1d6a0 ,
child_role=19, allow_none=true, errp=0x7fffda90) at ../block.c:3419
reference=0x0, options=0x56f9cfc0, flags=8194, parent=0x0,
child_class=0x0, child_role=0, errp=0x56c98c40 )
at ../block.c:3726
options=0x56f757b0, flags=0, errp=0x56c98c40 )
at ../block.c:3872
options=0x56f757b0, flags=0, errp=0x56c98c40 )
at ../block/block-backend.c:436
bs_opts=0x56f757b0, errp=0x56c98c40 )
at ../blockdev.c:608
errp=0x56c98c40 ) at ../blockdev.c:992
..

Signed-off-by: Daniella Lee 
Message-Id: <2029112553.35-1-daniellalee...@gmail.com>
[hreitz: Took commit message from v1]
Signed-off-by: Hanna Reitz 
---
 block/vvfat.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index 05e78e3c27..5dacc6cfac 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1279,8 +1279,18 @@ static int vvfat_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 qemu_co_mutex_init(>lock);
 
-ret = 0;
+qemu_opts_del(opts);
+
+return 0;
+
 fail:
+g_free(s->qcow_filename);
+s->qcow_filename = NULL;
+g_free(s->cluster_buffer);
+s->cluster_buffer = NULL;
+g_free(s->used_clusters);
+s->used_clusters = NULL;
+
 qemu_opts_del(opts);
 return ret;
 }
@@ -3118,7 +3128,7 @@ static int enable_write_target(BlockDriverState *bs, 
Error **errp)
 int size = sector2cluster(s, s->sector_count);
 QDict *options;
 
-s->used_clusters = calloc(size, 1);
+s->used_clusters = g_malloc0(size);
 
 array_init(&(s->commits), sizeof(commit_t));
 
@@ -3166,8 +3176,6 @@ static int enable_write_target(BlockDriverState *bs, 
Error **errp)
 return 0;
 
 err:
-g_free(s->qcow_filename);
-s->qcow_filename = NULL;
 return ret;
 }
 
-- 
2.33.1




[PULL 3/3] iotests/149: Skip on unsupported ciphers

2021-11-23 Thread Hanna Reitz
Whenever qemu-img or qemu-io report that some cipher is unsupported,
skip the whole test, because that is probably because qemu has been
configured with the gnutls crypto backend.

We could taylor the algorithm list to what gnutls supports, but this is
a test that is run rather rarely anyway (because it requires
password-less sudo), and so it seems better and easier to skip it.  When
this test is intentionally run to check LUKS compatibility, it seems
better not to limit the algorithms but keep the list extensive.

Signed-off-by: Hanna Reitz 
Message-Id: <2027151707.52549-3-hre...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
---
 tests/qemu-iotests/149 | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/149 b/tests/qemu-iotests/149
index 328fd05a4c..d49646ca60 100755
--- a/tests/qemu-iotests/149
+++ b/tests/qemu-iotests/149
@@ -230,6 +230,18 @@ def create_image(config, size_mb):
 fn.truncate(size_mb * 1024 * 1024)
 
 
+def check_cipher_support(config, output):
+"""Check the output of qemu-img or qemu-io for mention of the respective
+cipher algorithm being unsupported, and if so, skip this test.
+(Returns `output` for convenience.)"""
+
+if 'Unsupported cipher algorithm' in output:
+iotests.notrun('Unsupported cipher algorithm '
+   f'{config.cipher}-{config.keylen}-{config.mode}; '
+   'consider configuring qemu with a different crypto '
+   'backend')
+return output
+
 def qemu_img_create(config, size_mb):
 """Create and format a disk image with LUKS using qemu-img"""
 
@@ -253,7 +265,8 @@ def qemu_img_create(config, size_mb):
 "%dM" % size_mb]
 
 iotests.log("qemu-img " + " ".join(args), 
filters=[iotests.filter_test_dir])
-iotests.log(iotests.qemu_img_pipe(*args), 
filters=[iotests.filter_test_dir])
+iotests.log(check_cipher_support(config, iotests.qemu_img_pipe(*args)),
+filters=[iotests.filter_test_dir])
 
 def qemu_io_image_args(config, dev=False):
 """Get the args for access an image or device with qemu-io"""
@@ -279,8 +292,8 @@ def qemu_io_write_pattern(config, pattern, offset_mb, 
size_mb, dev=False):
 args = ["-c", "write -P 0x%x %dM %dM" % (pattern, offset_mb, size_mb)]
 args.extend(qemu_io_image_args(config, dev))
 iotests.log("qemu-io " + " ".join(args), filters=[iotests.filter_test_dir])
-iotests.log(iotests.qemu_io(*args), filters=[iotests.filter_test_dir,
- iotests.filter_qemu_io])
+iotests.log(check_cipher_support(config, iotests.qemu_io(*args)),
+filters=[iotests.filter_test_dir, iotests.filter_qemu_io])
 
 
 def qemu_io_read_pattern(config, pattern, offset_mb, size_mb, dev=False):
@@ -291,8 +304,8 @@ def qemu_io_read_pattern(config, pattern, offset_mb, 
size_mb, dev=False):
 args = ["-c", "read -P 0x%x %dM %dM" % (pattern, offset_mb, size_mb)]
 args.extend(qemu_io_image_args(config, dev))
 iotests.log("qemu-io " + " ".join(args), filters=[iotests.filter_test_dir])
-iotests.log(iotests.qemu_io(*args), filters=[iotests.filter_test_dir,
- iotests.filter_qemu_io])
+iotests.log(check_cipher_support(config, iotests.qemu_io(*args)),
+filters=[iotests.filter_test_dir, iotests.filter_qemu_io])
 
 
 def test_once(config, qemu_img=False):
-- 
2.33.1




Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-11-23 Thread Alexander Bulekov
On 211123 1449, Philippe Mathieu-Daudé wrote:
> On 11/23/21 14:42, Hanna Reitz wrote:
> > On 18.11.21 13:06, Philippe Mathieu-Daudé wrote:
> >> From: Alexander Bulekov 
> >>
> >> Without the previous commit, when running 'make check-qtest-i386'
> >> with QEMU configured with '--enable-sanitizers' we get:
> >>
> >>    AddressSanitizer:DEADLYSIGNAL
> >>    =
> >>    ==287878==ERROR: AddressSanitizer: SEGV on unknown address
> >> 0x0344
> >>    ==287878==The signal is caused by a WRITE memory access.
> >>    ==287878==Hint: address points to the zero page.
> >>    #0 0x564b2e5bac27 in blk_inc_in_flight
> >> block/block-backend.c:1346:5
> >>    #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
> >>    #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
> >>    #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
> >>    #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
> >>    #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9
> >>
> >> Add the reproducer for CVE-2021-20196.
> >>
> >> Signed-off-by: Alexander Bulekov 
> >> Message-Id: <20210319050906.14875-2-alx...@bu.edu>
> >> [PMD: Rebased, use global test_image]
> >> Reviewed-by: Darren Kenny 
> >> Signed-off-by: Philippe Mathieu-Daudé 
> >> ---
> >>   tests/qtest/fdc-test.c | 21 +
> >>   1 file changed, 21 insertions(+)
> > 
> > Not sure if I’m doing something wrong, but:
> > 
> > Using the global test_image brings a problem, namely that this test
> > fails unconditionally (for me at least...?), with the reason being that
> > the global QEMU instance (launched by qtest_start(), quit by
> > qtest_end()) still has that image open, so by launching a second
> > instance concurrently, I get this:
> > 
> > qemu-system-x86_64: Failed to get "write" lock
> > Is another process using the image [/tmp/qtest.xV4IxX]?
> 
> Hmm I had too many odd problems running qtests in parallel so I
> switched to 'make check-qtest -j1' more than 1 year ago, which
> is probably why I haven't noticed that issue.
> 
> Using another 'test_image' seems against code reuse principle,
> but at this point in release, duplicating it is simpler. Someone
> will clean that later =)

Maybe it makes sense to run this with -snasphot ?

> 
> > So either we need to use a different image file, or we need to quit the
> > global instance before using it (e.g. putting a qtest_end() at the
> > beginning of test_cve_*()), although the latter just seems wrong.
> > 
> > Second, I can’t make this test fail.  When I apply this patch first (to
> > master) and run it, I don’t get a SIGSEGV.
> 
> Is your QEMU built with --enable-sanitizers ?
> 



Re: [PATCH-for-6.2 v3 1/2] hw/block/fdc: Kludge missing floppy drive to fix CVE-2021-20196

2021-11-23 Thread Hanna Reitz

On 18.11.21 13:06, Philippe Mathieu-Daudé wrote:

Guest might select another drive on the bus by setting the
DRIVE_SEL bit of the DIGITAL OUTPUT REGISTER (DOR).
The current controller model doesn't expect a BlockBackend
to be NULL. A simple way to fix CVE-2021-20196 is to create
an empty BlockBackend when it is missing. All further
accesses will be safely handled, and the controller state
machines keep behaving correctly.

Cc: qemu-sta...@nongnu.org
Fixes: CVE-2021-20196
Reported-by: Gaoning Pan (Ant Security Light-Year Lab) 
BugLink: https://bugs.launchpad.net/qemu/+bug/1912780
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/338
Reviewed-by: Darren Kenny 
Signed-off-by: Philippe Mathieu-Daudé 
---
  hw/block/fdc.c | 14 +-
  1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index d21b717b7d6..6f94b6a6daa 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1161,7 +1161,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit)
  
  static FDrive *get_cur_drv(FDCtrl *fdctrl)

  {
-return get_drv(fdctrl, fdctrl->cur_drv);
+FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv);
+
+if (!cur_drv->blk) {
+/*
+ * Kludge: empty drive line selected. Create an anonymous
+ * BlockBackend to avoid NULL deref with various BlockBackend
+ * API calls within this model (CVE-2021-20196).
+ * Due to the controller QOM model limitations, we don't
+ * attach the created to the controller device.
+ */
+cur_drv->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);


So to me this looks basically like a mini version of 
floppy_drive_realize(), and I was wondering what else we might want to 
use from that function.  fd_init() and fd_revalidate() look interesting, 
but it appears that fdctrl_realize_common() already did that for all 
drives so we should be good.


Then again, fd_revalidate() behaves differently for the initial drv->blk 
== NULL (drv->drive is set to TYPE_NONE, and last_sect and max_track are 
set to 0) and for then later !blk_is_inserted() (drv->drive not changed 
(so I guess it stays TYPE_NONE?), but last_sect and max_track are set to 
0xff).  Not sure if that’s a problem.  Probably not, given that I think 
drv->disk and drv->drive both stay TYPE_NONE.


Reviewed-by: Hanna Reitz 


+}
+return cur_drv;
  }
  
  /* Status A register : 0x00 (read-only) */





Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-11-23 Thread Hanna Reitz

On 18.11.21 13:06, Philippe Mathieu-Daudé wrote:

From: Alexander Bulekov 

Without the previous commit, when running 'make check-qtest-i386'
with QEMU configured with '--enable-sanitizers' we get:

   AddressSanitizer:DEADLYSIGNAL
   =
   ==287878==ERROR: AddressSanitizer: SEGV on unknown address 0x0344
   ==287878==The signal is caused by a WRITE memory access.
   ==287878==Hint: address points to the zero page.
   #0 0x564b2e5bac27 in blk_inc_in_flight block/block-backend.c:1346:5
   #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
   #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
   #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
   #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
   #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9

Add the reproducer for CVE-2021-20196.

Signed-off-by: Alexander Bulekov 
Message-Id: <20210319050906.14875-2-alx...@bu.edu>
[PMD: Rebased, use global test_image]
Reviewed-by: Darren Kenny 
Signed-off-by: Philippe Mathieu-Daudé 
---
  tests/qtest/fdc-test.c | 21 +
  1 file changed, 21 insertions(+)


Not sure if I’m doing something wrong, but:

Using the global test_image brings a problem, namely that this test 
fails unconditionally (for me at least...?), with the reason being that 
the global QEMU instance (launched by qtest_start(), quit by 
qtest_end()) still has that image open, so by launching a second 
instance concurrently, I get this:


qemu-system-x86_64: Failed to get "write" lock
Is another process using the image [/tmp/qtest.xV4IxX]?

So either we need to use a different image file, or we need to quit the 
global instance before using it (e.g. putting a qtest_end() at the 
beginning of test_cve_*()), although the latter just seems wrong.


Second, I can’t make this test fail.  When I apply this patch first (to 
master) and run it, I don’t get a SIGSEGV.


Hanna




Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-11-23 Thread Hanna Reitz

On 23.11.21 14:49, Philippe Mathieu-Daudé wrote:

On 11/23/21 14:42, Hanna Reitz wrote:

On 18.11.21 13:06, Philippe Mathieu-Daudé wrote:

From: Alexander Bulekov 

Without the previous commit, when running 'make check-qtest-i386'
with QEMU configured with '--enable-sanitizers' we get:

    AddressSanitizer:DEADLYSIGNAL
    =
    ==287878==ERROR: AddressSanitizer: SEGV on unknown address
0x0344
    ==287878==The signal is caused by a WRITE memory access.
    ==287878==Hint: address points to the zero page.
    #0 0x564b2e5bac27 in blk_inc_in_flight
block/block-backend.c:1346:5
    #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
    #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
    #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
    #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
    #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9

Add the reproducer for CVE-2021-20196.

Signed-off-by: Alexander Bulekov 
Message-Id: <20210319050906.14875-2-alx...@bu.edu>
[PMD: Rebased, use global test_image]
Reviewed-by: Darren Kenny 
Signed-off-by: Philippe Mathieu-Daudé 
---
   tests/qtest/fdc-test.c | 21 +
   1 file changed, 21 insertions(+)

Not sure if I’m doing something wrong, but:

Using the global test_image brings a problem, namely that this test
fails unconditionally (for me at least...?), with the reason being that
the global QEMU instance (launched by qtest_start(), quit by
qtest_end()) still has that image open, so by launching a second
instance concurrently, I get this:

qemu-system-x86_64: Failed to get "write" lock
Is another process using the image [/tmp/qtest.xV4IxX]?

Hmm I had too many odd problems running qtests in parallel so I
switched to 'make check-qtest -j1' more than 1 year ago, which
is probably why I haven't noticed that issue.


I’ve run the test with

QTEST_QEMU_BINARY=$PWD/qemu-system-x86_64 tests/qtest/fdc-test

so there definitely weren’t any other tests running at the same time.  I 
don’t know why you don’t encounter this problem, but it’s caused by the 
concurrent QEMU instance launched in the very same test (qtest_start() 
in main(), and cleaned up by qtest_end() after g_test_run()).



Using another 'test_image' seems against code reuse principle,
but at this point in release, duplicating it is simpler. Someone
will clean that later =)


So either we need to use a different image file, or we need to quit the
global instance before using it (e.g. putting a qtest_end() at the
beginning of test_cve_*()), although the latter just seems wrong.

Second, I can’t make this test fail.  When I apply this patch first (to
master) and run it, I don’t get a SIGSEGV.

Is your QEMU built with --enable-sanitizers ?


Uh, no.  I had (wrongly) assumed there’d be no need, given that the 
SIGSEGV access address is below 4 kB...


I now did configure and compile it with --enable-sanitizers but still 
can’t reproduce it.


Hanna




[PULL 2/3] iotests: Use aes-128-cbc

2021-11-23 Thread Hanna Reitz
Our gnutls crypto backend (which is the default as of 8bd0931f6)
supports neither twofish-128 nor the CTR mode.  CBC and aes-128 are
supported by all of our backends (as far as I can tell), so use
aes-128-cbc in our iotests.

(We could also use e.g. aes-256-cbc, but the different key sizes would
lead to different key slot offsets and so change the reference output
more, which is why I went with aes-128.)

Signed-off-by: Hanna Reitz 
Message-Id: <2027151707.52549-2-hre...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
Tested-by: Thomas Huth 
---
 tests/qemu-iotests/206 | 4 ++--
 tests/qemu-iotests/206.out | 6 +++---
 tests/qemu-iotests/210 | 4 ++--
 tests/qemu-iotests/210.out | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/qemu-iotests/206 b/tests/qemu-iotests/206
index c3cdad4ce4..10eff343f7 100755
--- a/tests/qemu-iotests/206
+++ b/tests/qemu-iotests/206
@@ -162,8 +162,8 @@ with iotests.FilePath('t.qcow2') as disk_path, \
  'encrypt': {
  'format': 'luks',
  'key-secret': 'keysec0',
- 'cipher-alg': 'twofish-128',
- 'cipher-mode': 'ctr',
+ 'cipher-alg': 'aes-128',
+ 'cipher-mode': 'cbc',
  'ivgen-alg': 'plain64',
  'ivgen-hash-alg': 'md5',
  'hash-alg': 'sha1',
diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out
index 3593e8e9c2..80cd274223 100644
--- a/tests/qemu-iotests/206.out
+++ b/tests/qemu-iotests/206.out
@@ -97,7 +97,7 @@ Format specific information:
 
 === Successful image creation (encrypted) ===
 
-{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"driver": "qcow2", "encrypt": {"cipher-alg": "twofish-128", "cipher-mode": 
"ctr", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": 
"plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": 
{"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}}
+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"driver": "qcow2", "encrypt": {"cipher-alg": "aes-128", "cipher-mode": "cbc", 
"format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", 
"ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": {"driver": "file", 
"filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}}
 {"return": {}}
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
@@ -115,10 +115,10 @@ Format specific information:
 encrypt:
 ivgen alg: plain64
 hash alg: sha1
-cipher alg: twofish-128
+cipher alg: aes-128
 uuid: ----
 format: luks
-cipher mode: ctr
+cipher mode: cbc
 slots:
 [0]:
 active: true
diff --git a/tests/qemu-iotests/210 b/tests/qemu-iotests/210
index 5a62ed4dd1..a4dcc5fe59 100755
--- a/tests/qemu-iotests/210
+++ b/tests/qemu-iotests/210
@@ -83,8 +83,8 @@ with iotests.FilePath('t.luks') as disk_path, \
  },
  'size': size,
  'key-secret': 'keysec0',
- 'cipher-alg': 'twofish-128',
- 'cipher-mode': 'ctr',
+ 'cipher-alg': 'aes-128',
+ 'cipher-mode': 'cbc',
  'ivgen-alg': 'plain64',
  'ivgen-hash-alg': 'md5',
  'hash-alg': 'sha1',
diff --git a/tests/qemu-iotests/210.out b/tests/qemu-iotests/210.out
index 55c0844370..96d9f749dd 100644
--- a/tests/qemu-iotests/210.out
+++ b/tests/qemu-iotests/210.out
@@ -59,7 +59,7 @@ Format specific information:
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
-{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"cipher-alg": "twofish-128", "cipher-mode": "ctr", "driver": "luks", "file": 
{"driver": "file", "filename": "TEST_DIR/PID-t.luks"}, "hash-alg": "sha1", 
"iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": 
"keysec0", "size": 67108864}}}
+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"cipher-alg": "aes-128", "cipher-mode": "cbc", "driver": "luks", "file": 
{"driver": "file", "filename": "TEST_DIR/PID-t.luks"}, "hash-alg": "sha1", 
"iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": 
"keysec0", "size": 67108864}}}
 {"return": {}}
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
@@ -71,9 +71,9 @@ encrypted: yes
 Format specific information:
 ivgen alg: plain64
 hash alg: sha1
-cipher alg: twofish-128
+cipher alg: aes-128
 uuid: ----
-cipher mode: ctr
+cipher mode: cbc
 slots:

Re: [PATCH-for-6.2 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-3507

2021-11-23 Thread Hanna Reitz

On 18.11.21 12:57, Philippe Mathieu-Daudé wrote:

Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339

Without the previous commit, when running 'make check-qtest-i386'
with QEMU configured with '--enable-sanitizers' we get:

   ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 
0x61962a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0
   READ of size 786432 at 0x61962a00 thread T0
   #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919)
   #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13
   #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14
   #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18
   #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16
   #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5
   #6 0x5626d0bd5649 in cpu_physical_memory_write 
include/exec/cpu-common.h:82:5
   #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9
   #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13
   #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13
   #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13
   #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9
   #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17

   0x61962a00 is located 0 bytes to the right of 512-byte region 
[0x61962800,0x61962a00)
   allocated by thread T0 here:
   #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec)
   #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11
   #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27
   #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20
   #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5
   #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13

   SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) 
in __asan_memcpy
   Shadow bytes around the buggy address:
 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
   =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
   Shadow byte legend (one shadow byte represents 8 application bytes):
 Addressable:   00
 Heap left redzone:   fa
 Freed heap region:   fd
   ==4028352==ABORTING

Reported-by: Alexander Bulekov 
Signed-off-by: Philippe Mathieu-Daudé 
---
  tests/qtest/fdc-test.c | 20 
  1 file changed, 20 insertions(+)

diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
index 26b69f7c5cd..f164d972d10 100644
--- a/tests/qtest/fdc-test.c
+++ b/tests/qtest/fdc-test.c
@@ -546,6 +546,25 @@ static void fuzz_registers(void)
  }
  }
  
+static void test_cve_2021_3507(void)

+{
+QTestState *s;
+
+s = qtest_initf("-nographic -m 32M -nodefaults "
+"-drive file=%s,format=raw,if=floppy", test_image);
+qtest_outl(s, 0x9, 0x0a0206);
+qtest_outw(s, 0x3f4, 0x1600);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outw(s, 0x3f4, 0x0200);
+qtest_outw(s, 0x3f4, 0x0200);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outw(s, 0x3f4, 0x);


No idea what this does (looks like a 256-byte sector read read from 
sector 2 with EOT=0), but hits the problem and reproduces for me.


Unfortunately, I have the exact same problem with test_image as I did in 
the other series.


Hanna


+qtest_quit(s);
+}
+
  int main(int argc, char **argv)
  {
  int fd;
@@ -576,6 +595,7 @@ int main(int argc, char **argv)
  qtest_add_func("/fdc/read_no_dma_18", test_read_no_dma_18);
  qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19);
  qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
+qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507);
  
  ret = g_test_run();
  





Re: [PATCH-for-6.2 v3 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-11-23 Thread Philippe Mathieu-Daudé
On 11/23/21 14:42, Hanna Reitz wrote:
> On 18.11.21 13:06, Philippe Mathieu-Daudé wrote:
>> From: Alexander Bulekov 
>>
>> Without the previous commit, when running 'make check-qtest-i386'
>> with QEMU configured with '--enable-sanitizers' we get:
>>
>>    AddressSanitizer:DEADLYSIGNAL
>>    =
>>    ==287878==ERROR: AddressSanitizer: SEGV on unknown address
>> 0x0344
>>    ==287878==The signal is caused by a WRITE memory access.
>>    ==287878==Hint: address points to the zero page.
>>    #0 0x564b2e5bac27 in blk_inc_in_flight
>> block/block-backend.c:1346:5
>>    #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
>>    #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
>>    #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
>>    #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
>>    #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9
>>
>> Add the reproducer for CVE-2021-20196.
>>
>> Signed-off-by: Alexander Bulekov 
>> Message-Id: <20210319050906.14875-2-alx...@bu.edu>
>> [PMD: Rebased, use global test_image]
>> Reviewed-by: Darren Kenny 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>   tests/qtest/fdc-test.c | 21 +
>>   1 file changed, 21 insertions(+)
> 
> Not sure if I’m doing something wrong, but:
> 
> Using the global test_image brings a problem, namely that this test
> fails unconditionally (for me at least...?), with the reason being that
> the global QEMU instance (launched by qtest_start(), quit by
> qtest_end()) still has that image open, so by launching a second
> instance concurrently, I get this:
> 
> qemu-system-x86_64: Failed to get "write" lock
> Is another process using the image [/tmp/qtest.xV4IxX]?

Hmm I had too many odd problems running qtests in parallel so I
switched to 'make check-qtest -j1' more than 1 year ago, which
is probably why I haven't noticed that issue.

Using another 'test_image' seems against code reuse principle,
but at this point in release, duplicating it is simpler. Someone
will clean that later =)

> So either we need to use a different image file, or we need to quit the
> global instance before using it (e.g. putting a qtest_end() at the
> beginning of test_cve_*()), although the latter just seems wrong.
> 
> Second, I can’t make this test fail.  When I apply this patch first (to
> master) and run it, I don’t get a SIGSEGV.

Is your QEMU built with --enable-sanitizers ?




Re: [PATCH-for-6.2 v3 1/2] hw/block/fdc: Kludge missing floppy drive to fix CVE-2021-20196

2021-11-23 Thread Philippe Mathieu-Daudé
On 11/23/21 14:33, Hanna Reitz wrote:
> On 18.11.21 13:06, Philippe Mathieu-Daudé wrote:
>> Guest might select another drive on the bus by setting the
>> DRIVE_SEL bit of the DIGITAL OUTPUT REGISTER (DOR).
>> The current controller model doesn't expect a BlockBackend
>> to be NULL. A simple way to fix CVE-2021-20196 is to create
>> an empty BlockBackend when it is missing. All further
>> accesses will be safely handled, and the controller state
>> machines keep behaving correctly.
>>
>> Cc: qemu-sta...@nongnu.org
>> Fixes: CVE-2021-20196
>> Reported-by: Gaoning Pan (Ant Security Light-Year Lab) 
>> BugLink: https://bugs.launchpad.net/qemu/+bug/1912780
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/338
>> Reviewed-by: Darren Kenny 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>   hw/block/fdc.c | 14 +-
>>   1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/block/fdc.c b/hw/block/fdc.c
>> index d21b717b7d6..6f94b6a6daa 100644
>> --- a/hw/block/fdc.c
>> +++ b/hw/block/fdc.c
>> @@ -1161,7 +1161,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit)
>>     static FDrive *get_cur_drv(FDCtrl *fdctrl)
>>   {
>> -    return get_drv(fdctrl, fdctrl->cur_drv);
>> +    FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv);
>> +
>> +    if (!cur_drv->blk) {
>> +    /*
>> + * Kludge: empty drive line selected. Create an anonymous
>> + * BlockBackend to avoid NULL deref with various BlockBackend
>> + * API calls within this model (CVE-2021-20196).
>> + * Due to the controller QOM model limitations, we don't
>> + * attach the created to the controller device.
>> + */
>> +    cur_drv->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
> 
> So to me this looks basically like a mini version of
> floppy_drive_realize(), and I was wondering what else we might want to
> use from that function.  fd_init() and fd_revalidate() look interesting,
> but it appears that fdctrl_realize_common() already did that for all
> drives so we should be good.

How the controller / bus / floppy / drive are connected is a bit
confusing. Did you ever try to hot-remove the magnetic medium from
the floppy plastic enclosure while the controller rotates it?

> Then again, fd_revalidate() behaves differently for the initial drv->blk
> == NULL (drv->drive is set to TYPE_NONE, and last_sect and max_track are
> set to 0) and for then later !blk_is_inserted() (drv->drive not changed
> (so I guess it stays TYPE_NONE?), but last_sect and max_track are set to
> 0xff).  Not sure if that’s a problem.  Probably not, given that I think
> drv->disk and drv->drive both stay TYPE_NONE.

I'm not sure about the future plans for this device model...

> Reviewed-by: Hanna Reitz 

Thanks!




Re: [PATCH-for-6.2 1/2] hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)

2021-11-23 Thread Hanna Reitz

On 18.11.21 12:57, Philippe Mathieu-Daudé wrote:

Per the 82078 datasheet, if the end-of-track (EOT byte in
the FIFO) is more than the number of sectors per side, the
command is terminated unsuccessfully:


Patch looks OK to me (can’t believe I’ve looked into the spec...), just 
one question (note from later self: I really believed this, and now I 
ended up with this wall of text...):


Isn’t the problem that the EOT is smaller than the start sector index?  
AFAIU fifo[6] is the EOT, ks (fifo[4]) is the start sector number, and 
so the problem occurs if fifo[6] < fifo[4] - 1 (EOT < start sector index).


I don’t think there is any problem if the EOT exceeds the number of 
sectors per anything (e.g. sectors per track or per cylinder). Well, the 
spec might say it should cause an error, but in that case tmp (and thus 
data_len) just become very large.  AFAIU fd_seek() checks that the start 
sector is in bounds, so in fact passing an EOT that is larger than 
cur_drv->last_sect would never be negative, and so never trigger the tmp 
< 0 condition.


The explanation below however seems to be precisely for the case where 
EOT would be larger than last_sect (say 18), and so e.g. sector 19 can’t 
be found (“second occurrence of a pulse on the INDX# pin”).  Contrarily, 
the spec doesn’t seem to say anything for the case where EOT is in 
bounds but less than the start sector index. It doesn’t even seem to say 
how the EOT is evaluated; is it a “read until sector >= EOT”?  (I.e. EOT 
< start sector would yield a zero-byte read)  Or is it a “read until 
sector == EOT”?  (I.e. EOT < start sector would yield an error because 
that condition is never reached, and we go beyond last_sect, yielding 
the same error as if EOT were out of bounds.)


Now that raises another question to me.  Say EOT is out of bounds; 
wouldn’t the FDC still read all sectors until the end of the 
track/cylinder?  And only then raise an error that the sector beyond 
doesn’t exist?


I have no idea if any of that made sense and even if it did, whether I’m 
right.  But even if I am, I don’t think it’s a problem.  Our problem at 
hand is the `tmp` underflow, and it’s good to handle that by returning 
some form of error to the guest instead of crashing. It’s just that I’m 
not sure this solution is actually what the spec requires (because I 
have no idea what the spec requires in that case), and the explanation 
given in the commit message to me seems to define an error for a very 
different case (where tmp would not be negative) that we just execute 
without an error.


When I look into this I see many more things that look like they aren’t 
according to spec (like not checking the sector size, or that we honor 
fifo[0] & 0x80 for READ TRACK even though the spec says no multi-track 
operations for READ TRACK), so I don’t personally care.  It’s good that 
this patch handles the `tmp` underflow, because that’s important, 
compliance with the spec isn’t.


(Yes, the entire reason for this wall of text is that I wonder why the 
commit message goes into so much detail quoting the spec, while I can’t 
find it applies.)


Reviewed-by: Hanna Reitz 


* 5.2.5 DATA TRANSFER TERMINATION

   The 82078 supports terminal count explicitly through
   the TC pin and implicitly through the underrun/over-
   run and end-of-track (EOT) functions. For full sector
   transfers, the EOT parameter can define the last
   sector to be transferred in a single or multisector
   transfer. If the last sector to be transferred is a par-
   tial sector, the host can stop transferring the data in
   mid-sector, and the 82078 will continue to complete
   the sector as if a hardware TC was received. The
   only difference between these implicit functions and
   TC is that they return "abnormal termination" result
   status. Such status indications can be ignored if they
   were expected.

* 6.1.3 READ TRACK

   This command terminates when the EOT specified
   number of sectors have been read. If the 82078
   does not find an I D Address Mark on the diskette
   after the second· occurrence of a pulse on the
   INDX# pin, then it sets the IC code in Status Regis-
   ter 0 to "01" (Abnormal termination), sets the MA bit
   in Status Register 1 to "1", and terminates the com-
   mand.

* 6.1.6 VERIFY

   Refer to Table 6-6 and Table 6-7 for information
   concerning the values of MT and EC versus SC and
   EOT value.

* Table 6·6. Result Phase Table

* Table 6-7. Verify Command Result Phase Table

Fix by aborting the transfer when EOT > # Sectors Per Side.

Cc: qemu-sta...@nongnu.org
Cc: Hervé Poussineau 
Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
Reported-by: Alexander Bulekov 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
Signed-off-by: Philippe Mathieu-Daudé 
---
  hw/block/fdc.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index fa933cd3263..d21b717b7d6 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c

Re: [PATCH-for-6.2 2/2] tests/qtest/fdc-test: Add a regression test for CVE-2021-3507

2021-11-23 Thread Alexander Bulekov
On 28 1257, Philippe Mathieu-Daudé wrote:
> Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339
> 
> Without the previous commit, when running 'make check-qtest-i386'
> with QEMU configured with '--enable-sanitizers' we get:
> 
>   ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 
> 0x61962a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0
>   READ of size 786432 at 0x61962a00 thread T0
>   #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919)
>   #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13
>   #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14
>   #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18
>   #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16
>   #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5
>   #6 0x5626d0bd5649 in cpu_physical_memory_write 
> include/exec/cpu-common.h:82:5
>   #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9
>   #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13
>   #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13
>   #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13
>   #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9
>   #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17
> 
>   0x61962a00 is located 0 bytes to the right of 512-byte region 
> [0x61962800,0x61962a00)
>   allocated by thread T0 here:
>   #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec)
>   #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11
>   #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27
>   #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20
>   #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5
>   #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13
> 
>   SUMMARY: AddressSanitizer: heap-buffer-overflow 
> (qemu-system-i386+0x1e65919) in __asan_memcpy
>   Shadow bytes around the buggy address:
> 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>   =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
>   Shadow byte legend (one shadow byte represents 8 application bytes):
> Addressable:   00
> Heap left redzone:   fa
> Freed heap region:   fd
>   ==4028352==ABORTING
> 
> Reported-by: Alexander Bulekov 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  tests/qtest/fdc-test.c | 20 
>  1 file changed, 20 insertions(+)
> 
> diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
> index 26b69f7c5cd..f164d972d10 100644
> --- a/tests/qtest/fdc-test.c
> +++ b/tests/qtest/fdc-test.c
> @@ -546,6 +546,25 @@ static void fuzz_registers(void)
>  }
>  }
>  
> +static void test_cve_2021_3507(void)
> +{
> +QTestState *s;
> +
> +s = qtest_initf("-nographic -m 32M -nodefaults "
> +"-drive file=%s,format=raw,if=floppy", test_image);

Does it make sense to run this with -snapshot ?

Reviewed-by: Alexander Bulekov 

> +qtest_outl(s, 0x9, 0x0a0206);
> +qtest_outw(s, 0x3f4, 0x1600);
> +qtest_outw(s, 0x3f4, 0x);
> +qtest_outw(s, 0x3f4, 0x);
> +qtest_outw(s, 0x3f4, 0x);
> +qtest_outw(s, 0x3f4, 0x0200);
> +qtest_outw(s, 0x3f4, 0x0200);
> +qtest_outw(s, 0x3f4, 0x);
> +qtest_outw(s, 0x3f4, 0x);
> +qtest_outw(s, 0x3f4, 0x);
> +qtest_quit(s);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  int fd;
> @@ -576,6 +595,7 @@ int main(int argc, char **argv)
>  qtest_add_func("/fdc/read_no_dma_18", test_read_no_dma_18);
>  qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19);
>  qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
> +qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507);
>  
>  ret = g_test_run();
>  
> -- 
> 2.31.1
> 



Re: [PULL 0/3] Block patches

2021-11-23 Thread Richard Henderson

On 11/23/21 4:59 PM, Hanna Reitz wrote:

The following changes since commit 73e0f70e097b7c92a5ce16ee35b53afe119b20d7:

   Merge tag 'pull-lu-20211123' of https://gitlab.com/rth7680/qemu into staging 
(2021-11-23 11:33:14 +0100)

are available in the Git repository at:

   https://gitlab.com/hreitz/qemu.git tags/pull-block-2021-11-23

for you to fetch changes up to 4dd218fd0717ed3cddb69c01eeb9da630107d89d:

   iotests/149: Skip on unsupported ciphers (2021-11-23 15:39:12 +0100)


Block patches for 6.2-rc2:
- Fix memory leak in vvfat when vvfat_open() fails
- iotest fixes for the gnutls crypto backend


Daniella Lee (1):
   block/vvfat.c fix leak when failure occurs

Hanna Reitz (2):
   iotests: Use aes-128-cbc
   iotests/149: Skip on unsupported ciphers

  block/vvfat.c  | 16 
  tests/qemu-iotests/149 | 23 ++-
  tests/qemu-iotests/206 |  4 ++--
  tests/qemu-iotests/206.out |  6 +++---
  tests/qemu-iotests/210 |  4 ++--
  tests/qemu-iotests/210.out |  6 +++---
  6 files changed, 40 insertions(+), 19 deletions(-)


Applied, thanks.

r~




[PATCH 1/4] block_int: make bdrv_backing_overridden static

2021-11-23 Thread Emanuele Giuseppe Esposito
bdrv_backing_overridden is only used in block.c, so there is
no need to leave it in block_int.h

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/block/block_int.h | 3 ---
 block.c   | 4 +++-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index f4c75e8ba9..27008cfb22 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1122,9 +1122,6 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
   QDict *options);
 
-bool bdrv_backing_overridden(BlockDriverState *bs);
-
-
 /**
  * bdrv_add_aio_context_notifier:
  *
diff --git a/block.c b/block.c
index 0ac5b163d2..10346b5011 100644
--- a/block.c
+++ b/block.c
@@ -103,6 +103,8 @@ static int bdrv_reopen_prepare(BDRVReopenState 
*reopen_state,
 static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
 
+static bool bdrv_backing_overridden(BlockDriverState *bs);
+
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -7475,7 +7477,7 @@ static bool append_strong_runtime_options(QDict *d, 
BlockDriverState *bs)
 /* Note: This function may return false positives; it may return true
  * even if opening the backing file specified by bs's image header
  * would result in exactly bs->backing. */
-bool bdrv_backing_overridden(BlockDriverState *bs)
+static bool bdrv_backing_overridden(BlockDriverState *bs)
 {
 if (bs->backing) {
 return strcmp(bs->auto_backing_file,
-- 
2.27.0




[PATCH 3/4] include/sysemu/blockdev.h: move drive_add and inline drive_def

2021-11-23 Thread Emanuele Giuseppe Esposito
drive_add is only used in softmmu/vl.c, so it can be a static
function there, and drive_def is only a particular use case of
qemu_opts_parse_noisily, so it can be inlined.

Also remove drive_mark_claimed_by_board, as it is only defined
but not implemented (nor used) anywhere.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/sysemu/blockdev.h  |  6 ++
 block/monitor/block-hmp-cmds.c |  2 +-
 blockdev.c | 27 +--
 softmmu/vl.c   | 25 -
 4 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 32c2d6023c..aacc587a33 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -27,6 +27,8 @@ typedef enum {
 IF_COUNT
 } BlockInterfaceType;
 
+extern const char *const block_if_name[];
+
 struct DriveInfo {
 BlockInterfaceType type;
 int bus;
@@ -45,16 +47,12 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
 void override_max_devs(BlockInterfaceType type, int max_devs);
 
 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit);
-void drive_mark_claimed_by_board(void);
 void drive_check_orphaned(void);
 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index);
 int drive_get_max_bus(BlockInterfaceType type);
 int drive_get_max_devs(BlockInterfaceType type);
 DriveInfo *drive_get_next(BlockInterfaceType type);
 
-QemuOpts *drive_def(const char *optstr);
-QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
-const char *optstr);
 DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type,
  Error **errp);
 
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 2ac4aedfff..bfb3c043a0 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -101,7 +101,7 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
 return;
 }
 
-opts = drive_def(optstr);
+opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
 if (!opts)
 return;
 
diff --git a/blockdev.c b/blockdev.c
index 1b6ffbbc73..928bb743a1 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -71,7 +71,7 @@ void bdrv_set_monitor_owned(BlockDriverState *bs)
 QTAILQ_INSERT_TAIL(_bdrv_states, bs, monitor_list);
 }
 
-static const char *const block_if_name[IF_COUNT] = {
+const char *const block_if_name[IF_COUNT] = {
 [IF_NONE] = "none",
 [IF_IDE] = "ide",
 [IF_SCSI] = "scsi",
@@ -197,31 +197,6 @@ static int drive_index_to_unit_id(BlockInterfaceType type, 
int index)
 return max_devs ? index % max_devs : index;
 }
 
-QemuOpts *drive_def(const char *optstr)
-{
-return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
-}
-
-QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
-const char *optstr)
-{
-QemuOpts *opts;
-
-opts = drive_def(optstr);
-if (!opts) {
-return NULL;
-}
-if (type != IF_DEFAULT) {
-qemu_opt_set(opts, "if", block_if_name[type], _abort);
-}
-if (index >= 0) {
-qemu_opt_set_number(opts, "index", index, _abort);
-}
-if (file)
-qemu_opt_set(opts, "file", file, _abort);
-return opts;
-}
-
 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
 {
 BlockBackend *blk;
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 1159a64bce..f47827926c 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -650,6 +650,27 @@ static int drive_enable_snapshot(void *opaque, QemuOpts 
*opts, Error **errp)
 return 0;
 }
 
+static QemuOpts *drive_add(BlockInterfaceType type, int index,
+   const char *file, const char *optstr)
+{
+QemuOpts *opts;
+
+opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
+if (!opts) {
+return NULL;
+}
+if (type != IF_DEFAULT) {
+qemu_opt_set(opts, "if", block_if_name[type], _abort);
+}
+if (index >= 0) {
+qemu_opt_set_number(opts, "index", index, _abort);
+}
+if (file) {
+qemu_opt_set(opts, "file", file, _abort);
+}
+return opts;
+}
+
 static void default_drive(int enable, int snapshot, BlockInterfaceType type,
   int index, const char *optstr)
 {
@@ -2884,7 +2905,9 @@ void qemu_init(int argc, char **argv, char **envp)
 break;
 }
 case QEMU_OPTION_drive:
-if (drive_def(optarg) == NULL) {
+opts = qemu_opts_parse_noisily(qemu_find_opts("drive"),
+   optarg, false);
+if (opts == NULL) {
 exit(1);
 }
 break;
-- 
2.27.0




[PATCH v5 01/31] main-loop.h: introduce qemu_in_main_thread()

2021-11-23 Thread Emanuele Giuseppe Esposito
When invoked from the main loop, this function is the same
as qemu_mutex_iothread_locked, and returns true if the BQL is held.
When invoked from iothreads or tests, it returns true only
if the current AioContext is the Main Loop.

This essentially just extends qemu_mutex_iothread_locked to work
also in unit tests or other users like storage-daemon, that run
in the Main Loop but end up using the implementation in
stubs/iothread-lock.c.

Using qemu_mutex_iothread_locked in unit tests defaults to false
because they use the implementation in stubs/iothread-lock,
making all assertions added in next patches fail despite the
AioContext is still the main loop.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
 include/qemu/main-loop.h | 13 +
 softmmu/cpus.c   |  5 +
 stubs/iothread-lock.c|  5 +
 3 files changed, 23 insertions(+)

diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 8dbc6fcb89..6b8fa57c5d 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -245,6 +245,19 @@ AioContext *iohandler_get_aio_context(void);
  */
 bool qemu_mutex_iothread_locked(void);
 
+/**
+ * qemu_in_main_thread: same as qemu_mutex_iothread_locked when
+ * softmmu/cpus.c implementation is linked. Otherwise this function
+ * checks that the current AioContext is the global AioContext
+ * (main loop).
+ *
+ * This is useful when checking that the BQL is held, to avoid
+ * returning false when invoked by unit tests or other users like
+ * storage-daemon that end up using stubs/iothread-lock.c
+ * implementation.
+ */
+bool qemu_in_main_thread(void);
+
 /**
  * qemu_mutex_lock_iothread: Lock the main loop mutex.
  *
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 071085f840..3f61a3c31d 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -481,6 +481,11 @@ bool qemu_mutex_iothread_locked(void)
 return iothread_locked;
 }
 
+bool qemu_in_main_thread(void)
+{
+return qemu_mutex_iothread_locked();
+}
+
 /*
  * The BQL is taken from so many places that it is worth profiling the
  * callers directly, instead of funneling them all through a single function.
diff --git a/stubs/iothread-lock.c b/stubs/iothread-lock.c
index 5b45b7fc8b..ff7386e42c 100644
--- a/stubs/iothread-lock.c
+++ b/stubs/iothread-lock.c
@@ -6,6 +6,11 @@ bool qemu_mutex_iothread_locked(void)
 return false;
 }
 
+bool qemu_in_main_thread(void)
+{
+return qemu_get_current_aio_context() == qemu_get_aio_context();
+}
+
 void qemu_mutex_lock_iothread_impl(const char *file, int line)
 {
 }
-- 
2.27.0




[PATCH v5 02/31] include/block/block: split header into I/O and global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
block.h currently contains a mix of functions:
some of them run under the BQL and modify the block layer graph,
others are instead thread-safe and perform I/O in iothreads.
It is not easy to understand which function is part of which
group (I/O vs GS), and this patch aims to clarify it.

The "GS" functions need the BQL, and often use
aio_context_acquire/release and/or drain to be sure they
can modify the graph safely.
The I/O function are instead thread safe, and can run in
any AioContext.

By splitting the header in two files, block-io.h
and block-global-state.h we have a clearer view on what
needs what kind of protection. block-common.h
contains common structures shared by both headers.

block.h is left there for legacy and to avoid changing
all includes in all c files that use the block APIs.

Assertions are added in the next patch.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/block/block-common.h   | 380 +
 include/block/block-global-state.h | 271 +
 include/block/block-io.h   | 322 +++
 include/block/block.h  | 878 +
 block.c|   3 +
 block/meson.build  |   7 +-
 6 files changed, 1004 insertions(+), 857 deletions(-)
 create mode 100644 include/block/block-common.h
 create mode 100644 include/block/block-global-state.h
 create mode 100644 include/block/block-io.h

diff --git a/include/block/block-common.h b/include/block/block-common.h
new file mode 100644
index 00..dfaa73ac70
--- /dev/null
+++ b/include/block/block-common.h
@@ -0,0 +1,380 @@
+#ifndef BLOCK_COMMON_H
+#define BLOCK_COMMON_H
+
+#include "block/aio.h"
+#include "block/aio-wait.h"
+#include "qemu/iov.h"
+#include "qemu/coroutine.h"
+#include "block/accounting.h"
+#include "block/dirty-bitmap.h"
+#include "block/blockjob.h"
+#include "qemu/hbitmap.h"
+#include "qemu/transactions.h"
+
+/*
+ * generated_co_wrapper
+ *
+ * Function specifier, which does nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Read more in docs/devel/block-coroutine-wrapper.rst
+ */
+#define generated_co_wrapper
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BdrvChild BdrvChild;
+typedef struct BdrvChildClass BdrvChildClass;
+
+typedef struct BlockDriverInfo {
+/* in bytes, 0 if irrelevant */
+int cluster_size;
+/* offset at which the VM state can be saved (0 if not possible) */
+int64_t vm_state_offset;
+bool is_dirty;
+/*
+ * True if this block driver only supports compressed writes
+ */
+bool needs_compressed_writes;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+uint64_t allocated_clusters;
+uint64_t total_clusters;
+uint64_t fragmented_clusters;
+uint64_t compressed_clusters;
+} BlockFragInfo;
+
+typedef enum {
+BDRV_REQ_COPY_ON_READ   = 0x1,
+BDRV_REQ_ZERO_WRITE = 0x2,
+
+/*
+ * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
+ * that the block driver should unmap (discard) blocks if it is guaranteed
+ * that the result will read back as zeroes. The flag is only passed to the
+ * driver if the block device is opened with BDRV_O_UNMAP.
+ */
+BDRV_REQ_MAY_UNMAP  = 0x4,
+
+BDRV_REQ_FUA= 0x10,
+BDRV_REQ_WRITE_COMPRESSED   = 0x20,
+
+/*
+ * Signifies that this write request will not change the visible disk
+ * content.
+ */
+BDRV_REQ_WRITE_UNCHANGED= 0x40,
+
+/*
+ * Forces request serialisation. Use only with write requests.
+ */
+BDRV_REQ_SERIALISING= 0x80,
+
+/*
+ * Execute the request only if the operation can be offloaded or otherwise
+ * be executed efficiently, but return an error instead of using a slow
+ * fallback.
+ */
+BDRV_REQ_NO_FALLBACK= 0x100,
+
+/*
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
+ */
+BDRV_REQ_PREFETCH  = 0x200,
+
+/*
+ * If we need to wait for other requests, just fail immediately. Used
+ * only together with BDRV_REQ_SERIALISING.
+ */
+BDRV_REQ_NO_WAIT = 0x400,
+
+/* Mask of valid flags */
+BDRV_REQ_MASK   = 0x7ff,
+} BdrvRequestFlags;
+
+#define BDRV_O_NO_SHARE0x0001 /* don't share permissions */
+#define BDRV_O_RDWR0x0002
+#define BDRV_O_RESIZE  0x0004 /* request permission for resizing the node 
*/
+#define BDRV_O_SNAPSHOT0x0008 /* open the file read only and save
+ writes in a snapshot */
+#define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */

[PATCH v5 07/31] include/block/block_int: split header into I/O and global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
Similarly to the previous patch, split block_int.h
in block_int-io.h and block_int-global-state.h

block_int-common.h contains the structures shared between
the two headers, and the functions that can't be categorized as
I/O or global state.

Assertions are added in the next patch.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/block/block_int-common.h   | 1161 +++
 include/block/block_int-global-state.h |  315 +
 include/block/block_int-io.h   |  167 +++
 include/block/block_int.h  | 1475 +---
 blockdev.c |5 +
 5 files changed, 1651 insertions(+), 1472 deletions(-)
 create mode 100644 include/block/block_int-common.h
 create mode 100644 include/block/block_int-global-state.h
 create mode 100644 include/block/block_int-io.h

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
new file mode 100644
index 00..70534f94ae
--- /dev/null
+++ b/include/block/block_int-common.h
@@ -0,0 +1,1161 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_COMMON_H
+#define BLOCK_INT_COMMON_H
+
+#include "block/accounting.h"
+#include "block/block.h"
+#include "block/aio-wait.h"
+#include "qemu/queue.h"
+#include "qemu/coroutine.h"
+#include "qemu/stats64.h"
+#include "qemu/timer.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+#include "qemu/throttle.h"
+#include "qemu/rcu.h"
+
+#define BLOCK_FLAG_LAZY_REFCOUNTS   8
+
+#define BLOCK_OPT_SIZE  "size"
+#define BLOCK_OPT_ENCRYPT   "encryption"
+#define BLOCK_OPT_ENCRYPT_FORMAT"encrypt.format"
+#define BLOCK_OPT_COMPAT6   "compat6"
+#define BLOCK_OPT_HWVERSION "hwversion"
+#define BLOCK_OPT_BACKING_FILE  "backing_file"
+#define BLOCK_OPT_BACKING_FMT   "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE  "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE"table_size"
+#define BLOCK_OPT_PREALLOC  "preallocation"
+#define BLOCK_OPT_SUBFMT"subformat"
+#define BLOCK_OPT_COMPAT_LEVEL  "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS"lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE  "adapter_type"
+#define BLOCK_OPT_REDUNDANCY"redundancy"
+#define BLOCK_OPT_NOCOW "nocow"
+#define BLOCK_OPT_EXTENT_SIZE_HINT  "extent_size_hint"
+#define BLOCK_OPT_OBJECT_SIZE   "object_size"
+#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
+#define BLOCK_OPT_DATA_FILE "data_file"
+#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
+#define BLOCK_OPT_COMPRESSION_TYPE  "compression_type"
+#define BLOCK_OPT_EXTL2 "extended_l2"
+
+#define BLOCK_PROBE_BUF_SIZE512
+
+enum BdrvTrackedRequestType {
+BDRV_TRACKED_READ,
+BDRV_TRACKED_WRITE,
+BDRV_TRACKED_DISCARD,
+BDRV_TRACKED_TRUNCATE,
+};
+
+/*
+ * That is not quite good that BdrvTrackedRequest structure is public,
+ * as block/io.c is very careful about incoming offset/bytes being
+ * correct. Be sure to assert bdrv_check_request() succeeded after any
+ * modification of BdrvTrackedRequest object out of block/io.c
+ */
+typedef struct BdrvTrackedRequest {
+BlockDriverState *bs;
+int64_t offset;
+int64_t bytes;
+enum BdrvTrackedRequestType type;
+
+bool serialising;
+int64_t overlap_offset;
+int64_t overlap_bytes;
+
+QLIST_ENTRY(BdrvTrackedRequest) list;
+Coroutine *co; /* owner, used for deadlock detection */
+CoQueue wait_queue; /* coroutines blocked on this request */
+
+struct BdrvTrackedRequest *waiting_for;
+} BdrvTrackedRequest;
+
+
+struct BlockDriver {
+const char *format_name;
+int instance_size;
+
+/*
+ * Set to true if the BlockDriver is a block filter. Block filters pass
+ * certain callbacks that refer to data (see block.c) to their bs->file
+ * or 

[PATCH v5 13/31] block.c: add assertions to static functions

2021-11-23 Thread Emanuele Giuseppe Esposito
Following the assertion derived from the API split,
propagate the assertion also in the static functions.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c | 45 -
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 5516c84ec4..b77ab0a104 100644
--- a/block.c
+++ b/block.c
@@ -434,6 +434,7 @@ BlockDriverState *bdrv_new(void)
 static BlockDriver *bdrv_do_find_format(const char *format_name)
 {
 BlockDriver *drv1;
+assert(qemu_in_main_thread());
 
 QLIST_FOREACH(drv1, _drivers, list) {
 if (!strcmp(drv1->format_name, format_name)) {
@@ -593,6 +594,8 @@ static int64_t create_file_fallback_truncate(BlockBackend 
*blk,
 int64_t size;
 int ret;
 
+assert(qemu_in_main_thread());
+
 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
_err);
 if (ret < 0 && ret != -ENOTSUP) {
@@ -631,6 +634,8 @@ static int 
create_file_fallback_zero_first_sector(BlockBackend *blk,
 int64_t bytes_to_clear;
 int ret;
 
+assert(qemu_in_main_thread());
+
 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
 if (bytes_to_clear) {
 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
@@ -891,6 +896,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
 {
 int score_max = 0, score;
 BlockDriver *drv = NULL, *d;
+assert(qemu_in_main_thread());
 
 QLIST_FOREACH(d, _drivers, list) {
 if (d->bdrv_probe_device) {
@@ -908,6 +914,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
 static BlockDriver *bdrv_do_find_protocol(const char *protocol)
 {
 BlockDriver *drv1;
+assert(qemu_in_main_thread());
 
 QLIST_FOREACH(drv1, _drivers, list) {
 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
@@ -1016,6 +1023,8 @@ static int find_image_format(BlockBackend *file, const 
char *filename,
 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
 int ret = 0;
 
+assert(qemu_in_main_thread());
+
 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) 
{
 *pdrv = _raw;
@@ -1097,6 +1106,7 @@ static BlockdevDetectZeroesOptions 
bdrv_parse_detect_zeroes(QemuOpts *opts,
 BlockdevDetectZeroesOptions detect_zeroes =
 qapi_enum_parse(_lookup, value,
 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, _err);
+assert(qemu_in_main_thread());
 g_free(value);
 if (local_err) {
 error_propagate(errp, local_err);
@@ -1214,6 +1224,7 @@ static void bdrv_child_cb_drained_end(BdrvChild *child,
 static int bdrv_child_cb_inactivate(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
+assert(qemu_in_main_thread());
 assert(bs->open_flags & BDRV_O_INACTIVE);
 return 0;
 }
@@ -1241,6 +1252,7 @@ static void bdrv_temp_snapshot_options(int *child_flags, 
QDict *child_options,
int parent_flags, QDict *parent_options)
 {
 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+assert(qemu_in_main_thread());
 
 /* For temporary files, unconditional cache=unsafe is fine */
 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
@@ -1260,6 +1272,7 @@ static void bdrv_backing_attach(BdrvChild *c)
 BlockDriverState *parent = c->opaque;
 BlockDriverState *backing_hd = c->bs;
 
+assert(qemu_in_main_thread());
 assert(!parent->backing_blocker);
 error_setg(>backing_blocker,
"node is used as backing hd of '%s'",
@@ -1298,6 +1311,7 @@ static void bdrv_backing_detach(BdrvChild *c)
 {
 BlockDriverState *parent = c->opaque;
 
+assert(qemu_in_main_thread());
 assert(parent->backing_blocker);
 bdrv_op_unblock_all(c->bs, parent->backing_blocker);
 error_free(parent->backing_blocker);
@@ -1310,6 +1324,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, 
BlockDriverState *base,
 BlockDriverState *parent = c->opaque;
 bool read_only = bdrv_is_read_only(parent);
 int ret;
+assert(qemu_in_main_thread());
 
 if (read_only) {
 ret = bdrv_reopen_set_read_only(parent, false, errp);
@@ -1341,6 +1356,7 @@ static void bdrv_inherited_options(BdrvChildRole role, 
bool parent_is_format,
int parent_flags, QDict *parent_options)
 {
 int flags = parent_flags;
+assert(qemu_in_main_thread());
 
 /*
  * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
@@ -1479,6 +1495,7 @@ AioContext *bdrv_child_get_parent_aio_context(BdrvChild 
*c)
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
 {
 int open_flags = flags;
+assert(qemu_in_main_thread());
 
 /*
  * Clear flags that are internal to the block layer before opening the
@@ -1491,6 +1508,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int 
flags)
 
 

[PATCH v5 11/31] include/block/blockjob_int.h: split header into I/O and GS API

2021-11-23 Thread Emanuele Giuseppe Esposito
Since the I/O functions are not many, keep a single file.
Also split the function pointers in BlockJobDriver.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/blockjob_int.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 6633d83da2..718d7b92d2 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -38,6 +38,13 @@ struct BlockJobDriver {
 /** Generic JobDriver callbacks and settings */
 JobDriver job_driver;
 
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
 /*
  * Returns whether the job has pending requests for the child or will
  * submit new requests before the next pause point. This callback is polled
@@ -46,6 +53,13 @@ struct BlockJobDriver {
  */
 bool (*drained_poll)(BlockJob *job);
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /*
  * If the callback is not NULL, it will be invoked before the job is
  * resumed in a new AioContext.  This is the place to move any resources
@@ -56,6 +70,13 @@ struct BlockJobDriver {
 void (*set_speed)(BlockJob *job, int64_t speed);
 };
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /**
  * block_job_create:
  * @job_id: The id of the newly-created job, or %NULL to have one
@@ -98,6 +119,13 @@ void block_job_free(Job *job);
  */
 void block_job_user_resume(Job *job);
 
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
 /**
  * block_job_ratelimit_get_delay:
  *
-- 
2.27.0




[PATCH v5 15/31] assertions for blockjob.h global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
 blockjob.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 10c807413e..74476af473 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -62,6 +62,7 @@ static bool is_block_job(Job *job)
 BlockJob *block_job_next(BlockJob *bjob)
 {
 Job *job = bjob ? >job : NULL;
+assert(qemu_in_main_thread());
 
 do {
 job = job_next(job);
@@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob)
 BlockJob *block_job_get(const char *id)
 {
 Job *job = job_get(id);
+assert(qemu_in_main_thread());
 
 if (job && is_block_job(job)) {
 return container_of(job, BlockJob, job);
@@ -185,6 +187,7 @@ static const BdrvChildClass child_job = {
 
 void block_job_remove_all_bdrv(BlockJob *job)
 {
+assert(qemu_in_main_thread());
 /*
  * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
  * which will also traverse job->nodes, so consume the list one by
@@ -207,6 +210,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
 bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
 {
 GSList *el;
+assert(qemu_in_main_thread());
 
 for (el = job->nodes; el; el = el->next) {
 BdrvChild *c = el->data;
@@ -223,6 +227,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, 
BlockDriverState *bs,
 {
 BdrvChild *c;
 bool need_context_ops;
+assert(qemu_in_main_thread());
 
 bdrv_ref(bs);
 
@@ -272,6 +277,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, 
Error **errp)
 const BlockJobDriver *drv = block_job_driver(job);
 int64_t old_speed = job->speed;
 
+assert(qemu_in_main_thread());
+
 if (job_apply_verb(>job, JOB_VERB_SET_SPEED, errp) < 0) {
 return false;
 }
@@ -309,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
 BlockJobInfo *info;
 uint64_t progress_current, progress_total;
 
+assert(qemu_in_main_thread());
+
 if (block_job_is_internal(job)) {
 error_setg(errp, "Cannot query QEMU internal jobs");
 return NULL;
@@ -498,6 +507,7 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
 
 void block_job_iostatus_reset(BlockJob *job)
 {
+assert(qemu_in_main_thread());
 if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
 return;
 }
-- 
2.27.0




[PATCH v5 24/31] block_int-common.h: assertions in the callers of BdrvChildClass function pointers

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/block.c b/block.c
index 180884b8c0..a0309f827d 100644
--- a/block.c
+++ b/block.c
@@ -1491,6 +1491,7 @@ const BdrvChildClass child_of_bds = {
 
 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
 {
+assert(qemu_in_main_thread());
 return c->klass->get_parent_aio_context(c);
 }
 
@@ -2120,6 +2121,7 @@ bool bdrv_is_writable(BlockDriverState *bs)
 
 static char *bdrv_child_user_desc(BdrvChild *c)
 {
+assert(qemu_in_main_thread());
 return c->klass->get_parent_desc(c);
 }
 
@@ -2832,6 +2834,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
 
 assert(!child->frozen);
 assert(old_bs != new_bs);
+assert(qemu_in_main_thread());
 
 if (old_bs && new_bs) {
 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
@@ -2928,6 +2931,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
 BdrvChild *child = *s->child;
 BlockDriverState *bs = child->bs;
 
+assert(qemu_in_main_thread());
 /*
  * Pass free_empty_child=false, because we still need the child
  * for the AioContext operations on the parent below; those
@@ -3296,6 +3300,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild 
*child)
 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
 {
 BdrvChild *c;
+assert(qemu_in_main_thread());
 QLIST_FOREACH(c, >parents, next_parent) {
 if (c->klass->change_media) {
 c->klass->change_media(c, load);
@@ -3792,6 +3797,7 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 
 assert(!child_class || !flags);
 assert(!child_class == !parent);
+assert(qemu_in_main_thread());
 
 if (reference) {
 bool options_non_empty = options ? qdict_size(options) : false;
@@ -4178,6 +4184,7 @@ static BlockReopenQueue 
*bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
  * important to avoid graph changes between the recursive queuing here and
  * bdrv_reopen_multiple(). */
 assert(bs->quiesce_counter > 0);
+assert(qemu_in_main_thread());
 
 if (bs_queue == NULL) {
 bs_queue = g_new0(BlockReopenQueue, 1);
@@ -7271,6 +7278,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
 BdrvChild *child, *parent;
 
 g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+assert(qemu_in_main_thread());
 
 if (old_context == new_context) {
 return;
@@ -7343,6 +7351,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
 static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
 GSList **ignore, Error **errp)
 {
+assert(qemu_in_main_thread());
 if (g_slist_find(*ignore, c)) {
 return true;
 }
-- 
2.27.0




[PATCH v5 09/31] block: introduce assert_bdrv_graph_writable

2021-11-23 Thread Emanuele Giuseppe Esposito
We want to be sure that the functions that write the child and
parent list of a bs are under BQL and drain.

BQL prevents from concurrent writings from the GS API, while
drains protect from I/O.

TODO: drains are missing in some functions using this assert.
Therefore a proper assertion will fail. Because adding drains
requires additional discussions, they will be added in future
series.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/block/block_int-global-state.h | 10 +-
 block.c|  4 
 block/io.c | 11 +++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/block/block_int-global-state.h 
b/include/block/block_int-global-state.h
index a1b7d0579d..fa96e8b449 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -312,4 +312,12 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
  */
 void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
 
-#endif /* BLOCK_INT_GLOBAL_STATE*/
+/**
+ * Make sure that the function is running under both drain and BQL.
+ * The latter protects from concurrent writings
+ * from the GS API, while the former prevents concurrent reads
+ * from I/O.
+ */
+void assert_bdrv_graph_writable(BlockDriverState *bs);
+
+#endif /* BLOCK_INT_GLOBAL_STATE */
diff --git a/block.c b/block.c
index 198ec636ff..522a273140 100644
--- a/block.c
+++ b/block.c
@@ -1416,6 +1416,7 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
 
+assert_bdrv_graph_writable(bs);
 QLIST_INSERT_HEAD(>children, child, next);
 
 if (child->role & BDRV_CHILD_COW) {
@@ -1435,6 +1436,7 @@ static void bdrv_child_cb_detach(BdrvChild *child)
 
 bdrv_unapply_subtree_drain(child, bs);
 
+assert_bdrv_graph_writable(bs);
 QLIST_REMOVE(child, next);
 }
 
@@ -2818,6 +2820,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
 if (child->klass->detach) {
 child->klass->detach(child);
 }
+assert_bdrv_graph_writable(old_bs);
 QLIST_REMOVE(child, next_parent);
 }
 
@@ -2827,6 +2830,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
 }
 
 if (new_bs) {
+assert_bdrv_graph_writable(new_bs);
 QLIST_INSERT_HEAD(_bs->parents, child, next_parent);
 
 /*
diff --git a/block/io.c b/block/io.c
index cb095deeec..3be08cad29 100644
--- a/block/io.c
+++ b/block/io.c
@@ -734,6 +734,17 @@ void bdrv_drain_all(void)
 bdrv_drain_all_end();
 }
 
+void assert_bdrv_graph_writable(BlockDriverState *bs)
+{
+/*
+ * TODO: this function is incomplete. Because the users of this
+ * assert lack the necessary drains, check only for BQL.
+ * Once the necessary drains are added,
+ * assert also for qatomic_read(>quiesce_counter) > 0
+ */
+assert(qemu_in_main_thread());
+}
+
 /**
  * Remove an active request from the tracked requests list
  *
-- 
2.27.0




[PATCH v5 26/31] job.h: split function pointers in JobDriver

2021-11-23 Thread Emanuele Giuseppe Esposito
The job API will be handled separately in another serie.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/qemu/job.h | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index 6e67b6977f..4ea7a4a0cd 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -169,6 +169,12 @@ typedef struct Job {
  * Callbacks and other information about a Job driver.
  */
 struct JobDriver {
+
+/*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards
+ */
+
 /** Derived Job struct size */
 size_t instance_size;
 
@@ -184,9 +190,18 @@ struct JobDriver {
  * aborted. If it returns zero, the job moves into the WAITING state. If it
  * is the last job to complete in its transaction, all jobs in the
  * transaction move from WAITING to PENDING.
+ *
+ * This callback must be run in the job's context.
  */
 int coroutine_fn (*run)(Job *job, Error **errp);
 
+/*
+ * Functions run without regard to the BQL that may run in any
+ * arbitrary thread. These functions do not need to be thread-safe
+ * because the caller ensures that they are invoked from one
+ * thread at time.
+ */
+
 /**
  * If the callback is not NULL, it will be invoked when the job transitions
  * into the paused state.  Paused jobs must not perform any asynchronous
@@ -201,6 +216,13 @@ struct JobDriver {
  */
 void coroutine_fn (*resume)(Job *job);
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /**
  * Called when the job is resumed by the user (i.e. user_paused becomes
  * false). .user_resume is called before .resume.
-- 
2.27.0




[PATCH v5 29/31] jobs: introduce pre_run function in JobDriver

2021-11-23 Thread Emanuele Giuseppe Esposito
.pre_run takes care of doing some initial job setup just before
the job is run inside the coroutine. Doing so can be useful
to invoke GS functions that require the BQL held.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/qemu/job.h |  9 +
 job.c  | 13 +
 2 files changed, 22 insertions(+)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index 4ea7a4a0cd..915ceff425 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -223,6 +223,15 @@ struct JobDriver {
  * the GS API.
  */
 
+/**
+ * Called in the Main Loop context, before the job coroutine
+ * is started in the job aiocontext. Useful to perform operations
+ * that needs to be done under BQL (like permissions setup).
+ * On success, it returns 0. Otherwise the job failed to initialize
+ * and won't start.
+ */
+int (*pre_run)(Job *job, Error **errp);
+
 /**
  * Called when the job is resumed by the user (i.e. user_paused becomes
  * false). .user_resume is called before .resume.
diff --git a/job.c b/job.c
index bb57ec6887..e048037099 100644
--- a/job.c
+++ b/job.c
@@ -967,11 +967,24 @@ static void coroutine_fn job_co_entry(void *opaque)
 aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
 }
 
+static int job_pre_run(Job *job)
+{
+assert(qemu_in_main_thread());
+if (job->driver->pre_run) {
+return job->driver->pre_run(job, >err);
+}
+
+return 0;
+}
+
 void job_start(Job *job)
 {
 assert(job && !job_started(job) && job->paused &&
job->driver && job->driver->run);
 job->co = qemu_coroutine_create(job_co_entry, job);
+if (job_pre_run(job)) {
+return;
+}
 job->pause_count--;
 job->busy = true;
 job->paused = false;
-- 
2.27.0




[PATCH v5 17/31] assertions for blockdev.h global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
---
 block/block-backend.c |  3 +++
 blockdev.c| 15 +++
 2 files changed, 18 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 56670a774f..aad6d9a4c3 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -794,6 +794,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
  */
 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 return blk->legacy_dinfo;
 }
 
@@ -805,6 +806,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
 {
 assert(!blk->legacy_dinfo);
+assert(qemu_in_main_thread());
 return blk->legacy_dinfo = dinfo;
 }
 
@@ -815,6 +817,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, 
DriveInfo *dinfo)
 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
 {
 BlockBackend *blk = NULL;
+assert(qemu_in_main_thread());
 
 while ((blk = blk_next(blk)) != NULL) {
 if (blk->legacy_dinfo == dinfo) {
diff --git a/blockdev.c b/blockdev.c
index 7706919410..eebd9317c2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -113,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int 
max_devs)
 BlockBackend *blk;
 DriveInfo *dinfo;
 
+assert(qemu_in_main_thread());
+
 if (max_devs <= 0) {
 return;
 }
@@ -142,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk)
 DriveInfo *dinfo = blk_legacy_dinfo(blk);
 BlockJob *job;
 
+assert(qemu_in_main_thread());
+
 if (!dinfo) {
 return;
 }
@@ -163,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
 void blockdev_auto_del(BlockBackend *blk)
 {
 DriveInfo *dinfo = blk_legacy_dinfo(blk);
+assert(qemu_in_main_thread());
 
 if (dinfo && dinfo->auto_del) {
 monitor_remove_blk(blk);
@@ -187,6 +192,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int 
unit)
 BlockBackend *blk;
 DriveInfo *dinfo;
 
+assert(qemu_in_main_thread());
+
 for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 dinfo = blk_legacy_dinfo(blk);
 if (dinfo && dinfo->type == type
@@ -209,6 +216,8 @@ void drive_check_orphaned(void)
 Location loc;
 bool orphans = false;
 
+assert(qemu_in_main_thread());
+
 for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 dinfo = blk_legacy_dinfo(blk);
 /*
@@ -242,6 +251,7 @@ void drive_check_orphaned(void)
 
 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
 {
+assert(qemu_in_main_thread());
 return drive_get(type,
  drive_index_to_bus_id(type, index),
  drive_index_to_unit_id(type, index));
@@ -253,6 +263,8 @@ int drive_get_max_bus(BlockInterfaceType type)
 BlockBackend *blk;
 DriveInfo *dinfo;
 
+assert(qemu_in_main_thread());
+
 max_bus = -1;
 for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 dinfo = blk_legacy_dinfo(blk);
@@ -269,6 +281,7 @@ int drive_get_max_bus(BlockInterfaceType type)
 DriveInfo *drive_get_next(BlockInterfaceType type)
 {
 static int next_block_unit[IF_COUNT];
+assert(qemu_in_main_thread());
 
 return drive_get(type, 0, next_block_unit[type]++);
 }
@@ -749,6 +762,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType 
block_default_type,
 const char *filename;
 int i;
 
+assert(qemu_in_main_thread());
+
 /* Change legacy command line options into QMP ones */
 static const struct {
 const char *from;
-- 
2.27.0




[PATCH v5 18/31] include/block/snapshot: global state API + assertions

2021-11-23 Thread Emanuele Giuseppe Esposito
Snapshots run also under the BQL lock, so they all are
in the global state API. The aiocontext lock that they hold
is currently an overkill and in future could be removed.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/snapshot.h | 13 +++--
 block/snapshot.c | 28 
 migration/savevm.c   |  2 ++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 940345692f..cda82c1ba1 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo {
 uint64_t icount; /* record/replay step */
 } QEMUSnapshotInfo;
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name);
 bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
@@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState 
*bs,
  Error **errp);
 
 
-/* Group operations. All block drivers are involved.
+/*
+ * Group operations. All block drivers are involved.
  * These functions will properly handle dataplane (take aio_context_acquire
- * when appropriate for appropriate block drivers */
+ * when appropriate for appropriate block drivers
+ */
 
 bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
Error **errp);
diff --git a/block/snapshot.c b/block/snapshot.c
index ccacda8bd5..3dd3c9d3bd 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo 
*sn_info,
 QEMUSnapshotInfo *sn_tab, *sn;
 int nb_sns, i, ret;
 
+assert(qemu_in_main_thread());
+
 ret = -ENOENT;
 nb_sns = bdrv_snapshot_list(bs, _tab);
 if (nb_sns < 0) {
@@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
 bool ret = false;
 
 assert(id || name);
+assert(qemu_in_main_thread());
 
 nb_sns = bdrv_snapshot_list(bs, _tab);
 if (nb_sns < 0) {
@@ -200,6 +203,7 @@ static BlockDriverState 
*bdrv_snapshot_fallback(BlockDriverState *bs)
 int bdrv_can_snapshot(BlockDriverState *bs)
 {
 BlockDriver *drv = bs->drv;
+assert(qemu_in_main_thread());
 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
 return 0;
 }
@@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs,
 {
 BlockDriver *drv = bs->drv;
 BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+assert(qemu_in_main_thread());
+
 if (!drv) {
 return -ENOMEDIUM;
 }
@@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 BdrvChild **fallback_ptr;
 int ret, open_ret;
 
+assert(qemu_in_main_thread());
+
 if (!drv) {
 error_setg(errp, "Block driver is closed");
 return -ENOMEDIUM;
@@ -348,6 +357,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
 BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
 int ret;
 
+assert(qemu_in_main_thread());
+
 if (!drv) {
 error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
 return -ENOMEDIUM;
@@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs,
 {
 BlockDriver *drv = bs->drv;
 BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+assert(qemu_in_main_thread());
 if (!drv) {
 return -ENOMEDIUM;
 }
@@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
 {
 BlockDriver *drv = bs->drv;
 
+assert(qemu_in_main_thread());
+
 if (!drv) {
 error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
 return -ENOMEDIUM;
@@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState 
*bs,
 int ret;
 Error *local_err = NULL;
 
+assert(qemu_in_main_thread());
+
 ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, _err);
 if (ret == -ENOENT || ret == -EINVAL) {
 error_free(local_err);
@@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList 
*devices,
 g_autoptr(GList) bdrvs = NULL;
 GList *iterbdrvs;
 
+assert(qemu_in_main_thread());
+
 if (bdrv_all_get_snapshot_devices(has_devices, devices, , errp) < 0) 
{
 return false;
 }
@@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name,
 g_autoptr(GList) bdrvs = NULL;
 GList *iterbdrvs;
 
+assert(qemu_in_main_thread());
+
 if (bdrv_all_get_snapshot_devices(has_devices, devices, , errp) < 0) 
{
 return -1;
 }
@@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name,
 g_autoptr(GList) bdrvs = NULL;
 GList *iterbdrvs;
 
+assert(qemu_in_main_thread());
+
 if 

[PATCH v5 31/31] block.c: assertions to the block layer permissions API

2021-11-23 Thread Emanuele Giuseppe Esposito
Now that we "covered" the three main cases where the
permission API was being used under BQL (fuse,
amend and invalidate_cache), we can safely assert for
the permission functions implemented in block.c

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/block.c b/block.c
index 805974676b..6056ec4bc5 100644
--- a/block.c
+++ b/block.c
@@ -2138,6 +2138,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, 
Error **errp)
 
 assert(a->bs);
 assert(a->bs == b->bs);
+assert(qemu_in_main_thread());
 
 if ((b->perm & a->shared_perm) == b->perm) {
 return true;
@@ -2161,6 +2162,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, 
Error **errp)
 static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
 {
 BdrvChild *a, *b;
+assert(qemu_in_main_thread());
 
 /*
  * During the loop we'll look at each pair twice. That's correct because
@@ -2245,6 +2247,8 @@ static void bdrv_child_set_perm_abort(void *opaque)
 {
 BdrvChildSetPermState *s = opaque;
 
+assert(qemu_in_main_thread());
+
 s->child->perm = s->old_perm;
 s->child->shared_perm = s->old_shared_perm;
 }
@@ -2258,6 +2262,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t 
perm,
 uint64_t shared, Transaction *tran)
 {
 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
+assert(qemu_in_main_thread());
 
 *s = (BdrvChildSetPermState) {
 .child = c,
@@ -2442,6 +2447,7 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, 
BlockReopenQueue *q,
 BdrvChild *c;
 int ret;
 uint64_t cumulative_perms, cumulative_shared_perms;
+assert(qemu_in_main_thread());
 
 bdrv_get_cumulative_perm(bs, _perms, _shared_perms);
 
@@ -2510,6 +2516,7 @@ static int bdrv_list_refresh_perms(GSList *list, 
BlockReopenQueue *q,
 {
 int ret;
 BlockDriverState *bs;
+assert(qemu_in_main_thread());
 
 for ( ; list; list = list->next) {
 bs = list->data;
@@ -2582,6 +2589,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Error 
**errp)
 int ret;
 Transaction *tran = tran_new();
 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
+assert(qemu_in_main_thread());
 
 ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
 tran_finalize(tran, ret);
@@ -2648,6 +2656,7 @@ static void bdrv_filter_default_perms(BlockDriverState 
*bs, BdrvChild *c,
   uint64_t perm, uint64_t shared,
   uint64_t *nperm, uint64_t *nshared)
 {
+assert(qemu_in_main_thread());
 *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
 }
@@ -2659,6 +2668,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState 
*bs, BdrvChild *c,
uint64_t *nperm, uint64_t *nshared)
 {
 assert(role & BDRV_CHILD_COW);
+assert(qemu_in_main_thread());
 
 /*
  * We want consistent read from backing files if the parent needs it.
@@ -2696,6 +2706,7 @@ static void 
bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
 {
 int flags;
 
+assert(qemu_in_main_thread());
 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
 
 flags = bdrv_reopen_get_flags(reopen_queue, bs);
@@ -6094,6 +6105,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor 
*gr, void *parent,
 {
 BlockPermission qapi_perm;
 XDbgBlockGraphEdge *edge;
+assert(qemu_in_main_thread());
 
 edge = g_new0(XDbgBlockGraphEdge, 1);
 
-- 
2.27.0




[PATCH v5 27/31] job.h: assertions in the callers of JobDriver funcion pointers

2021-11-23 Thread Emanuele Giuseppe Esposito
Also assert that job->run() callback is called in the job aiocontext.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 job.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/job.c b/job.c
index dbfa67bb0a..bb57ec6887 100644
--- a/job.c
+++ b/job.c
@@ -380,6 +380,8 @@ void job_ref(Job *job)
 
 void job_unref(Job *job)
 {
+assert(qemu_in_main_thread());
+
 if (--job->refcnt == 0) {
 assert(job->status == JOB_STATUS_NULL);
 assert(!timer_pending(>sleep_timer));
@@ -601,6 +603,7 @@ bool job_user_paused(Job *job)
 void job_user_resume(Job *job, Error **errp)
 {
 assert(job);
+assert(qemu_in_main_thread());
 if (!job->user_paused || job->pause_count <= 0) {
 error_setg(errp, "Can't resume a job that was not paused");
 return;
@@ -671,6 +674,7 @@ static void job_update_rc(Job *job)
 static void job_commit(Job *job)
 {
 assert(!job->ret);
+assert(qemu_in_main_thread());
 if (job->driver->commit) {
 job->driver->commit(job);
 }
@@ -679,6 +683,7 @@ static void job_commit(Job *job)
 static void job_abort(Job *job)
 {
 assert(job->ret);
+assert(qemu_in_main_thread());
 if (job->driver->abort) {
 job->driver->abort(job);
 }
@@ -686,6 +691,7 @@ static void job_abort(Job *job)
 
 static void job_clean(Job *job)
 {
+assert(qemu_in_main_thread());
 if (job->driver->clean) {
 job->driver->clean(job);
 }
@@ -725,6 +731,7 @@ static int job_finalize_single(Job *job)
 
 static void job_cancel_async(Job *job, bool force)
 {
+assert(qemu_in_main_thread());
 if (job->driver->cancel) {
 force = job->driver->cancel(job, force);
 } else {
@@ -824,6 +831,7 @@ static void job_completed_txn_abort(Job *job)
 
 static int job_prepare(Job *job)
 {
+assert(qemu_in_main_thread());
 if (job->ret == 0 && job->driver->prepare) {
 job->ret = job->driver->prepare(job);
 job_update_rc(job);
@@ -950,6 +958,7 @@ static void coroutine_fn job_co_entry(void *opaque)
 {
 Job *job = opaque;
 
+assert(job->aio_context == qemu_get_current_aio_context());
 assert(job && job->driver && job->driver->run);
 job_pause_point(job);
 job->ret = job->driver->run(job, >err);
@@ -1053,6 +1062,7 @@ void job_complete(Job *job, Error **errp)
 {
 /* Should not be reachable via external interface for internal jobs */
 assert(job->id);
+assert(qemu_in_main_thread());
 if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) {
 return;
 }
-- 
2.27.0




Re: [PATCH 2/4] include/sysemu/blockdev.h: rename if_name in block_if_name

2021-11-23 Thread Philippe Mathieu-Daudé
On 11/24/21 07:36, Emanuele Giuseppe Esposito wrote:
> In preparation to next patch, where we export it to be used
> also in softmmu/vlc.c

"vl.c"? :)

> 
> Signed-off-by: Emanuele Giuseppe Esposito 
> ---
>  blockdev.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 




[PATCH 0/4] block: minor refactoring in preparation to the block layer API split

2021-11-23 Thread Emanuele Giuseppe Esposito
These patches are taken from my old patches and feedback of
my series "block layer: split block APIs in global state and I/O".

The reason for a separate series is that the original one is
already too long, and these patches are just refactoring the code,
mainly deleting or moving functions in blockdev.h and block_int.h.

Signed-off-by: Emanuele Giuseppe Esposito 

Emanuele Giuseppe Esposito (4):
  block_int: make bdrv_backing_overridden static
  include/sysemu/blockdev.h: rename if_name in block_if_name
  include/sysemu/blockdev.h: move drive_add and inline drive_def
  include/sysemu/blockdev.h: remove drive_get_max_devs

 include/block/block_int.h  |  3 --
 include/sysemu/blockdev.h  |  7 ++---
 block.c|  4 ++-
 block/monitor/block-hmp-cmds.c |  2 +-
 blockdev.c | 54 --
 softmmu/vl.c   | 25 +++-
 6 files changed, 36 insertions(+), 59 deletions(-)

-- 
2.27.0




[PATCH 4/4] include/sysemu/blockdev.h: remove drive_get_max_devs

2021-11-23 Thread Emanuele Giuseppe Esposito
Remove drive_get_max_devs, as it is not used by anyone.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/sysemu/blockdev.h |  1 -
 blockdev.c| 17 -
 2 files changed, 18 deletions(-)

diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index aacc587a33..c4b7b8b54e 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -50,7 +50,6 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int 
unit);
 void drive_check_orphaned(void);
 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index);
 int drive_get_max_bus(BlockInterfaceType type);
-int drive_get_max_devs(BlockInterfaceType type);
 DriveInfo *drive_get_next(BlockInterfaceType type);
 
 DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type,
diff --git a/blockdev.c b/blockdev.c
index 928bb743a1..84897f9d70 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -168,23 +168,6 @@ void blockdev_auto_del(BlockBackend *blk)
 }
 }
 
-/**
- * Returns the current mapping of how many units per bus
- * a particular interface can support.
- *
- *  A positive integer indicates n units per bus.
- *  0 implies the mapping has not been established.
- * -1 indicates an invalid BlockInterfaceType was given.
- */
-int drive_get_max_devs(BlockInterfaceType type)
-{
-if (type >= IF_IDE && type < IF_COUNT) {
-return if_max_devs[type];
-}
-
-return -1;
-}
-
 static int drive_index_to_bus_id(BlockInterfaceType type, int index)
 {
 int max_devs = if_max_devs[type];
-- 
2.27.0




[PATCH 2/4] include/sysemu/blockdev.h: rename if_name in block_if_name

2021-11-23 Thread Emanuele Giuseppe Esposito
In preparation to next patch, where we export it to be used
also in softmmu/vlc.c

Signed-off-by: Emanuele Giuseppe Esposito 
---
 blockdev.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index b35072644e..1b6ffbbc73 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -71,7 +71,7 @@ void bdrv_set_monitor_owned(BlockDriverState *bs)
 QTAILQ_INSERT_TAIL(_bdrv_states, bs, monitor_list);
 }
 
-static const char *const if_name[IF_COUNT] = {
+static const char *const block_if_name[IF_COUNT] = {
 [IF_NONE] = "none",
 [IF_IDE] = "ide",
 [IF_SCSI] = "scsi",
@@ -120,7 +120,7 @@ void override_max_devs(BlockInterfaceType type, int 
max_devs)
 if (dinfo->type == type) {
 fprintf(stderr, "Cannot override units-per-bus property of"
 " the %s interface, because a drive of that type has"
-" already been added.\n", if_name[type]);
+" already been added.\n", block_if_name[type]);
 g_assert_not_reached();
 }
 }
@@ -212,7 +212,7 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, 
const char *file,
 return NULL;
 }
 if (type != IF_DEFAULT) {
-qemu_opt_set(opts, "if", if_name[type], _abort);
+qemu_opt_set(opts, "if", block_if_name[type], _abort);
 }
 if (index >= 0) {
 qemu_opt_set_number(opts, "index", index, _abort);
@@ -269,7 +269,7 @@ void drive_check_orphaned(void)
 qemu_opts_loc_restore(dinfo->opts);
 error_report("machine type does not support"
  " if=%s,bus=%d,unit=%d",
- if_name[dinfo->type], dinfo->bus, dinfo->unit);
+ block_if_name[dinfo->type], dinfo->bus, dinfo->unit);
 loc_pop();
 orphans = true;
 }
@@ -887,7 +887,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType 
block_default_type,
 value = qemu_opt_get(legacy_opts, "if");
 if (value) {
 for (type = 0;
- type < IF_COUNT && strcmp(value, if_name[type]);
+ type < IF_COUNT && strcmp(value, block_if_name[type]);
  type++) {
 }
 if (type == IF_COUNT) {
@@ -945,10 +945,10 @@ DriveInfo *drive_new(QemuOpts *all_opts, 
BlockInterfaceType block_default_type,
 mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
 }
 if (max_devs) {
-new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id,
+new_id = g_strdup_printf("%s%i%s%i", block_if_name[type], bus_id,
  mediastr, unit_id);
 } else {
-new_id = g_strdup_printf("%s%s%i", if_name[type],
+new_id = g_strdup_printf("%s%s%i", block_if_name[type],
  mediastr, unit_id);
 }
 qdict_put_str(bs_opts, "id", new_id);
-- 
2.27.0




[PATCH v5 00/31] block layer: split block APIs in global state and I/O

2021-11-23 Thread Emanuele Giuseppe Esposito
Currently, block layer APIs like block.h contain a mix of
functions that are either running in the main loop and under the
BQL, or are thread-safe functions and run in iothreads performing I/O.
The functions running under BQL also take care of modifying the
block graph, by using drain and/or aio_context_acquire/release.
This makes it very confusing to understand where each function
runs, and what assumptions it provided with regards to thread
safety.

We call the functions running under BQL "global state (GS) API", and
distinguish them from the thread-safe "I/O API".

The aim of this series is to split the relevant block headers in
global state and I/O sub-headers. The division will be done in
this way:
header.h will be split in header-global-state.h, header-io.h and
header-common.h. The latter will just contain the data structures
needed by header-global-state and header-io, and common helpers
that are neither in GS nor in I/O. header.h will remain for
legacy and to avoid changing all includes in all QEMU c files,
but will only include the two new headers. No function shall be
added in header.c .
Once we split all relevant headers, it will be much easier to see what
uses the AioContext lock and remove it, which is the overall main
goal of this and other series that I posted/will post.

In addition to splitting the relevant headers shown in this series,
it is also very helpful splitting the function pointers in some
block structures, to understand what runs under AioContext lock and
what doesn't. This is what patches 21-27 do.

Each function in the GS API will have an assertion, checking
that it is always running under BQL.
I/O functions are instead thread safe (or so should be), meaning
that they *can* run under BQL, but also in an iothread in another
AioContext. Therefore they do not provide any assertion, and
need to be audited manually to verify the correctness.

Adding assetions has helped finding 2 bugs already, as shown in
my series "Migration: fix missing iothread locking".

Tested this series by running unit tests, qemu-iotests and qtests
(x86_64).
Some functions in the GS API are used everywhere but not
properly tested. Therefore their assertion is never actually run in
the tests, so despite my very careful auditing, it is not impossible
to exclude that some will trigger while actually using QEMU.

Patch 1 introduces qemu_in_main_thread(), the function used in
all assertions. This had to be introduced otherwise all unit tests
would fail, since they run in the main loop but use the code in
stubs/iothread.c
Patches 2-27 (with the exception of patch 9-10, that are an additional
assert) are all structured in the same way: first we split the header
and in the next (or same, if small) patch we add assertions.
Patch 28-31 take care instead of the block layer permission API,
fixing some bugs where they are used in I/O functions.

Next steps once this get reviewed:
1) audit the GS API and replace the AioContext lock with drains,
or remove them when not necessary (requires further discussion).
2) [optional as it should be already the case] audit the I/O API
and check that thread safety is guaranteed

In order to keep this series a little bit smaller, move some
refactoring patches in another series, "block: minor refactoring
in preparation to the block layer API split".

Based-on: <20211124063640.3118897-1-eespo...@redhat.com>

Signed-off-by: Emanuele Giuseppe Esposito 
---
v5 -> v6:
* In short, apply all Hanna's comments. More in details,
  the following functions in the following headers have been moved:
  block-backend:
blk_replace_bs (to gs)
blk_nb_sectors (to io)
blk_name (to io)
blk_set_perm (to io)
blk_get_perm (to io)
blk_drain (to io)
blk_abort_aio_request (to io)
blk_make_empty (to gs)
blk_invalidate_cache (was in io, but had GS assertion)
blk_aio_cancel (to gs)
  block:
bdrv_replace_child_bs (to gs)
bdrv_get_device_name (to io)
bdrv_get_device_or_node_name (to io)
bdrv_drained_end_no_poll (to io)
bdrv_block_status (was in io, but had GS assertion)
bdrv_drain (to io)
bdrv_co_drain (to io)
bdrv_make_zero (was in GS, but did not have the assertion)
bdrv_save_vmstate (to io)
bdrv_load_vmstate (to io)
bdrv_aio_cancel_async (to io)
  block_int:
bdrv_get_parent_name (to io)
bdrv_apply_subtree_drain (to io)
bdrv_unapply_subtree_drain (to io)
bdrv_co_copy_range_from (was in io, but had GS assertion)
bdrv_co_copy_range_to (was in io, but had GS assertion)
->bdrv_save_vmstate (to io)
->bdrv_load_vmstate (to io)

  coding style (assertion after definitions):
bdrv_save_vmstate
bdrv_load_vmstate
block_job_next
block_job_get

  new patches:
block.c: modify .attach and .detach callbacks of child_of_bds
introduce pre_run as JobDriver callback to handle
  bdrv_co_amend usage of permission function
leave blk_set/get_perm as a TODO in fuse.c
make sure 

[PATCH v5 03/31] assertions for block global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
All the global state (GS) API functions will check that
qemu_in_main_thread() returns true. If not, it means
that the safety of BQL cannot be guaranteed, and
they need to be moved to I/O.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c| 135 -
 block/commit.c |   2 +
 block/io.c |  14 +
 blockdev.c |   1 +
 4 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 84de6867e6..49bee69e27 100644
--- a/block.c
+++ b/block.c
@@ -278,6 +278,8 @@ bool bdrv_is_read_only(BlockDriverState *bs)
 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp)
 {
+assert(qemu_in_main_thread());
+
 /* Do not set read_only if copy_on_read is enabled */
 if (bs->copy_on_read && read_only) {
 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read 
enabled",
@@ -311,6 +313,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const 
char *errmsg,
   Error **errp)
 {
 int ret = 0;
+assert(qemu_in_main_thread());
 
 if (!(bs->open_flags & BDRV_O_RDWR)) {
 return 0;
@@ -393,6 +396,7 @@ char *bdrv_get_full_backing_filename(BlockDriverState *bs, 
Error **errp)
 void bdrv_register(BlockDriver *bdrv)
 {
 assert(bdrv->format_name);
+assert(qemu_in_main_thread());
 QLIST_INSERT_HEAD(_drivers, bdrv, list);
 }
 
@@ -401,6 +405,8 @@ BlockDriverState *bdrv_new(void)
 BlockDriverState *bs;
 int i;
 
+assert(qemu_in_main_thread());
+
 bs = g_new0(BlockDriverState, 1);
 QLIST_INIT(>dirty_bitmaps);
 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
@@ -443,6 +449,8 @@ BlockDriver *bdrv_find_format(const char *format_name)
 BlockDriver *drv1;
 int i;
 
+assert(qemu_in_main_thread());
+
 drv1 = bdrv_do_find_format(format_name);
 if (drv1) {
 return drv1;
@@ -492,11 +500,13 @@ static int bdrv_format_is_whitelisted(const char 
*format_name, bool read_only)
 
 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
 {
+assert(qemu_in_main_thread());
 return bdrv_format_is_whitelisted(drv->format_name, read_only);
 }
 
 bool bdrv_uses_whitelist(void)
 {
+assert(qemu_in_main_thread());
 return use_bdrv_whitelist;
 }
 
@@ -527,6 +537,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 {
 int ret;
 
+assert(qemu_in_main_thread());
+
 Coroutine *co;
 CreateCo cco = {
 .drv = drv,
@@ -702,6 +714,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, 
Error **errp)
 QDict *qdict;
 int ret;
 
+assert(qemu_in_main_thread());
+
 drv = bdrv_find_protocol(filename, true, errp);
 if (drv == NULL) {
 return -ENOENT;
@@ -799,6 +813,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes 
*bsz)
 {
 BlockDriver *drv = bs->drv;
 BlockDriverState *filtered = bdrv_filter_bs(bs);
+assert(qemu_in_main_thread());
 
 if (drv && drv->bdrv_probe_blocksizes) {
 return drv->bdrv_probe_blocksizes(bs, bsz);
@@ -819,6 +834,7 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry 
*geo)
 {
 BlockDriver *drv = bs->drv;
 BlockDriverState *filtered = bdrv_filter_bs(bs);
+assert(qemu_in_main_thread());
 
 if (drv && drv->bdrv_probe_geometry) {
 return drv->bdrv_probe_geometry(bs, geo);
@@ -910,6 +926,7 @@ BlockDriver *bdrv_find_protocol(const char *filename,
 const char *p;
 int i;
 
+assert(qemu_in_main_thread());
 /* TODO Drivers without bdrv_file_open must be specified explicitly */
 
 /*
@@ -975,6 +992,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 {
 int score_max = 0, score;
 BlockDriver *drv = NULL, *d;
+assert(qemu_in_main_thread());
 
 QLIST_FOREACH(d, _drivers, list) {
 if (d->bdrv_probe) {
@@ -1122,6 +1140,7 @@ int bdrv_parse_aio(const char *mode, int *flags)
  */
 int bdrv_parse_discard_flags(const char *mode, int *flags)
 {
+assert(qemu_in_main_thread());
 *flags &= ~BDRV_O_UNMAP;
 
 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
@@ -1142,6 +1161,7 @@ int bdrv_parse_discard_flags(const char *mode, int *flags)
  */
 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
 {
+assert(qemu_in_main_thread());
 *flags &= ~BDRV_O_CACHE_MASK;
 
 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
@@ -1634,6 +1654,8 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver 
*drv,
 BlockDriverState *bs;
 int ret;
 
+assert(qemu_in_main_thread());
+
 bs = bdrv_new();
 bs->open_flags = flags;
 bs->options = options ?: qdict_new();
@@ -1659,6 +1681,7 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver 
*drv,
 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp)
 {
+

[PATCH v5 05/31] block-backend: special comments for blk_set/get_perm due to fuse

2021-11-23 Thread Emanuele Giuseppe Esposito
Fuse logic can be classified as I/O, so there is no BQL held
during its execution. And yet, it uses blk_{get/set}_perm
functions, that are classified as BQL and clearly require
the BQL lock. Since there is no easy solution for this,
add a couple of TODOs and FIXME in the relevant sections of the
code.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-backend.c | 10 ++
 block/export/fuse.c   | 16 ++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 1f0bda578e..7a4b50a8f0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -888,6 +888,11 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, 
uint64_t shared_perm,
  Error **errp)
 {
 int ret;
+/*
+ * FIXME: blk_{get/set}_perm should be always called under
+ * BQL, but it is not the case right now (see block/export/fuse.c)
+ */
+/* assert(qemu_in_main_thread()); */
 
 if (blk->root && !blk->disable_perm) {
 ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
@@ -904,6 +909,11 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, 
uint64_t shared_perm,
 
 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
 {
+/*
+ * FIXME: blk_{get/set}_perm should be always called under
+ * BQL, but it is not the case right now (see block/export/fuse.c)
+ */
+/* assert(qemu_in_main_thread()); */
 *perm = blk->perm;
 *shared_perm = blk->shared_perm;
 }
diff --git a/block/export/fuse.c b/block/export/fuse.c
index 823c126d23..7ceb8d783b 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -89,7 +89,10 @@ static int fuse_export_create(BlockExport *blk_exp,
 /* For growable exports, take the RESIZE permission */
 if (args->growable) {
 uint64_t blk_perm, blk_shared_perm;
-
+/*
+ * FIXME: blk_{get/set}_perm should not be here, as permissions
+ * should be modified only under BQL and here we are not!
+ */
 blk_get_perm(exp->common.blk, _perm, _shared_perm);
 
 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
@@ -400,6 +403,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t 
size,
 
 /* Growable exports have a permanent RESIZE permission */
 if (!exp->growable) {
+
+/*
+ * FIXME: blk_{get/set}_perm should not be here, as permissions
+ * should be modified only under BQL and here we are not!
+ */
 blk_get_perm(exp->common.blk, _perm, _shared_perm);
 
 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
@@ -413,7 +421,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t 
size,
truncate_flags, NULL);
 
 if (!exp->growable) {
-/* Must succeed, because we are only giving up the RESIZE permission */
+/*
+ * Must succeed, because we are only giving up the RESIZE permission.
+ * FIXME: blk_{get/set}_perm should not be here, as permissions
+ * should be modified only under BQL and here we are not!
+ */
 blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, _abort);
 }
 
-- 
2.27.0




[PATCH v5 04/31] include/sysemu/block-backend: split header into I/O and global state (GS) API

2021-11-23 Thread Emanuele Giuseppe Esposito
Similarly to the previous patches, split block-backend.h
in block-backend-io.h and block-backend-global-state.h

In addition, remove "block/block.h" include as it seems
it is not necessary anymore, together with "qemu/iov.h"

block-backend-common.h contains the structures shared between
the two headers, and the functions that can't be categorized as
I/O or global state.

Assertions are added in the next patch.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/sysemu/block-backend-common.h   |  84 ++
 include/sysemu/block-backend-global-state.h | 121 +
 include/sysemu/block-backend-io.h   | 139 ++
 include/sysemu/block-backend.h  | 269 +---
 block/block-backend.c   |   9 +-
 5 files changed, 353 insertions(+), 269 deletions(-)
 create mode 100644 include/sysemu/block-backend-common.h
 create mode 100644 include/sysemu/block-backend-global-state.h
 create mode 100644 include/sysemu/block-backend-io.h

diff --git a/include/sysemu/block-backend-common.h 
b/include/sysemu/block-backend-common.h
new file mode 100644
index 00..6963bbf45a
--- /dev/null
+++ b/include/sysemu/block-backend-common.h
@@ -0,0 +1,84 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_COMMON_H
+#define BLOCK_BACKEND_COMMON_H
+
+#include "qemu/iov.h"
+#include "block/throttle-groups.h"
+
+/*
+ * TODO Have to include block/block.h for a bunch of block layer
+ * types.  Unfortunately, this pulls in the whole BlockDriverState
+ * API, which we don't want used by many BlockBackend users.  Some of
+ * the types belong here, and the rest should be split into a common
+ * header and one for the BlockDriverState API.
+ */
+#include "block/block.h"
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+/*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes.  Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+void (*change_media_cb)(void *opaque, bool load, Error **errp);
+/*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback.  Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+void (*eject_request_cb)(void *opaque, bool force);
+/*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+bool (*is_tray_open)(void *opaque);
+/*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+bool (*is_medium_locked)(void *opaque);
+/*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+void (*resize_cb)(void *opaque);
+/*
+ * Runs when the backend receives a drain request.
+ */
+void (*drained_begin)(void *opaque);
+/*
+ * Runs when the backend's last drain request ends.
+ */
+void (*drained_end)(void *opaque);
+/*
+ * Is the device still busy?
+ */
+bool (*drained_poll)(void *opaque);
+} BlockDevOps;
+
+/*
+ * This struct is embedded in (the private) BlockBackend struct and contains
+ * fields that must be public. This is in particular for QLIST_ENTRY() and
+ * friends so that BlockBackends can be kept in lists outside block-backend.c
+ */
+typedef struct BlockBackendPublic {
+ThrottleGroupMember throttle_group_member;
+} BlockBackendPublic;
+
+#endif /* BLOCK_BACKEND_COMMON_H */
diff --git a/include/sysemu/block-backend-global-state.h 
b/include/sysemu/block-backend-global-state.h
new file mode 100644
index 00..77009bf7a2
--- /dev/null
+++ b/include/sysemu/block-backend-global-state.h
@@ -0,0 +1,121 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_GS_H
+#define BLOCK_BACKEND_GS_H
+
+#include "block-backend-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
+BlockBackend 

[PATCH v5 08/31] assertions for block_int global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c | 15 +++
 block/backup.c  |  1 +
 block/block-backend.c   |  3 +++
 block/commit.c  |  2 ++
 block/dirty-bitmap.c|  1 +
 block/io.c  |  1 +
 block/mirror.c  |  4 
 block/monitor/bitmap-qmp-cmds.c |  6 ++
 block/stream.c  |  2 ++
 blockdev.c  |  7 +++
 10 files changed, 42 insertions(+)

diff --git a/block.c b/block.c
index 49bee69e27..198ec636ff 100644
--- a/block.c
+++ b/block.c
@@ -662,6 +662,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver 
*drv,
 Error *local_err = NULL;
 int ret;
 
+assert(qemu_in_main_thread());
+
 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
 prealloc = qapi_enum_parse(_lookup, buf,
@@ -2492,6 +2494,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, 
uint64_t *perm,
 uint64_t cumulative_perms = 0;
 uint64_t cumulative_shared_perms = BLK_PERM_ALL;
 
+assert(qemu_in_main_thread());
+
 QLIST_FOREACH(c, >parents, next_parent) {
 cumulative_perms |= c->perm;
 cumulative_shared_perms &= c->shared_perm;
@@ -2552,6 +2556,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, 
uint64_t shared,
 Transaction *tran = tran_new();
 int ret;
 
+assert(qemu_in_main_thread());
+
 bdrv_child_set_perm(c, perm, shared, tran);
 
 ret = bdrv_refresh_perms(c->bs, _err);
@@ -2582,6 +2588,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, 
BdrvChild *c, Error **errp)
 uint64_t parent_perms, parent_shared;
 uint64_t perms, shared;
 
+assert(qemu_in_main_thread());
+
 bdrv_get_cumulative_perm(bs, _perms, _shared);
 bdrv_child_perm(bs, c->bs, c, c->role, NULL,
 parent_perms, parent_shared, , );
@@ -2724,6 +2732,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild 
*c,
 uint64_t perm, uint64_t shared,
 uint64_t *nperm, uint64_t *nshared)
 {
+assert(qemu_in_main_thread());
 if (role & BDRV_CHILD_FILTERED) {
 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
  BDRV_CHILD_COW)));
@@ -3084,6 +3093,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
*child_bs,
 BdrvChild *child = NULL;
 Transaction *tran = tran_new();
 
+assert(qemu_in_main_thread());
+
 ret = bdrv_attach_child_common(child_bs, child_name, child_class,
child_role, perm, shared_perm, opaque,
, tran, errp);
@@ -7436,6 +7447,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs,
 {
 BlockDriverState *filtered;
 
+assert(qemu_in_main_thread());
+
 if (!bs || !bs->drv) {
 return false;
 }
@@ -7607,6 +7620,7 @@ static bool append_strong_runtime_options(QDict *d, 
BlockDriverState *bs)
  * would result in exactly bs->backing. */
 static bool bdrv_backing_overridden(BlockDriverState *bs)
 {
+assert(qemu_in_main_thread());
 if (bs->backing) {
 return strcmp(bs->auto_backing_file,
   bs->backing->bs->filename);
@@ -7995,6 +8009,7 @@ static BlockDriverState 
*bdrv_do_skip_filters(BlockDriverState *bs,
  */
 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
 {
+assert(qemu_in_main_thread());
 return bdrv_do_skip_filters(bs, true);
 }
 
diff --git a/block/backup.c b/block/backup.c
index 21d5983779..c53041772c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 
 assert(bs);
 assert(target);
+assert(qemu_in_main_thread());
 
 /* QMP interface protects us from these cases */
 assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
diff --git a/block/block-backend.c b/block/block-backend.c
index 11131ca68c..56670a774f 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1095,6 +1095,7 @@ static void blk_root_change_media(BdrvChild *child, bool 
load)
  */
 bool blk_dev_has_removable_media(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
 }
 
@@ -1112,6 +1113,7 @@ bool blk_dev_has_tray(BlockBackend *blk)
  */
 void blk_dev_eject_request(BlockBackend *blk, bool force)
 {
+assert(qemu_in_main_thread());
 if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
 blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
 }
@@ -1134,6 +1136,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk)
  */
 bool blk_dev_is_medium_locked(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
 return blk->dev_ops->is_medium_locked(blk->dev_opaque);
 }
diff --git a/block/commit.c b/block/commit.c

[PATCH v5 10/31] block.c: modify .attach and .detach callbacks of child_of_bds

2021-11-23 Thread Emanuele Giuseppe Esposito
According to the assertions put in the previous patch, we should
first drain and then modify the ->children list. In this way
we prevent other iothreads to read the list while it is being
updated.

In this case, moving the drain won't cause any harm, because
child is a parameter of the drain function so it will still be
included in the operation, despite not being in the list.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index 522a273140..5516c84ec4 100644
--- a/block.c
+++ b/block.c
@@ -1416,6 +1416,7 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
 
+bdrv_apply_subtree_drain(child, bs);
 assert_bdrv_graph_writable(bs);
 QLIST_INSERT_HEAD(>children, child, next);
 
@@ -1423,7 +1424,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 bdrv_backing_attach(child);
 }
 
-bdrv_apply_subtree_drain(child, bs);
 }
 
 static void bdrv_child_cb_detach(BdrvChild *child)
@@ -1434,10 +1434,9 @@ static void bdrv_child_cb_detach(BdrvChild *child)
 bdrv_backing_detach(child);
 }
 
-bdrv_unapply_subtree_drain(child, bs);
-
 assert_bdrv_graph_writable(bs);
 QLIST_REMOVE(child, next);
+bdrv_unapply_subtree_drain(child, bs);
 }
 
 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
-- 
2.27.0




[PATCH v5 06/31] block/block-backend.c: assertions for block-backend

2021-11-23 Thread Emanuele Giuseppe Esposito
All the global state (GS) API functions will check that
qemu_in_main_thread() returns true. If not, it means
that the safety of BQL cannot be guaranteed, and
they need to be moved to I/O.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-backend.c  | 83 ++
 softmmu/qdev-monitor.c |  2 +
 2 files changed, 85 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 7a4b50a8f0..11131ca68c 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -228,6 +228,7 @@ static void blk_root_activate(BdrvChild *child, Error 
**errp)
 
 void blk_set_force_allow_inactivate(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 blk->force_allow_inactivate = true;
 }
 
@@ -346,6 +347,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, 
uint64_t shared_perm)
 {
 BlockBackend *blk;
 
+assert(qemu_in_main_thread());
+
 blk = g_new0(BlockBackend, 1);
 blk->refcnt = 1;
 blk->ctx = ctx;
@@ -383,6 +386,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, 
uint64_t perm,
 {
 BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
 
+assert(qemu_in_main_thread());
+
 if (blk_insert_bs(blk, bs, errp) < 0) {
 blk_unref(blk);
 return NULL;
@@ -411,6 +416,8 @@ BlockBackend *blk_new_open(const char *filename, const char 
*reference,
 uint64_t perm = 0;
 uint64_t shared = BLK_PERM_ALL;
 
+assert(qemu_in_main_thread());
+
 /*
  * blk_new_open() is mainly used in .bdrv_create implementations and the
  * tools where sharing isn't a major concern because the BDS stays private
@@ -488,6 +495,7 @@ static void drive_info_del(DriveInfo *dinfo)
 
 int blk_get_refcnt(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 return blk ? blk->refcnt : 0;
 }
 
@@ -498,6 +506,7 @@ int blk_get_refcnt(BlockBackend *blk)
 void blk_ref(BlockBackend *blk)
 {
 assert(blk->refcnt > 0);
+assert(qemu_in_main_thread());
 blk->refcnt++;
 }
 
@@ -508,6 +517,7 @@ void blk_ref(BlockBackend *blk)
  */
 void blk_unref(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 if (blk) {
 assert(blk->refcnt > 0);
 if (blk->refcnt > 1) {
@@ -528,6 +538,7 @@ void blk_unref(BlockBackend *blk)
  */
 BlockBackend *blk_all_next(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 return blk ? QTAILQ_NEXT(blk, link)
: QTAILQ_FIRST(_backends);
 }
@@ -536,6 +547,8 @@ void blk_remove_all_bs(void)
 {
 BlockBackend *blk = NULL;
 
+assert(qemu_in_main_thread());
+
 while ((blk = blk_all_next(blk)) != NULL) {
 AioContext *ctx = blk_get_aio_context(blk);
 
@@ -559,6 +572,7 @@ void blk_remove_all_bs(void)
  */
 BlockBackend *blk_next(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 return blk ? QTAILQ_NEXT(blk, monitor_link)
: QTAILQ_FIRST(_block_backends);
 }
@@ -625,6 +639,7 @@ static void bdrv_next_reset(BdrvNextIterator *it)
 
 BlockDriverState *bdrv_first(BdrvNextIterator *it)
 {
+assert(qemu_in_main_thread());
 bdrv_next_reset(it);
 return bdrv_next(it);
 }
@@ -662,6 +677,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, 
Error **errp)
 {
 assert(!blk->name);
 assert(name && name[0]);
+assert(qemu_in_main_thread());
 
 if (!id_wellformed(name)) {
 error_setg(errp, "Invalid device name");
@@ -689,6 +705,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, 
Error **errp)
  */
 void monitor_remove_blk(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
+
 if (!blk->name) {
 return;
 }
@@ -715,6 +733,7 @@ BlockBackend *blk_by_name(const char *name)
 {
 BlockBackend *blk = NULL;
 
+assert(qemu_in_main_thread());
 assert(name);
 while ((blk = blk_next(blk)) != NULL) {
 if (!strcmp(name, blk->name)) {
@@ -749,6 +768,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
  */
 bool bdrv_has_blk(BlockDriverState *bs)
 {
+assert(qemu_in_main_thread());
 return bdrv_first_blk(bs) != NULL;
 }
 
@@ -759,6 +779,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
 {
 BdrvChild *c;
 
+assert(qemu_in_main_thread());
 QLIST_FOREACH(c, >parents, next_parent) {
 if (c->klass != _root) {
 return false;
@@ -808,6 +829,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
  */
 BlockBackendPublic *blk_get_public(BlockBackend *blk)
 {
+assert(qemu_in_main_thread());
 return >public;
 }
 
@@ -816,6 +838,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk)
  */
 BlockBackend *blk_by_public(BlockBackendPublic *public)
 {
+assert(qemu_in_main_thread());
 return container_of(public, BlockBackend, public);
 }
 
@@ -828,6 +851,8 @@ void blk_remove_bs(BlockBackend *blk)
 BlockDriverState *bs;
 BdrvChild *root;
 
+assert(qemu_in_main_thread());
+
 notifier_list_notify(>remove_bs_notifiers, blk);
 if 

[PATCH v5 12/31] assertions for blockjob_int.h

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
 blockjob.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 4bad1408cb..10c807413e 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -84,6 +84,7 @@ BlockJob *block_job_get(const char *id)
 void block_job_free(Job *job)
 {
 BlockJob *bjob = container_of(job, BlockJob, job);
+assert(qemu_in_main_thread());
 
 block_job_remove_all_bdrv(bjob);
 blk_unref(bjob->blk);
@@ -436,6 +437,8 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
 BlockBackend *blk;
 BlockJob *job;
 
+assert(qemu_in_main_thread());
+
 if (job_id == NULL && !(flags & JOB_INTERNAL)) {
 job_id = bdrv_get_device_name(bs);
 }
@@ -505,6 +508,7 @@ void block_job_iostatus_reset(BlockJob *job)
 void block_job_user_resume(Job *job)
 {
 BlockJob *bjob = container_of(job, BlockJob, job);
+assert(qemu_in_main_thread());
 block_job_iostatus_reset(bjob);
 }
 
-- 
2.27.0




[PATCH v5 14/31] include/block/blockjob.h: global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
blockjob functions run always under the BQL lock.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/blockjob.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index d200f33c10..fa0c3f7a47 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -77,6 +77,13 @@ typedef struct BlockJob {
 GSList *nodes;
 } BlockJob;
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /**
  * block_job_next:
  * @job: A block job, or %NULL.
@@ -158,6 +165,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
  */
 void block_job_iostatus_reset(BlockJob *job);
 
+/* Common functions that are neither I/O nor Global State */
+
 /**
  * block_job_is_internal:
  * @job: The job to determine if it is user-visible or not.
-- 
2.27.0




[PATCH v5 16/31] include/sysemu/blockdev.h: global state API

2021-11-23 Thread Emanuele Giuseppe Esposito
blockdev functions run always under the BQL lock.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/sysemu/blockdev.h | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index c4b7b8b54e..e53eb91be6 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -13,9 +13,6 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 
-void blockdev_mark_auto_del(BlockBackend *blk);
-void blockdev_auto_del(BlockBackend *blk);
-
 typedef enum {
 IF_DEFAULT = -1,/* for use with drive_add() only */
 /*
@@ -40,6 +37,16 @@ struct DriveInfo {
 QTAILQ_ENTRY(DriveInfo) next;
 };
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+void blockdev_mark_auto_del(BlockBackend *blk);
+void blockdev_auto_del(BlockBackend *blk);
+
 DriveInfo *blk_legacy_dinfo(BlockBackend *blk);
 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo);
 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
-- 
2.27.0




[PATCH v5 22/31] block_int-common.h: assertion in the callers of BlockDriver function pointers

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
 block.c| 18 ++
 block/create.c | 10 ++
 2 files changed, 28 insertions(+)

diff --git a/block.c b/block.c
index b77ab0a104..180884b8c0 100644
--- a/block.c
+++ b/block.c
@@ -526,6 +526,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque)
 
 CreateCo *cco = opaque;
 assert(cco->drv);
+assert(qemu_in_main_thread());
 
 ret = cco->drv->bdrv_co_create_opts(cco->drv,
 cco->filename, cco->opts, _err);
@@ -1090,6 +1091,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t 
hint)
 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
   QDict *old_options)
 {
+assert(qemu_in_main_thread());
 if (bs->drv && bs->drv->bdrv_join_options) {
 bs->drv->bdrv_join_options(options, old_options);
 } else {
@@ -1598,6 +1600,7 @@ static int bdrv_open_driver(BlockDriverState *bs, 
BlockDriver *drv,
 {
 Error *local_err = NULL;
 int i, ret;
+assert(qemu_in_main_thread());
 
 bdrv_assign_node_name(bs, node_name, _err);
 if (local_err) {
@@ -1989,6 +1992,8 @@ static int bdrv_fill_options(QDict **options, const char 
*filename,
 BlockDriver *drv = NULL;
 Error *local_err = NULL;
 
+assert(qemu_in_main_thread());
+
 /*
  * Caution: while qdict_get_try_str() is fine, getting non-string
  * types would require more care.  When @options come from
@@ -2182,6 +2187,7 @@ static void bdrv_child_perm(BlockDriverState *bs, 
BlockDriverState *child_bs,
 uint64_t *nperm, uint64_t *nshared)
 {
 assert(bs->drv && bs->drv->bdrv_child_perm);
+assert(qemu_in_main_thread());
 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
  parent_perm, parent_shared,
  nperm, nshared);
@@ -2267,6 +2273,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
 {
 BlockDriverState *bs = opaque;
 uint64_t cumulative_perms, cumulative_shared_perms;
+assert(qemu_in_main_thread());
 
 if (bs->drv->bdrv_set_perm) {
 bdrv_get_cumulative_perm(bs, _perms,
@@ -2278,6 +2285,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
 static void bdrv_drv_set_perm_abort(void *opaque)
 {
 BlockDriverState *bs = opaque;
+assert(qemu_in_main_thread());
 
 if (bs->drv->bdrv_abort_perm_update) {
 bs->drv->bdrv_abort_perm_update(bs);
@@ -2293,6 +2301,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, 
uint64_t perm,
  uint64_t shared_perm, Transaction *tran,
  Error **errp)
 {
+assert(qemu_in_main_thread());
 if (!bs->drv) {
 return 0;
 }
@@ -4357,6 +4366,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
 
 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 assert(bs_queue != NULL);
+assert(qemu_in_main_thread());
 
 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
 ctx = bdrv_get_aio_context(bs_entry->state.bs);
@@ -4622,6 +4632,7 @@ static int bdrv_reopen_prepare(BDRVReopenState 
*reopen_state,
 
 assert(reopen_state != NULL);
 assert(reopen_state->bs->drv != NULL);
+assert(qemu_in_main_thread());
 drv = reopen_state->bs->drv;
 
 /* This function and each driver's bdrv_reopen_prepare() remove
@@ -4832,6 +4843,7 @@ static void bdrv_reopen_commit(BDRVReopenState 
*reopen_state)
 bs = reopen_state->bs;
 drv = bs->drv;
 assert(drv != NULL);
+assert(qemu_in_main_thread());
 
 /* If there are any driver level actions to take */
 if (drv->bdrv_reopen_commit) {
@@ -4873,6 +4885,7 @@ static void bdrv_reopen_abort(BDRVReopenState 
*reopen_state)
 assert(reopen_state != NULL);
 drv = reopen_state->bs->drv;
 assert(drv != NULL);
+assert(qemu_in_main_thread());
 
 if (drv->bdrv_reopen_abort) {
 drv->bdrv_reopen_abort(reopen_state);
@@ -4886,6 +4899,7 @@ static void bdrv_close(BlockDriverState *bs)
 BdrvChild *child, *next;
 
 assert(!bs->refcnt);
+assert(qemu_in_main_thread());
 
 bdrv_drained_begin(bs); /* complete I/O */
 bdrv_flush(bs);
@@ -6671,6 +6685,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
 int ret;
 uint64_t cumulative_perms, cumulative_shared_perms;
 
+assert(qemu_in_main_thread());
+
 if (!bs->drv) {
 return -ENOMEDIUM;
 }
@@ -7180,6 +7196,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
 BdrvAioNotifier *baf, *baf_tmp;
 
 assert(!bs->walking_aio_notifiers);
+assert(qemu_in_main_thread());
 bs->walking_aio_notifiers = true;
 QLIST_FOREACH_SAFE(baf, >aio_notifiers, list, baf_tmp) {
 if (baf->deleted) {
@@ -7207,6 +7224,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
 AioContext *new_context)
 {
  

[PATCH v5 19/31] block/copy-before-write.h: global state API + assertions

2021-11-23 Thread Emanuele Giuseppe Esposito
copy-before-write functions always run under BQL lock.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
---
 block/copy-before-write.h | 7 +++
 block/copy-before-write.c | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 51847e711a..9a45de2fce 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -29,6 +29,13 @@
 #include "block/block_int.h"
 #include "block/block-copy.h"
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index c30a5ff8de..36a8d7ba52 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -223,6 +223,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 QDict *opts;
 
 assert(source->total_sectors == target->total_sectors);
+assert(qemu_in_main_thread());
 
 opts = qdict_new();
 qdict_put_str(opts, "driver", "copy-before-write");
@@ -245,6 +246,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 
 void bdrv_cbw_drop(BlockDriverState *bs)
 {
+assert(qemu_in_main_thread());
 bdrv_drop_filter(bs, _abort);
 bdrv_unref(bs);
 }
-- 
2.27.0




[PATCH v5 21/31] block_int-common.h: split function pointers in BlockDriver

2021-11-23 Thread Emanuele Giuseppe Esposito
Similar to the header split, also the function pointers in BlockDriver
can be split in I/O and global state.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/block/block_int-common.h | 440 +--
 1 file changed, 235 insertions(+), 205 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 70534f94ae..0e63dc694f 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -96,6 +96,11 @@ typedef struct BdrvTrackedRequest {
 
 
 struct BlockDriver {
+/*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards.
+ */
+
 const char *format_name;
 int instance_size;
 
@@ -121,23 +126,7 @@ struct BlockDriver {
  * on those children.
  */
 bool is_format;
-/*
- * Return true if @to_replace can be replaced by a BDS with the
- * same data as @bs without it affecting @bs's behavior (that is,
- * without it being visible to @bs's parents).
- */
-bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
-int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
-int (*bdrv_probe_device)(const char *filename);
 
-/*
- * Any driver implementing this callback is expected to be able to handle
- * NULL file names in its .bdrv_open() implementation.
- */
-void (*bdrv_parse_filename)(const char *filename, QDict *options,
-Error **errp);
 /*
  * Drivers not implementing bdrv_parse_filename nor bdrv_open should have
  * this field set to true, except ones that are defined only by their
@@ -159,7 +148,66 @@ struct BlockDriver {
  */
 bool supports_backing;
 
-/* For handling image reopen for split or non-split files */
+bool has_variable_length;
+
+/*
+ * Drivers setting this field must be able to work with just a plain
+ * filename with ':' as a prefix, and no other options.
+ * Options may be extracted from the filename by implementing
+ * bdrv_parse_filename.
+ */
+const char *protocol_name;
+
+/* List of options for creating images, terminated by name == NULL */
+QemuOptsList *create_opts;
+
+/* List of options for image amend */
+QemuOptsList *amend_opts;
+
+/*
+ * If this driver supports reopening images this contains a
+ * NULL-terminated list of the runtime options that can be
+ * modified. If an option in this list is unspecified during
+ * reopen then it _must_ be reset to its default value or return
+ * an error.
+ */
+const char *const *mutable_opts;
+
+/*
+ * Pointer to a NULL-terminated array of names of strong options
+ * that can be specified for bdrv_open(). A strong option is one
+ * that changes the data of a BDS.
+ * If this pointer is NULL, the array is considered empty.
+ * "filename" and "driver" are always considered strong.
+ */
+const char *const *strong_runtime_opts;
+
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+/*
+ * Return true if @to_replace can be replaced by a BDS with the
+ * same data as @bs without it affecting @bs's behavior (that is,
+ * without it being visible to @bs's parents).
+ */
+bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+int (*bdrv_probe_device)(const char *filename);
+
+/*
+ * Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation.
+ */
+void (*bdrv_parse_filename)(const char *filename, QDict *options,
+Error **errp);
+
+/* For handling image reopen for split or non-split files. */
 int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp);
 void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
@@ -175,7 +223,6 @@ struct BlockDriver {
   Error **errp);
 void (*bdrv_close)(BlockDriverState *bs);
 
-
 int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
Error **errp);
 int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
@@ -183,11 +230,6 @@ struct BlockDriver {
 QemuOpts *opts,
 Error **errp);
 
-int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
-  BlockdevAmendOptions *opts,
-  bool force,
- 

[PATCH v5 25/31] block-backend-common.h: split function pointers in BlockDevOps

2021-11-23 Thread Emanuele Giuseppe Esposito
Assertions in the callers of the function pointrs are already
added by previous patches.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Philippe Mathieu-Daudé 
---
 include/sysemu/block-backend-common.h | 28 ++-
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/include/sysemu/block-backend-common.h 
b/include/sysemu/block-backend-common.h
index 6963bbf45a..ae630c624c 100644
--- a/include/sysemu/block-backend-common.h
+++ b/include/sysemu/block-backend-common.h
@@ -27,6 +27,14 @@
 
 /* Callbacks for block device models */
 typedef struct BlockDevOps {
+
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /*
  * Runs when virtual media changed (monitor commands eject, change)
  * Argument load is true on load and false on eject.
@@ -44,16 +52,26 @@ typedef struct BlockDevOps {
  * true, even if they do not support eject requests.
  */
 void (*eject_request_cb)(void *opaque, bool force);
-/*
- * Is the virtual tray open?
- * Device models implement this only when the device has a tray.
- */
-bool (*is_tray_open)(void *opaque);
+
 /*
  * Is the virtual medium locked into the device?
  * Device models implement this only when device has such a lock.
  */
 bool (*is_medium_locked)(void *opaque);
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+/*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+bool (*is_tray_open)(void *opaque);
+
 /*
  * Runs when the size changed (e.g. monitor command block_resize)
  */
-- 
2.27.0




[PATCH v5 20/31] block/coroutines: I/O API

2021-11-23 Thread Emanuele Giuseppe Esposito
block coroutines functions run in different aiocontext, and are
not protected by the BQL. Therefore are I/O.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
---
 block/coroutines.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block/coroutines.h b/block/coroutines.h
index c8c14a29c8..c61abd271a 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -29,6 +29,12 @@
 
 /* For blk_bs() in generated block/block-gen.c */
 #include "sysemu/block-backend.h"
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
 
 int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
-- 
2.27.0




[PATCH v5 23/31] block_int-common.h: split function pointers in BdrvChildClass

2021-11-23 Thread Emanuele Giuseppe Esposito
Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/block/block_int-common.h | 67 +++-
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 0e63dc694f..3ceb2365a8 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -817,12 +817,16 @@ struct BdrvChildClass {
  */
 bool parent_is_bds;
 
+/*
+ * Global state (GS) API. These functions run under the BQL lock.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
 void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
 int *child_flags, QDict *child_options,
 int parent_flags, QDict *parent_options);
-
 void (*change_media)(BdrvChild *child, bool load);
-void (*resize)(BdrvChild *child);
 
 /*
  * Returns a name that is supposedly more useful for human users than the
@@ -839,6 +843,40 @@ struct BdrvChildClass {
  */
 char *(*get_parent_desc)(BdrvChild *child);
 
+/*
+ * Notifies the parent that the child has been activated/inactivated (e.g.
+ * when migration is completing) and it can start/stop requesting
+ * permissions and doing I/O on it.
+ */
+void (*activate)(BdrvChild *child, Error **errp);
+int (*inactivate)(BdrvChild *child);
+
+void (*attach)(BdrvChild *child);
+void (*detach)(BdrvChild *child);
+
+/*
+ * Notifies the parent that the filename of its child has changed (e.g.
+ * because the direct child was removed from the backing chain), so that it
+ * can update its reference.
+ */
+int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+   const char *filename, Error **errp);
+
+bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
+GSList **ignore, Error **errp);
+void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
+
+AioContext *(*get_parent_aio_context)(BdrvChild *child);
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+void (*resize)(BdrvChild *child);
+
 /*
  * If this pair of functions is implemented, the parent doesn't issue new
  * requests after returning from .drained_begin() until .drained_end() is
@@ -863,31 +901,6 @@ struct BdrvChildClass {
  * activity on the child has stopped.
  */
 bool (*drained_poll)(BdrvChild *child);
-
-/*
- * Notifies the parent that the child has been activated/inactivated (e.g.
- * when migration is completing) and it can start/stop requesting
- * permissions and doing I/O on it.
- */
-void (*activate)(BdrvChild *child, Error **errp);
-int (*inactivate)(BdrvChild *child);
-
-void (*attach)(BdrvChild *child);
-void (*detach)(BdrvChild *child);
-
-/*
- * Notifies the parent that the filename of its child has changed (e.g.
- * because the direct child was removed from the backing chain), so that it
- * can update its reference.
- */
-int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
-   const char *filename, Error **errp);
-
-bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
-GSList **ignore, Error **errp);
-void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
-
-AioContext *(*get_parent_aio_context)(BdrvChild *child);
 };
 
 extern const BdrvChildClass child_of_bds;
-- 
2.27.0




[PATCH v5 28/31] block.c: assert BQL lock held in bdrv_co_invalidate_cache

2021-11-23 Thread Emanuele Giuseppe Esposito
bdrv_co_invalidate_cache is special: it is an I/O function,
but uses the block layer permission API, which is GS.

Because of this, we can assert that either the function is
being called with BQL held, and thus can use the permission API,
or make sure that the permission API is not used, by ensuring that
bs (and parents) .open_flags does not contain BDRV_O_INACTIVE.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/block.c b/block.c
index a0309f827d..805974676b 100644
--- a/block.c
+++ b/block.c
@@ -6574,6 +6574,26 @@ void bdrv_init_with_whitelist(void)
 bdrv_init();
 }
 
+static bool bdrv_is_active(BlockDriverState *bs)
+{
+BdrvChild *parent;
+
+if (bs->open_flags & BDRV_O_INACTIVE) {
+return false;
+}
+
+QLIST_FOREACH(parent, >parents, next_parent) {
+if (parent->klass->parent_is_bds) {
+BlockDriverState *parent_bs = parent->opaque;
+if (!bdrv_is_active(parent_bs)) {
+return false;
+}
+}
+}
+
+   return true;
+}
+
 int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
 BdrvChild *child, *parent;
@@ -6585,6 +6605,12 @@ int coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 return -ENOMEDIUM;
 }
 
+/*
+ * No need to muck with permissions if bs is active.
+ * TODO: should activation be a separate function?
+ */
+assert(qemu_in_main_thread() || bdrv_is_active(bs));
+
 QLIST_FOREACH(child, >children, next) {
 bdrv_co_invalidate_cache(child->bs, _err);
 if (local_err) {
-- 
2.27.0




[PATCH v5 30/31] crypto: delegate permission functions to JobDriver .pre_run

2021-11-23 Thread Emanuele Giuseppe Esposito
block_crypto_amend_options_generic_luks uses the block layer
permission API, therefore it should be called with the BQL held.

However, the same function is being called ib two BlockDriver
callbacks: bdrv_amend_options (under BQL) and bdrv_co_amend (I/O).

The latter is I/O because it is invoked by block/amend.c's
blockdev_amend_run(), a .run callback of the amend JobDriver

Therefore we want to 1) change block_crypto_amend_options_generic_luks
to use the permission API only when the BQL is held, and
2) use the .pre_run JobDriver callback to check for
permissions before switching to the job aiocontext. This has also
the benefit of applying the same permission operation to all
amend implementations, not only luks.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/amend.c  | 20 
 block/crypto.c | 18 --
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/block/amend.c b/block/amend.c
index 392df9ef83..fba6add51a 100644
--- a/block/amend.c
+++ b/block/amend.c
@@ -53,10 +53,30 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error 
**errp)
 return ret;
 }
 
+static int blockdev_amend_refresh_perms(Job *job, Error **errp)
+{
+BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
+
+return bdrv_child_refresh_perms(s->bs, s->bs->file, errp);
+}
+
+static int blockdev_amend_pre_run(Job *job, Error **errp)
+{
+return blockdev_amend_refresh_perms(job, errp);
+}
+
+static void blockdev_amend_clean(Job *job)
+{
+Error *errp;
+blockdev_amend_refresh_perms(job, );
+}
+
 static const JobDriver blockdev_amend_job_driver = {
 .instance_size = sizeof(BlockdevAmendJob),
 .job_type  = JOB_TYPE_AMEND,
 .run   = blockdev_amend_run,
+.pre_run   = blockdev_amend_pre_run,
+.clean = blockdev_amend_clean,
 };
 
 void qmp_x_blockdev_amend(const char *job_id,
diff --git a/block/crypto.c b/block/crypto.c
index c8ba4681e2..82f154516c 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -780,6 +780,7 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, 
Error **errp)
 static int
 block_crypto_amend_options_generic_luks(BlockDriverState *bs,
 QCryptoBlockAmendOptions 
*amend_options,
+bool under_bql,
 bool force,
 Error **errp)
 {
@@ -791,9 +792,12 @@ block_crypto_amend_options_generic_luks(BlockDriverState 
*bs,
 
 /* apply for exclusive read/write permissions to the underlying file*/
 crypto->updating_keys = true;
-ret = bdrv_child_refresh_perms(bs, bs->file, errp);
-if (ret) {
-goto cleanup;
+
+if (under_bql) {
+ret = bdrv_child_refresh_perms(bs, bs->file, errp);
+if (ret) {
+goto cleanup;
+}
 }
 
 ret = qcrypto_block_amend_options(crypto->block,
@@ -806,7 +810,9 @@ block_crypto_amend_options_generic_luks(BlockDriverState 
*bs,
 cleanup:
 /* release exclusive read/write permissions to the underlying file*/
 crypto->updating_keys = false;
-bdrv_child_refresh_perms(bs, bs->file, errp);
+if (under_bql) {
+bdrv_child_refresh_perms(bs, bs->file, errp);
+}
 return ret;
 }
 
@@ -834,7 +840,7 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
 goto cleanup;
 }
 ret = block_crypto_amend_options_generic_luks(bs, amend_options,
-  force, errp);
+  true, force, errp);
 cleanup:
 qapi_free_QCryptoBlockAmendOptions(amend_options);
 return ret;
@@ -853,7 +859,7 @@ coroutine_fn block_crypto_co_amend_luks(BlockDriverState 
*bs,
 .u.luks = *qapi_BlockdevAmendOptionsLUKS_base(>u.luks),
 };
 return block_crypto_amend_options_generic_luks(bs, _opts,
-   force, errp);
+   false, force, errp);
 }
 
 static void
-- 
2.27.0