Re: [PATCH v3] hw/dma: Let dma_buf_read() / dma_buf_write() propagate MemTxResult

2022-01-17 Thread Philippe Mathieu-Daudé via
On 1/17/22 13:51, Philippe Mathieu-Daudé wrote:
> From: Philippe Mathieu-Daudé 
> 
> Since commit 292e13142d2, dma_buf_rw() returns a MemTxResult type.
> Do not discard it, return it to the caller. Pass the previously
> returned value (the QEMUSGList residual size, which was rarely used)
> as an optional argument.
> 
> With this new API, SCSIRequest::residual might now be accessed via
> a pointer. Since the size_t type does not have the same size on
> 32 and 64-bit host architectures, convert it to a uint64_t, which
> is big enough to hold the residual size, and the type is constant
> on both 32/64-bit hosts.
> 
> Update the few dma_buf_read() / dma_buf_write() callers to the new
> API.
> 
> Reviewed-by: Klaus Jensen 
> Signed-off-by: Philippe Mathieu-Daudé 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> v3:
> - Reworded to clarify the SCSIRequest::residual type change
> - Posted out of the "Use dma_addr_t type definition when relevant"
>   series (dhildenb)
> ---
>  include/hw/scsi/scsi.h |  2 +-
>  include/sysemu/dma.h   |  4 +--
>  hw/ide/ahci.c  |  8 +++---
>  hw/nvme/ctrl.c |  4 +--
>  hw/scsi/megasas.c  | 59 ++
>  hw/scsi/scsi-bus.c |  6 +++--
>  softmmu/dma-helpers.c  | 16 +++-
>  7 files changed, 59 insertions(+), 40 deletions(-)
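
For reference, a minimal sketch of how a caller can use the new signatures
(hypothetical device code; 'buf', 'len' and 'qsg' are assumptions, only the
dma_buf_read()/dma_buf_write() prototypes come from the patch):

    dma_addr_t residual;
    MemTxResult res;

    /* Expose 'buf' to the guest through the scatter/gather list; pass a
     * pointer for the residual size, or NULL when it is not needed. */
    res = dma_buf_read(buf, len, &residual, qsg, MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        /* The DMA transaction faulted; the caller now sees the error
         * instead of having it silently discarded. */
        return res;
    }
    if (residual) {
        /* The sglist was shorter than 'len': handle the short transfer. */
    }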

Queued via memory-api.



Re: [PATCH v3 00/10] hw/dma: Use dma_addr_t type definition when relevant

2022-01-17 Thread Philippe Mathieu-Daudé via
On 1/11/22 19:42, Philippe Mathieu-Daudé wrote:

> Philippe Mathieu-Daudé (10):
>   stubs: Restrict fw_cfg to system emulation
>   hw/nvram: Restrict fw_cfg QOM interface to sysemu and tools
>   hw/pci: Restrict pci-bus stub to sysemu
>   hw/pci: Document pci_dma_map()
>   hw/dma: Remove CONFIG_USER_ONLY check
>   hw/rdma/rdma_utils: Rename rdma_pci_dma_map 'len' argument
>   hw/scsi: Rename SCSIRequest::resid as 'residual'
>   hw/dma: Fix format string issues using dma_addr_t
>   hw/dma: Move ScatterGatherEntry / QEMUSGList declarations around
>   hw/dma: Use dma_addr_t type definition when relevant

Queued via memory-api.



[PATCH] hw/sd: Correct the CURRENT_STATE bits in SPI-mode response

2022-01-17 Thread frank . chang
From: Frank Chang 

In SPI-mode, type B ("cleared on valid command") clear condition is not
supported, and as the "In idle state" bit in SPI-mode has type A
("according to current state") clear condition, the CURRENT_STATE bits
in an SPI-mode response should be the SD card's state after the command
is executed, instead of the state when it received the preceding
command.

Also, we don't need to clear the type B ("clear on valid command")
status bits after the response is updated in SPI-mode.

Signed-off-by: Frank Chang 
---
 hw/sd/sd.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index cd67a7bac8..9736b8912d 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1757,12 +1757,20 @@ int sd_do_command(SDState *sd, SDRequest *req,
 if (rtype == sd_illegal) {
 sd->card_status |= ILLEGAL_COMMAND;
 } else {
-/* Valid command, we can update the 'state before command' bits.
- * (Do this now so they appear in r1 responses.)
- */
 sd->current_cmd = req->cmd;
 sd->card_status &= ~CURRENT_STATE;
-sd->card_status |= (last_state << 9);
+
+if (!sd->spi) {
+/* Valid command, we can update the 'state before command' bits.
+ * (Do this now so they appear in r1 responses.)
+ */
+sd->card_status |= (last_state << 9);
+} else {
+/* Type B ("clear on valid command") is not supported
+ * in SPI-mode.
+ */
+sd->card_status |= (sd->state << 9);
+}
 }
 
 send_response:
@@ -1808,10 +1816,12 @@ send_response:
 trace_sdcard_response(sd_response_name(rtype), rsplen);
 
 if (rtype != sd_illegal) {
-/* Clear the "clear on valid command" status bits now we've
- * sent any response
- */
-sd->card_status &= ~CARD_STATUS_B;
+if (!sd->spi) {
+/* Clear the "clear on valid command" status bits now we've
+ * sent any response
+ */
+sd->card_status &= ~CARD_STATUS_B;
+}
 }
 
 #ifdef DEBUG_SD
-- 
2.31.1




Re: [PATCH v3] hw/dma: Let dma_buf_read() / dma_buf_write() propagate MemTxResult

2022-01-17 Thread Peter Xu
On Mon, Jan 17, 2022 at 01:51:30PM +0100, Philippe Mathieu-Daudé wrote:
> From: Philippe Mathieu-Daudé 
> 
> Since commit 292e13142d2, dma_buf_rw() returns a MemTxResult type.
> Do not discard it, return it to the caller. Pass the previously
> returned value (the QEMUSGList residual size, which was rarely used)
> as an optional argument.
> 
> With this new API, SCSIRequest::residual might now be accessed via
> a pointer. Since the size_t type does not have the same size on
> 32 and 64-bit host architectures, convert it to a uint64_t, which
> is big enough to hold the residual size, and the type is constant
> on both 32/64-bit hosts.
> 
> Update the few dma_buf_read() / dma_buf_write() callers to the new
> API.
> 
> Reviewed-by: Klaus Jensen 
> Signed-off-by: Philippe Mathieu-Daudé 
> Signed-off-by: Philippe Mathieu-Daudé 

Acked-by: Peter Xu 

-- 
Peter Xu




Re: iotest 040, 041, intermittent failure in netbsd VM

2022-01-17 Thread John Snow
On Mon, Jan 17, 2022 at 3:49 PM Peter Maydell  wrote:
>
> On Mon, 17 Jan 2022 at 20:35, John Snow  wrote:
>
> > Can you please try applying this temporary patch and running `./check
> > -qcow2 040 041` until you see a breakage and show me the output from
> > that?
>
> With this temporary patch the VM doesn't launch at all:

"Works for me", but I found out why.

>
> peter.mayd...@hackbox2.linaro.org:~/qemu-netbsd$ make -C build/
> vm-build-netbsd J=8 V=1 2>&1 | tee netbsd.log
> make: Entering directory '/home/peter.maydell/qemu-netbsd/build'
> /usr/bin/python3 -B /home/peter.maydell/qemu-netbsd/meson/meson.py
> introspect --targets --tests --benchmarks | /usr/bin/python3 -B
> scripts/mtest2make.py > Makefile.mtest
> { \
>   echo 'ninja-targets = \'; \
>   /usr/bin/ninja -t targets all | sed 's/:.*//; $!s/$/ \\/'; \
>   echo 'build-files = \'; \
>   /usr/bin/ninja -t query build.ninja | sed -n '1,/^  input:/d; /^
> outputs:/q; s/$/ \\/p'; \
> } > Makefile.ninja.tmp && mv Makefile.ninja.tmp Makefile.ninja
> (GIT="git" "/home/peter.maydell/qemu-netbsd/scripts/git-submodule.sh"
> update ui/keycodemapdb meson tests/fp/berkeley-testfloat-3
> tests/fp/berkeley-softfloat-3 dtc capstone slirp)
> (GIT="git" "/home/peter.maydell/qemu-netbsd/scripts/git-submodule.sh"
> update ui/keycodemapdb meson tests/fp/berkeley-testfloat-3
> tests/fp/berkeley-softfloat-3 dtc capstone slirp)
> /usr/bin/python3 -B /home/peter.maydell/qemu-netbsd/tests/vm/netbsd
> --debug  --jobs 8 --verbose --image
> "/home/peter.maydell/.cache/qemu-vm/images/netbsd.img"  --snapshot
> --build-qemu /home/peter.maydell/qemu-netbsd --
> DEBUG:root:Creating archive
> /home/peter.maydell/qemu-netbsd/build/vm-test-72ra6_8s.tmp/data-f706c.tar
> for src_dir dir: /home/peter.maydell/qemu-netbsd
> DEBUG:root:QEMU args: -nodefaults -m 4G -cpu max -netdev
> user,id=vnet,hostfwd=:127.0.0.1:0-:22,ipv6=no -device
> virtio-net-pci,netdev=vnet -vnc 127.0.0.1:0,to=20 -smp 8 -enable-kvm
> -drive 
> file=/home/peter.maydell/.cache/qemu-vm/images/netbsd.img,snapshot=on,if=none,id=drive0,cache=writeback
> -device virtio-blk,drive=drive0,bootindex=0 -drive
> file=/home/peter.maydell/qemu-netbsd/build/vm-test-72ra6_8s.tmp/data-f706c.tar,if=none,id=data-f706c,cache=writeback,format=raw
> -device virtio-blk,drive=data-f706c,serial=data-f706c,bootindex=1
> DEBUG:asyncio:Using selector: EpollSelector
> DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Registering
> .
> DEBUG:qemu.machine.machine:VM launch command: 'qemu-system-x86_64
> -display none -vga none -chardev
> socket,id=mon,path=/var/tmp/qemu-machine-0m15ou19/qemu-2335-7f3b78d7f128-monitor.sock
> -mon chardev=mon,mode=control -machine pc -chardev
> socket,id=console,path=/var/tmp/qemu-machine-0m15ou19/qemu-2335-7f3b78d7f128-console.sock,server=on,wait=off
> -serial chardev:console -nodefaults -m 4G -cpu max -netdev
> user,id=vnet,hostfwd=:127.0.0.1:0-:22,ipv6=no -device
> virtio-net-pci,netdev=vnet -vnc 127.0.0.1:0,to=20 -smp 8 -enable-kvm
> -drive 
> file=/home/peter.maydell/.cache/qemu-vm/images/netbsd.img,snapshot=on,if=none,id=drive0,cache=writeback
> -device virtio-blk,drive=drive0,bootindex=0 -drive
> file=/home/peter.maydell/qemu-netbsd/build/vm-test-72ra6_8s.tmp/data-f706c.tar,if=none,id=data-f706c,cache=writeback,format=raw
> -device virtio-blk,drive=data-f706c,serial=data-f706c,bootindex=1'
> DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Transitioning from
> 'Runstate.IDLE' to 'Runstate.CONNECTING'.
> DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Awaiting connection
> on /var/tmp/qemu-machine-0m15ou19/qemu-2335-7f3b78d7f128-monitor.sock
> ...
> DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Connection accepted.
> DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Awaiting greeting ...
> DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:<-- {
>   "QMP": {
> "version": {
>   "qemu": {
> "micro": 1,
> "minor": 11,
> "major": 2
>   },
>   "package": "(Debian 1:2.11+dfsg-1ubuntu7.38)"
> },
> "capabilities": []
>   }
> }

Well, today I learned that:

(1) vm-build-XXX targets use your host system's QEMU to run that VM
(2) my QMP library cannot talk to QEMU 2.11.

That doesn't explain the intermittent netbsd failure yet, though.
(I guess this wasn't a failure point for you due to the aggressive
caching of the VM images? Unlucky.)

Here's another hotfix, this one I cannot easily test quickly (I don't
have 2.11 handy and it no longer builds for me),
but I think it'll fix the VM installation problem against older QEMU versions:

diff --git a/python/qemu/aqmp/qmp_client.py b/python/qemu/aqmp/qmp_client.py
index 8105e29fa8..6b43e1dbbe 100644
--- a/python/qemu/aqmp/qmp_client.py
+++ b/python/qemu/aqmp/qmp_client.py
@@ -292,9 +292,9 @@ async def _negotiate(self) -> None:
 """
 self.logger.debug("Negotiating capabilities ...")

-arguments: Dict[str, List[str]] = {'enable': []}
+arguments: Dict[str, List[str]] = {}
 if 

Re: iotest 040, 041, intermittent failure in netbsd VM

2022-01-17 Thread Peter Maydell
On Mon, 17 Jan 2022 at 20:35, John Snow  wrote:

> Can you please try applying this temporary patch and running `./check
> -qcow2 040 041` until you see a breakage and show me the output from
> that?

With this temporary patch the VM doesn't launch at all:

peter.mayd...@hackbox2.linaro.org:~/qemu-netbsd$ make -C build/
vm-build-netbsd J=8 V=1 2>&1 | tee netbsd.log
make: Entering directory '/home/peter.maydell/qemu-netbsd/build'
/usr/bin/python3 -B /home/peter.maydell/qemu-netbsd/meson/meson.py
introspect --targets --tests --benchmarks | /usr/bin/python3 -B
scripts/mtest2make.py > Makefile.mtest
{ \
  echo 'ninja-targets = \'; \
  /usr/bin/ninja -t targets all | sed 's/:.*//; $!s/$/ \\/'; \
  echo 'build-files = \'; \
  /usr/bin/ninja -t query build.ninja | sed -n '1,/^  input:/d; /^
outputs:/q; s/$/ \\/p'; \
} > Makefile.ninja.tmp && mv Makefile.ninja.tmp Makefile.ninja
(GIT="git" "/home/peter.maydell/qemu-netbsd/scripts/git-submodule.sh"
update ui/keycodemapdb meson tests/fp/berkeley-testfloat-3
tests/fp/berkeley-softfloat-3 dtc capstone slirp)
(GIT="git" "/home/peter.maydell/qemu-netbsd/scripts/git-submodule.sh"
update ui/keycodemapdb meson tests/fp/berkeley-testfloat-3
tests/fp/berkeley-softfloat-3 dtc capstone slirp)
/usr/bin/python3 -B /home/peter.maydell/qemu-netbsd/tests/vm/netbsd
--debug  --jobs 8 --verbose --image
"/home/peter.maydell/.cache/qemu-vm/images/netbsd.img"  --snapshot
--build-qemu /home/peter.maydell/qemu-netbsd --
DEBUG:root:Creating archive
/home/peter.maydell/qemu-netbsd/build/vm-test-72ra6_8s.tmp/data-f706c.tar
for src_dir dir: /home/peter.maydell/qemu-netbsd
DEBUG:root:QEMU args: -nodefaults -m 4G -cpu max -netdev
user,id=vnet,hostfwd=:127.0.0.1:0-:22,ipv6=no -device
virtio-net-pci,netdev=vnet -vnc 127.0.0.1:0,to=20 -smp 8 -enable-kvm
-drive 
file=/home/peter.maydell/.cache/qemu-vm/images/netbsd.img,snapshot=on,if=none,id=drive0,cache=writeback
-device virtio-blk,drive=drive0,bootindex=0 -drive
file=/home/peter.maydell/qemu-netbsd/build/vm-test-72ra6_8s.tmp/data-f706c.tar,if=none,id=data-f706c,cache=writeback,format=raw
-device virtio-blk,drive=data-f706c,serial=data-f706c,bootindex=1
DEBUG:asyncio:Using selector: EpollSelector
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Registering
.
DEBUG:qemu.machine.machine:VM launch command: 'qemu-system-x86_64
-display none -vga none -chardev
socket,id=mon,path=/var/tmp/qemu-machine-0m15ou19/qemu-2335-7f3b78d7f128-monitor.sock
-mon chardev=mon,mode=control -machine pc -chardev
socket,id=console,path=/var/tmp/qemu-machine-0m15ou19/qemu-2335-7f3b78d7f128-console.sock,server=on,wait=off
-serial chardev:console -nodefaults -m 4G -cpu max -netdev
user,id=vnet,hostfwd=:127.0.0.1:0-:22,ipv6=no -device
virtio-net-pci,netdev=vnet -vnc 127.0.0.1:0,to=20 -smp 8 -enable-kvm
-drive 
file=/home/peter.maydell/.cache/qemu-vm/images/netbsd.img,snapshot=on,if=none,id=drive0,cache=writeback
-device virtio-blk,drive=drive0,bootindex=0 -drive
file=/home/peter.maydell/qemu-netbsd/build/vm-test-72ra6_8s.tmp/data-f706c.tar,if=none,id=data-f706c,cache=writeback,format=raw
-device virtio-blk,drive=data-f706c,serial=data-f706c,bootindex=1'
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Transitioning from
'Runstate.IDLE' to 'Runstate.CONNECTING'.
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Awaiting connection
on /var/tmp/qemu-machine-0m15ou19/qemu-2335-7f3b78d7f128-monitor.sock
...
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Connection accepted.
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Awaiting greeting ...
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:<-- {
  "QMP": {
"version": {
  "qemu": {
"micro": 1,
"minor": 11,
"major": 2
  },
  "package": "(Debian 1:2.11+dfsg-1ubuntu7.38)"
},
"capabilities": []
  }
}
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Negotiating capabilities ...
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:--> {
  "execute": "qmp_capabilities",
  "arguments": {
"enable": []
  }
}
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:<-- {
  "error": {
"class": "GenericError",
"desc": "Parameter 'enable' is unexpected"
  }
}
ERROR:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Negotiation failed:
AssertionError
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Negotiation failed:
  | Traceback (most recent call last):
  |   File 
"/home/peter.maydell/qemu-netbsd/tests/vm/../../python/qemu/aqmp/qmp_client.py",
line 306, in _negotiate
  | assert 'return' in reply
  | AssertionError

ERROR:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Failed to establish
session: qemu.aqmp.qmp_client.NegotiationError: Negotiation failed:
DEBUG:qemu.aqmp.qmp_client.qemu-2335-7f3b78d7f128:Failed to establish session:
  | Traceback (most recent call last):
  |   File 
"/home/peter.maydell/qemu-netbsd/tests/vm/../../python/qemu/aqmp/qmp_client.py",
line 306, in _negotiate
  | assert 'return' in reply
  | AssertionError
  |
  | The above exception was the direct cause 

Re: iotest 040, 041, intermittent failure in netbsd VM

2022-01-17 Thread Peter Maydell
On Mon, 17 Jan 2022 at 20:35, John Snow  wrote:
>
> On Mon, Jan 17, 2022 at 5:05 AM Kevin Wolf  wrote:
> >
> > > On 10.01.2022 at 16:55, Peter Maydell wrote:
> > > Just saw this failure of iotests in a netbsd VM

> This trace says that we timed out while awaiting a connection from
> QEMU during the VM launch phase. i.e. python/qemu/qmp/machine.py line
> 340:
>
> def _post_launch(self) -> None:
> if self._qmp_connection:
> self._qmp.accept(self._qmp_timer)  <-- we timed out here.
>
> (Note to self: make this traceback look more obvious as to what was
> canceled and why. I think I can improve readability here a bit ...)
>
> Sky's the limit on why QEMU never connected to the socket, but:
>
> > > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040.out
> > > fcntl(): Invalid argument
>
> That looks fairly suspicious, and I don't know which process was
> responsible for printing it (or when, relative to the other outputs).
> I assume you don't see any such output like this on a good run.

The NetBSD VM prints those fcntl messages all over the place.
I think something in the build system (make? ninja? who knows)
triggers them.

> > > -OK
> > > +FAILED (errors=1)
> > >   TEST   iotest-qcow2: 041 [fail]
> > > QEMU  --
> > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-system-aarch64"
> > > -nodefaults -display none -accel qtest -machine virt
> > > QEMU_IMG  --
> > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-img"
> > > QEMU_IO   --
> > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-io"
> > > --cache writeback --aio threads -f qcow2
> > > QEMU_NBD  --
> > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-nbd"
> > > IMGFMT-- qcow2
> > > IMGPROTO  -- file
> > > PLATFORM  -- NetBSD/amd64 localhost 9.2
> > > TEST_DIR  -- 
> > > /home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/scratch
> > > SOCK_DIR  -- /tmp/tmpuniuicbi
> > > GDB_OPTIONS   --
> > > VALGRIND_QEMU --
> > > PRINT_QEMU_OUTPUT --
> > >
> > > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041.out
> > > +++ 041.out.bad
> > > @@ -1,5 +1,32 @@
> > > -...
> > > +..ERROR:qemu.aqmp.qmp_client.qemu-15252:Failed
> > > to establish connection: concurrent.futures._base.CancelledError
> > > +E
> > > +==
> > > +ERROR: test_small_buffer (__main__.TestSingleBlockdev)
> > > +--
> > > +Traceback (most recent call last):
> > > +  File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041", line
> > > 233, in setUp
> > > +TestSingleDrive.setUp(self)
> > > +  File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041", line
> > > 54, in setUp
> > > +self.vm.launch()
> > > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> > > line 399, in launch
> > > +self._launch()
> > > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> > > line 434, in _launch
> > > +self._post_launch()
> > > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/qtest.py",
> > > line 147, in _post_launch
> > > +super()._post_launch()
> > > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> > > line 340, in _post_launch
> > > +self._qmp.accept(self._qmp_timer)
> > > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py",
> > > line 69, in accept
> > > +timeout
> > > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py",
> > > line 42, in _sync
> > > +asyncio.wait_for(future, timeout=timeout)
> > > +  File "/usr/pkg/lib/python3.7/asyncio/base_events.py", line 587, in
> > > run_until_complete
> > > +return future.result()
> > > +  File "/usr/pkg/lib/python3.7/asyncio/tasks.py", line 449, in wait_for
> > > +raise futures.TimeoutError()
> > > +concurrent.futures._base.TimeoutError
>
> Same problem here, except I don't see any output from QEMU to blame.
> As far as the Python code knows, it just never got a connection on the
> socket, so it timed out and died.

I think the NetBSD VM does for some reason get a bit slow to do
stuff. I've never worked out why. In the past we've had to bump
up various overoptimistic timeouts to help it out.

> I do expect this to print more information on failure than it
> currently is, though (bug somewhere in machine.py, I think).
> Can you please try applying this temporary patch and running `./check
> -qcow2 040 041` until you see a breakage and show me the output from
> that?
>
> diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
> index 67ab06ca2b..ca49e6fcd2 100644
> --- 

Re: iotest 040, 041, intermittent failure in netbsd VM

2022-01-17 Thread John Snow
On Mon, Jan 17, 2022 at 5:05 AM Kevin Wolf  wrote:
>
> On 10.01.2022 at 16:55, Peter Maydell wrote:
> > Just saw this failure of iotests in a netbsd VM (the in-tree
> > tests/vm stuff). Pretty sure it's an intermittent failure, as the
> > pull request being tested has nothing IO- or block-related.
> >
> >
> >   TEST   iotest-qcow2: 036
> >   TEST   iotest-qcow2: 037
> >   TEST   iotest-qcow2: 038 [not run]
> >   TEST   iotest-qcow2: 039 [not run]
> >   TEST   iotest-qcow2: 040 [fail]
> > QEMU  --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-system-aarch64"
> > -nodefaults -display none -accel qtest -machine
> > virt
> > QEMU_IMG  --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-img"
> > QEMU_IO   --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-io"
> > --cache writeback --aio threads -f qcow2
> > QEMU_NBD  --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-nbd"
> > IMGFMT-- qcow2
> > IMGPROTO  -- file
> > PLATFORM  -- NetBSD/amd64 localhost 9.2
> > TEST_DIR  -- 
> > /home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/scratch
> > SOCK_DIR  -- /tmp/tmpuniuicbi
> > GDB_OPTIONS   --
> > VALGRIND_QEMU --
> > PRINT_QEMU_OUTPUT --
> >
> > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040.out
> > fcntl(): Invalid argument
> > +++ 040.out.bad
> > @@ -1,5 +1,30 @@
> > -.
> > +ERROR:qemu.aqmp.qmp_client.qemu-7648:Failed
> > to establish connection: concurrent.futures._base.CancelledError
> > +E
> > +==
> > +ERROR: test_top_is_default_active (__main__.TestSingleDrive)
> > +--
> > +Traceback (most recent call last):
> > +  File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040", line
> > 94, in setUp
> > +self.vm.launch()
> > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> > line 399, in launch
> > +self._launch()
> > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> > line 434, in _launch
> > +self._post_launch()
> > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/qtest.py",
> > line 147, in _post_launch
> > +super()._post_launch()
> > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> > line 340, in _post_launch
> > +self._qmp.accept(self._qmp_timer)
> > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py",
> > line 69, in accept
> > +timeout
> > +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py",
> > line 42, in _sync
> > +asyncio.wait_for(future, timeout=timeout)
> > +  File "/usr/pkg/lib/python3.7/asyncio/base_events.py", line 587, in
> > run_until_complete
> > +return future.result()
> > +  File "/usr/pkg/lib/python3.7/asyncio/tasks.py", line 449, in wait_for
> > +raise futures.TimeoutError()
> > +concurrent.futures._base.TimeoutError
> > +
> >  --
> >  Ran 65 tests
> >

This trace says that we timed out while awaiting a connection from
QEMU during the VM launch phase. i.e. python/qemu/qmp/machine.py line
340:

def _post_launch(self) -> None:
if self._qmp_connection:
self._qmp.accept(self._qmp_timer)  <-- we timed out here.

(Note to self: make this traceback look more obvious as to what was
canceled and why. I think I can improve readability here a bit ...)

Sky's the limit on why QEMU never connected to the socket, but:

> > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040.out
> > fcntl(): Invalid argument

That looks fairly suspicious, and I don't know which process was
responsible for printing it (or when, relative to the other outputs).
I assume you don't see any such output like this on a good run.

> > -OK
> > +FAILED (errors=1)
> >   TEST   iotest-qcow2: 041 [fail]
> > QEMU  --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-system-aarch64"
> > -nodefaults -display none -accel qtest -machine virt
> > QEMU_IMG  --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-img"
> > QEMU_IO   --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-io"
> > --cache writeback --aio threads -f qcow2
> > QEMU_NBD  --
> > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-nbd"
> > IMGFMT-- qcow2
> > IMGPROTO  -- file
> > PLATFORM  -- NetBSD/amd64 localhost 9.2
> > TEST_DIR  -- 
> > /home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/scratch
> > SOCK_DIR  -- /tmp/tmpuniuicbi
> > GDB_OPTIONS   --
> > VALGRIND_QEMU --
> > PRINT_QEMU_OUTPUT --
> >
> > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041.out
> > +++ 041.out.bad
> > @@ -1,5 +1,32 @@

Re: [PATCH v3 23/31] iotests: switch to AQMP

2022-01-17 Thread John Snow
On Fri, Jan 14, 2022 at 2:13 PM Eric Blake  wrote:
>
> On Mon, Jan 10, 2022 at 06:29:02PM -0500, John Snow wrote:
> > Simply import the type defition from the new location.
>
> definition
>

ACK

> >
> > Signed-off-by: John Snow 
> > Reviewed-by: Vladimir Sementsov-Ogievskiy 
> > Reviewed-by: Beraldo Leal 
> > ---
> >  tests/qemu-iotests/iotests.py | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)




Re: [PATCH 1/2] block/io: Update BSC only if want_zero is true

2022-01-17 Thread Nir Soffer
On Mon, Jan 17, 2022 at 6:26 PM Hanna Reitz  wrote:
>
> We update the block-status cache whenever we get new information from a
> bdrv_co_block_status() call to the block driver.  However, if we have
> passed want_zero=false to that call, it may flag areas containing zeroes
> as data, and so we would update the block-status cache with wrong
> information.
>
> Therefore, we should not update the cache with want_zero=false.
>
> Reported-by: Nir Soffer 
> Fixes: 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
>("block: block-status cache for data regions")
> Signed-off-by: Hanna Reitz 
> ---
>  block/io.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/block/io.c b/block/io.c
> index bb0a254def..4e4cb556c5 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2497,8 +2497,12 @@ static int coroutine_fn 
> bdrv_co_block_status(BlockDriverState *bs,
>   * non-protocol nodes, and then it is never used.  However, 
> filling
>   * the cache requires an RCU update, so double check here to 
> avoid
>   * such an update if possible.
> + *
> + * Check want_zero, because we only want to update the cache 
> when we
> + * have accurate information about what is zero and what is data.
>   */
> -if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
> +if (want_zero &&
> +ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
>  QLIST_EMPTY(&bs->children))
>  {
>  /*
> --
> 2.33.1
>

ovirt-imageio tests pass with this change.
Thanks for the quick fix!

Reviewed-by: Nir Soffer 




Re: [PATCH 2/2] iotests/block-status-cache: New test

2022-01-17 Thread Nir Soffer
On Mon, Jan 17, 2022 at 6:26 PM Hanna Reitz  wrote:
>
> Add a new test to verify that want_zero=false block-status calls do not
> pollute the block-status cache for want_zero=true calls.
>
> We check want_zero=true calls and their results using `qemu-img map`
> (over NBD), and want_zero=false calls also using `qemu-img map` over
> NBD, but using the qemu:allocation-depth context.
>
> (This test case cannot be integrated into nbd-qemu-allocation, because
> that is a qcow2 test, and this is a raw test.)
>
> Signed-off-by: Hanna Reitz 
> ---
>  tests/qemu-iotests/tests/block-status-cache   | 130 ++
>  .../qemu-iotests/tests/block-status-cache.out |   5 +
>  2 files changed, 135 insertions(+)
>  create mode 100755 tests/qemu-iotests/tests/block-status-cache
>  create mode 100644 tests/qemu-iotests/tests/block-status-cache.out
>
> diff --git a/tests/qemu-iotests/tests/block-status-cache 
> b/tests/qemu-iotests/tests/block-status-cache
> new file mode 100755
> index 00..1f2c3109d3
> --- /dev/null
> +++ b/tests/qemu-iotests/tests/block-status-cache
> @@ -0,0 +1,130 @@
> +#!/usr/bin/env python3
> +# group: rw quick
> +#
> +# Test cases for the block-status cache.
> +#
> +# Copyright (C) 2022 Red Hat, Inc.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +
> +import os
> +import signal
> +import iotests
> +from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd
> +
> +
> +image_size = 1 * 1024 * 1024
> +test_img = os.path.join(iotests.test_dir, 'test.img')
> +
> +nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid')
> +nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
> +
> +
> +class TestBscWithNbd(iotests.QMPTestCase):
> +def setUp(self) -> None:
> +"""Just create an empty image with a read-only NBD server on it"""
> +assert qemu_img_create('-f', iotests.imgfmt, test_img,
> +   str(image_size)) == 0
> +
> +assert qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, '-t', '-A', 
> '-r',
> +f'--pid-file={nbd_pidfile}', test_img) == 0

This is a good place to comment that -A (or better --allocation-depth)
is required for this test.

> +
> +def tearDown(self) -> None:
> +with open(nbd_pidfile, encoding='utf-8') as f:
> +pid = int(f.read())
> +os.kill(pid, signal.SIGTERM)
> +os.remove(nbd_pidfile)
> +os.remove(test_img)
> +
> +def test_with_zero_bug(self) -> None:
> +"""
> +Verify that the block-status cache is not corrupted by a
> +want_zero=false call.
> +We can provoke a want_zero=false call with `qemu-img map` over NBD 
> with
> +x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map`

x-dirty-bitmap=qemu:allocation-depth looks a bit dirty to me but I guess
we don't have a better way without depending on another nbd client.

If depending on libnbd is ok, this test can be much simpler:

$ nbdinfo --map=base:allocation nbd+unix:///?socket=/tmp/nbd.sock
         0        4096  0  data
      4096  1073737728  3  hole,zero
$ nbdinfo --map=qemu:allocation-depth nbd+unix:///?socket=/tmp/nbd.sock
         0  1073741824  1  local
$ nbdinfo --map=qemu:allocation-depth nbd+unix:///?socket=/tmp/nbd.sock
         0  1073741824  1  local
$ nbdinfo --map=base:allocation nbd+unix:///?socket=/tmp/nbd.sock
         0  1073741824  0  data

> +(which results in want_zero=true), then using said
> +qemu:allocation-depth context, and finally another normal `map` to
> +verify that the cache has not been corrupted.
> +"""
> +
> +nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}'
> +nbd_img_opts_alloc_depth = nbd_img_opts + \
> +',x-dirty-bitmap=qemu:allocation-depth'
> +
> +# Normal map, results in want_zero=true.
> +# This will probably detect an allocated data sector first (qemu 
> likes
> +# to allocate the first sector to facilitate alignment probing), and
> +# then the rest to be zero.  The BSC will thus contain (if anything)
> +# one range covering the first sector.
> +map_pre = qemu_img_pipe('map', '--output=json', '--image-opts',
> +nbd_img_opts)
> +
> +# qemu:allocation-depth maps for want_zero=false.
> +# 

Re: [PATCH 2/4] build: make check-block a meson test

2022-01-17 Thread Paolo Bonzini

On 1/17/22 11:09, Thomas Huth wrote:


+  qemu_iotests_formats = {
+    'raw': 'quick',
+    'qcow2': 'slow',
+    'qed': 'thorough',
+    'vmdk': 'thorough',
+    'vpc': 'thorough'
+  }


I think the default behavior for "quick" should be to test with qcow2 - 
most iotests require that format and that's what we're currently using 
with "make check-block" by default... i.e. please swap raw and qcow2 here.


You're right; also, the tests should be disabled on non-POSIX systems.

Paolo



Re: [PATCH 0/2] block/io: Update BSC only if want_zero is true

2022-01-17 Thread Hanna Reitz

Forgot to CC qemu-stable.

On 17.01.22 17:26, Hanna Reitz wrote:

Hi,

As reported by Nir
(https://lists.nongnu.org/archive/html/qemu-block/2022-01/msg00292.html)
there’s a problem with the block-status cache, namely that it is updated
when want_zero is false, but we return the result later even when the
caller now passes want_zero=true.  In the worst case, the
want_zero=false call may have resulted in the cache containing an entry
describing the whole image to contain data, and then all future requests
will be served from that cache entry.

There are a couple ways this could be fixed (e.g. one cache per
want_zero mode, or storing want_zero in the cache and comparing it when
the cached data is fetched), but I think the simplest way is to only
store want_zero=true block-status results in the cache.  (This way, the
cache will not work with want_zero=false, but the want_zero=true case is
the one for which we introduced the cache in the first place.  I think
want_zero=false generally is fast enough(tm), that’s why we introduced
want_zero after all.)

Patch 1 is the fix, patch 2 a regression test.


Hanna Reitz (2):
   block/io: Update BSC only if want_zero is true
   iotests/block-status-cache: New test

  block/io.c|   6 +-
  tests/qemu-iotests/tests/block-status-cache   | 130 ++
  .../qemu-iotests/tests/block-status-cache.out |   5 +
  3 files changed, 140 insertions(+), 1 deletion(-)
  create mode 100755 tests/qemu-iotests/tests/block-status-cache
  create mode 100644 tests/qemu-iotests/tests/block-status-cache.out






[PATCH 0/2] block/io: Update BSC only if want_zero is true

2022-01-17 Thread Hanna Reitz
Hi,

As reported by Nir
(https://lists.nongnu.org/archive/html/qemu-block/2022-01/msg00292.html)
there’s a problem with the block-status cache, namely that it is updated
when want_zero is false, but we return the result later even when the
caller now passes want_zero=true.  In the worst case, the
want_zero=false call may have resulted in the cache containing an entry
describing the whole image to contain data, and then all future requests
will be served from that cache entry.

There are a couple ways this could be fixed (e.g. one cache per
want_zero mode, or storing want_zero in the cache and comparing it when
the cached data is fetched), but I think the simplest way is to only
store want_zero=true block-status results in the cache.  (This way, the
cache will not work with want_zero=false, but the want_zero=true case is
the one for which we introduced the cache in the first place.  I think
want_zero=false generally is fast enough(tm), that’s why we introduced
want_zero after all.)
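
(For illustration, the "store want_zero in the cache and compare it"
alternative would amount to roughly the sketch below -- the struct and field
names are assumptions, and it is not what this series implements:

    typedef struct BdrvBlockStatusCacheSketch {
        bool valid;
        bool want_zero;   /* mode the cached range was queried with */
        int64_t offset;
        int64_t bytes;
    } BdrvBlockStatusCacheSketch;

    static bool bsc_sketch_covers(const BdrvBlockStatusCacheSketch *bsc,
                                  bool want_zero, int64_t offset)
    {
        /* Only serve the cached range when it was filled in the same
         * want_zero mode as the current query. */
        return bsc->valid &&
               bsc->want_zero == want_zero &&
               offset >= bsc->offset &&
               offset < bsc->offset + bsc->bytes;
    }
)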

Patch 1 is the fix, patch 2 a regression test.


Hanna Reitz (2):
  block/io: Update BSC only if want_zero is true
  iotests/block-status-cache: New test

 block/io.c|   6 +-
 tests/qemu-iotests/tests/block-status-cache   | 130 ++
 .../qemu-iotests/tests/block-status-cache.out |   5 +
 3 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100755 tests/qemu-iotests/tests/block-status-cache
 create mode 100644 tests/qemu-iotests/tests/block-status-cache.out

-- 
2.33.1




[PATCH 2/2] iotests/block-status-cache: New test

2022-01-17 Thread Hanna Reitz
Add a new test to verify that want_zero=false block-status calls do not
pollute the block-status cache for want_zero=true calls.

We check want_zero=true calls and their results using `qemu-img map`
(over NBD), and want_zero=false calls also using `qemu-img map` over
NBD, but using the qemu:allocation-depth context.

(This test case cannot be integrated into nbd-qemu-allocation, because
that is a qcow2 test, and this is a raw test.)

Signed-off-by: Hanna Reitz 
---
 tests/qemu-iotests/tests/block-status-cache   | 130 ++
 .../qemu-iotests/tests/block-status-cache.out |   5 +
 2 files changed, 135 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/block-status-cache
 create mode 100644 tests/qemu-iotests/tests/block-status-cache.out

diff --git a/tests/qemu-iotests/tests/block-status-cache 
b/tests/qemu-iotests/tests/block-status-cache
new file mode 100755
index 00..1f2c3109d3
--- /dev/null
+++ b/tests/qemu-iotests/tests/block-status-cache
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Test cases for the block-status cache.
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import signal
+import iotests
+from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd
+
+
+image_size = 1 * 1024 * 1024
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid')
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+
+
+class TestBscWithNbd(iotests.QMPTestCase):
+def setUp(self) -> None:
+"""Just create an empty image with a read-only NBD server on it"""
+assert qemu_img_create('-f', iotests.imgfmt, test_img,
+   str(image_size)) == 0
+
+assert qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, '-t', '-A', '-r',
+f'--pid-file={nbd_pidfile}', test_img) == 0
+
+def tearDown(self) -> None:
+with open(nbd_pidfile, encoding='utf-8') as f:
+pid = int(f.read())
+os.kill(pid, signal.SIGTERM)
+os.remove(nbd_pidfile)
+os.remove(test_img)
+
+def test_with_zero_bug(self) -> None:
+"""
+Verify that the block-status cache is not corrupted by a
+want_zero=false call.
+We can provoke a want_zero=false call with `qemu-img map` over NBD with
+x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map`
+(which results in want_zero=true), then using said
+qemu:allocation-depth context, and finally another normal `map` to
+verify that the cache has not been corrupted.
+"""
+
+nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}'
+nbd_img_opts_alloc_depth = nbd_img_opts + \
+',x-dirty-bitmap=qemu:allocation-depth'
+
+# Normal map, results in want_zero=true.
+# This will probably detect an allocated data sector first (qemu likes
+# to allocate the first sector to facilitate alignment probing), and
+# then the rest to be zero.  The BSC will thus contain (if anything)
+# one range covering the first sector.
+map_pre = qemu_img_pipe('map', '--output=json', '--image-opts',
+nbd_img_opts)
+
+# qemu:allocation-depth maps for want_zero=false.
+# want_zero=false should (with the file driver, which the server is
+# using) report everything as data.  While this is sufficient for
+# want_zero=false, this is nothing that should end up in the
+# block-status cache.
+# Due to a bug, this information did end up in the cache, though, and
+# this would lead to wrong information being returned on subsequent
+# want_zero=true calls.
+#
+# We need to run this map twice: On the first call, we probably still
+# have the first sector in the cache, and so this will be served from
+# the cache; and only the subsequent range will be queried from the
+# block driver.  This subsequent range will then be entered into the
+# cache.
+# If we did a want_zero=true call at this point, we would thus get
+# correct information: The first sector is not covered by the cache, so
+# we would get fresh block-status information from the driver, which
+# would 

[PATCH 1/2] block/io: Update BSC only if want_zero is true

2022-01-17 Thread Hanna Reitz
We update the block-status cache whenever we get new information from a
bdrv_co_block_status() call to the block driver.  However, if we have
passed want_zero=false to that call, it may flag areas containing zeroes
as data, and so we would update the block-status cache with wrong
information.

Therefore, we should not update the cache with want_zero=false.

Reported-by: Nir Soffer 
Fixes: 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
   ("block: block-status cache for data regions")
Signed-off-by: Hanna Reitz 
---
 block/io.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index bb0a254def..4e4cb556c5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2497,8 +2497,12 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
  * non-protocol nodes, and then it is never used.  However, filling
  * the cache requires an RCU update, so double check here to avoid
  * such an update if possible.
+ *
+ * Check want_zero, because we only want to update the cache when 
we
+ * have accurate information about what is zero and what is data.
  */
-if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
+if (want_zero &&
+ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
 QLIST_EMPTY(&bs->children))
 {
 /*
-- 
2.33.1




Re: [PATCH V2 for-6.2 2/2] block/rbd: workaround for ceph issue #53784

2022-01-17 Thread Stefano Garzarella

On Thu, Jan 13, 2022 at 03:44:26PM +0100, Peter Lieven wrote:

librbd had a bug until early 2022 that affected all versions of ceph that
supported fast-diff. This bug results in reporting of incorrect offsets
if the offset parameter to rbd_diff_iterate2 is not object aligned.

This patch works around this bug for pre Quincy versions of librbd.



I'm not sure, but maybe we could add the fixes tag also to this patch, 
since without this workaround we can have issues with buggy librbd.


Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b


Cc: qemu-sta...@nongnu.org
Signed-off-by: Peter Lieven 
---
block/rbd.c | 42 --
1 file changed, 40 insertions(+), 2 deletions(-)


Tested-by: Stefano Garzarella 




diff --git a/block/rbd.c b/block/rbd.c
index 20bb896c4a..d174d51659 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1320,6 +1320,7 @@ static int coroutine_fn 
qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
+uint64_t head = 0;

assert(offset + bytes <= s->image_size);

@@ -1347,7 +1348,43 @@ static int coroutine_fn 
qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}

-r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
+/*
+ * librbd had a bug until early 2022 that affected all versions of ceph 
that
+ * supported fast-diff. This bug results in reporting of incorrect offsets
+ * if the offset parameter to rbd_diff_iterate2 is not object aligned.
+ * Work around this bug by rounding down the offset to object boundaries.
+ * This is OK because we call rbd_diff_iterate2 with whole_object = true.
+ * However, this workaround only works for non cloned images with default
+ * striping.
+ *
+ * See: https://tracker.ceph.com/issues/53784
+ */
+
+/*  check if RBD image has non-default striping enabled */
+if (features & RBD_FEATURE_STRIPINGV2) {
+return status;
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+/*
+ * check if RBD image is a clone (= has a parent).
+ *
+ * rbd_get_parent_info is deprecated from Nautilus onwards, but the
+ * replacement rbd_get_parent is not present in Luminous and Mimic.
+ */
+if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
+return status;
+}
+#pragma GCC diagnostic pop
+
+head = req.offs & (s->object_size - 1);
+req.offs -= head;
+bytes += head;
+#endif
+
+r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
  qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1366,7 +1403,8 @@ static int coroutine_fn 
qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}

-*pnum = req.bytes;
+assert(req.bytes > head);
+*pnum = req.bytes - head;
return status;
}

--
2.25.1
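
As a concrete illustration of the rounding done in the hunk above (the
numbers are made up; assume the default 4 MiB object size and an unaligned
request):

    uint64_t object_size = 4 * 1024 * 1024;
    uint64_t offset      = 6 * 1024 * 1024;      /* not object aligned */
    uint64_t bytes       = 1 * 1024 * 1024;

    uint64_t head = offset & (object_size - 1);  /* head = 2 MiB */
    offset -= head;                              /* 4 MiB, object aligned */
    bytes  += head;                              /* 3 MiB queried instead */
    /* rbd_diff_iterate2() then reports req.bytes for the aligned request,
     * and the head is subtracted again: *pnum = req.bytes - head. */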








Re: Inconsistent block status reply in qemu-nbd in qemu-img-6.2.0

2022-01-17 Thread Hanna Reitz

On 17.01.22 12:38, Kevin Wolf wrote:

On 17.01.2022 at 10:55, Hanna Reitz wrote:

On 17.01.22 10:52, Kevin Wolf wrote:

On 17.01.2022 at 09:46, Hanna Reitz wrote:

On 16.01.22 19:09, Nir Soffer wrote:

On Sun, Jan 16, 2022 at 1:17 PM Nir Soffer  wrote:

Some of our tests started to fail since qemu-img 6.2.0 released.

Here is an example failure:

$ truncate -s1g /data/scratch/empty-1g.raw

$ qemu-nbd -r -t -e8 -k /tmp/nbd.sock -A -f raw /data/scratch/empty-1g.raw &

git bisect points to this commit:

$ git bisect good
0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2 is the first bad commit
commit 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
Author: Hanna Reitz 
Date:   Thu Aug 12 10:41:44 2021 +0200

   block: block-status cache for data regions

The commit message says:

   (Note that only caching data regions but not zero regions means that
   returning false information from the cache is not catastrophic: Treating
   zeroes as data is fine.  While we try to invalidate the cache on zero
   writes and discards, such incongruences may still occur when there are
   other processes writing to the image.)

I don't think it is fine to report zero as data. This can cause severe
performance issues when users copy unneeded zero extents over the network,
instead of doing no work with zero bandwidth.

You’re right, it was meant as a contrast to whether this might lead to
catastrophic data failure.

But it was also meant as a risk estimation.  There wasn’t supposed to be a
situation where the cache would report zeroes as data (other than with
external writers), and so that’s why I thought it’d be fine.  But, well,
things are supposed to be one way, and then you (me) write buggy code...

Thanks for the reproducer steps, I can reproduce the bug with your script
(not with nbdinfo fwiw) and the problem seems to be this:

diff --git a/block/io.c b/block/io.c
index bb0a254def..83e94faeaf 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2498,7 +2498,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
      * the cache requires an RCU update, so double check here to avoid
      * such an update if possible.
      */
-    if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
+    if (want_zero &&
+        ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
         QLIST_EMPTY(&bs->children))
     {
         /*

We should update the cache only when we have accurate zero information, so
only if want_zero was true.

Either that, or store the want_zero value as well and compare it before
using the cache.

Depends on whether we need the optimisation for want_zero=false cases,
too. I think this mostly means bdrv_is_allocated(_above)(), which seems
to be relevant for the commit and stream jobs and 'qemu-img rebase' at
least and some less commonly used stuff. There are some other cases of
is_allocated (like in mirror) where it doesn't make a difference because
we always process the maximum number of bytes with the first call.

I think we need the cache only for want_zero=true cases, because allocation
information is handled by the format layer, which doesn’t use the cache at
all.  Also, the reason for the cache was to skip slow SEEK_HOLE/DATA calls
on the protocol level, which is a want_zero=true case.

Hm, good point. But we would still call the protocol driver with
want_zero=false for raw images.

Obviously, this removes the commit case because it calls is_allocated
only on the overlay and that can't be raw. However, streaming (or more
specifically the copy-on-read driver) does seem to include the base
image in its bdrv_is_allocated_above(), which may be raw.

So it looks to me as if the optimisation were still relevant when you're
streaming from a raw base image into a qcow2 overlay.


I don’t think it usually is, because AFAIR want_zero was introduced to 
speed up these cases where we only want allocation information, and the 
driver should deliver zero information only when it comes at no 
additional cost.


For most protocol drivers, there is no other information to query, so 
“at no additional cost” is the same as “at no cost”, which seems 
impossible and so I’d assume that most drivers just return everything as 
allocated with want_zero=false (file-posix does this), and so a cache 
won’t gain us anything.  If drivers happen to be able to perform a 
miracle and really provide the information at no cost, then we also 
don’t need a cache.
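
(The pattern described above looks roughly like the following -- a simplified
sketch, not the literal file-posix code, with illustrative names:

    static int coroutine_fn example_co_block_status(BlockDriverState *bs,
                                                    bool want_zero,
                                                    int64_t offset,
                                                    int64_t bytes,
                                                    int64_t *pnum, int64_t *map,
                                                    BlockDriverState **file)
    {
        if (!want_zero) {
            /* Cheap answer: report the whole range as allocated data and
             * skip the expensive SEEK_HOLE/SEEK_DATA probing. */
            *pnum = bytes;
            *map = offset;
            *file = bs;
            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
        }
        /* want_zero=true: do the real, slower hole/data probing here. */
        return 0;
    }
)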


NBD however is a protocol driver that does have other information to 
query, namely allocation information, and zero information falls out of 
there as a side effect.  So there caching would help, I suppose. But 
then again we didn’t really add the cache to be able to skip some NBD 
requests...


Conceptually, I think want_zero and the BSC are two solutions to the 
same problem.  We have introduced the BSC because we couldn’t make all 
relevant calls pass 

Re: [PATCH v3] hw/dma: Let dma_buf_read() / dma_buf_write() propagate MemTxResult

2022-01-17 Thread Philippe Mathieu-Daudé via
On 1/17/22 13:51, Philippe Mathieu-Daudé wrote:
> From: Philippe Mathieu-Daudé 
> 
> Since commit 292e13142d2, dma_buf_rw() returns a MemTxResult type.
> Do not discard it, return it to the caller. Pass the previously
> returned value (the QEMUSGList residual size, which was rarely used)
> as an optional argument.
> 
> With this new API, SCSIRequest::residual might now be accessed via
> a pointer. Since the size_t type does not have the same size on
> 32 and 64-bit host architectures, convert it to a uint64_t, which
> is big enough to hold the residual size, and the type is constant
> on both 32/64-bit hosts.
> 
> Update the few dma_buf_read() / dma_buf_write() callers to the new
> API.
> 
> Reviewed-by: Klaus Jensen 
> Signed-off-by: Philippe Mathieu-Daudé 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> v3:
> - Reworded to clarify the SCSIRequest::residual type change
> - Posted out of the "Use dma_addr_t type definition when relevant"
>   series (dhildenb)

Based-on: <2022084309.28637-1-f4...@amsat.org>



[PATCH v3] hw/dma: Let dma_buf_read() / dma_buf_write() propagate MemTxResult

2022-01-17 Thread Philippe Mathieu-Daudé via
From: Philippe Mathieu-Daudé 

Since commit 292e13142d2, dma_buf_rw() returns a MemTxResult type.
Do not discard it, return it to the caller. Pass the previously
returned value (the QEMUSGList residual size, which was rarely used)
as an optional argument.

With this new API, SCSIRequest::residual might now be accessed via
a pointer. Since the size_t type does not have the same size on
32 and 64-bit host architectures, convert it to a uint64_t, which
is big enough to hold the residual size, and the type is constant
on both 32/64-bit hosts.

Update the few dma_buf_read() / dma_buf_write() callers to the new
API.

Reviewed-by: Klaus Jensen 
Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Philippe Mathieu-Daudé 
---
v3:
- Reworded to clarify the SCSIRequest::residual type change
- Posted out of the "Use dma_addr_t type definition when relevant"
  series (dhildenb)
---
 include/hw/scsi/scsi.h |  2 +-
 include/sysemu/dma.h   |  4 +--
 hw/ide/ahci.c  |  8 +++---
 hw/nvme/ctrl.c |  4 +--
 hw/scsi/megasas.c  | 59 ++
 hw/scsi/scsi-bus.c |  6 +++--
 softmmu/dma-helpers.c  | 16 +++-
 7 files changed, 59 insertions(+), 40 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index b27d133b113..1ffb367f94f 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -30,7 +30,7 @@ struct SCSIRequest {
 int16_t   status;
 int16_t   host_status;
 void  *hba_private;
-size_t    residual;
+uint64_t  residual;
 SCSICommand   cmd;
 NotifierList  cancel_notifiers;
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 36039c5e687..a1ac5bc1b54 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -301,9 +301,9 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
   QEMUSGList *sg, uint64_t offset, uint32_t align,
   BlockCompletionFunc *cb, void *opaque);
-dma_addr_t dma_buf_read(void *ptr, dma_addr_t len,
+MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
  QEMUSGList *sg, MemTxAttrs attrs);
-dma_addr_t dma_buf_write(void *ptr, dma_addr_t len,
+MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
   QEMUSGList *sg, MemTxAttrs attrs);
 
 void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 6c727dd0c08..7ce001cacdb 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1384,9 +1384,9 @@ static void ahci_pio_transfer(const IDEDMA *dma)
 const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
 
 if (is_write) {
-dma_buf_write(s->data_ptr, size, &s->sg, attrs);
+dma_buf_write(s->data_ptr, size, NULL, &s->sg, attrs);
 } else {
-dma_buf_read(s->data_ptr, size, &s->sg, attrs);
+dma_buf_read(s->data_ptr, size, NULL, &s->sg, attrs);
 }
 }
 
@@ -1479,9 +1479,9 @@ static int ahci_dma_rw_buf(const IDEDMA *dma, bool 
is_write)
 }
 
 if (is_write) {
-dma_buf_read(p, l, &s->sg, MEMTXATTRS_UNSPECIFIED);
+dma_buf_read(p, l, NULL, &s->sg, MEMTXATTRS_UNSPECIFIED);
 } else {
-dma_buf_write(p, l, &s->sg, MEMTXATTRS_UNSPECIFIED);
+dma_buf_write(p, l, NULL, &s->sg, MEMTXATTRS_UNSPECIFIED);
 }
 
 /* free sglist, update byte count */
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index c3c49176110..1f62116af98 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1150,9 +1150,9 @@ static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t 
*ptr, uint32_t len,
 dma_addr_t residual;
 
 if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
-residual = dma_buf_write(ptr, len, &sg->qsg, attrs);
+dma_buf_write(ptr, len, &residual, &sg->qsg, attrs);
 } else {
-residual = dma_buf_read(ptr, len, &sg->qsg, attrs);
+dma_buf_read(ptr, len, &residual, &sg->qsg, attrs);
 }
 
 if (unlikely(residual)) {
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 6c1ae6b980f..de613c8b355 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -750,6 +750,7 @@ static int megasas_ctrl_get_info(MegasasState *s, 
MegasasCmd *cmd)
 size_t dcmd_size = sizeof(info);
 BusChild *kid;
 int num_pd_disks = 0;
+dma_addr_t residual;
 
 memset(&info, 0x0, dcmd_size);
 if (cmd->iov_size < dcmd_size) {
@@ -860,7 +861,9 @@ static int megasas_ctrl_get_info(MegasasState *s, 
MegasasCmd *cmd)
MFI_INFO_PDMIX_SATA |
MFI_INFO_PDMIX_LD);
 
-cmd->iov_size -= dma_buf_read(&info, dcmd_size, &cmd->qsg, MEMTXATTRS_UNSPECIFIED);
+dma_buf_read(&info, dcmd_size, &residual, &cmd->qsg,
+ MEMTXATTRS_UNSPECIFIED);
+cmd->iov_size -= residual;
 return MFI_STAT_OK;
 }
 
@@ -868,6 +871,7 @@ static int 

Re: [PATCH v3 08/19] block/reqlist: add reqlist_wait_all()

2022-01-17 Thread Nikta Lapshin


On 12/22/21 20:40, Vladimir Sementsov-Ogievskiy wrote:


Add function to wait for all intersecting requests.
To be used in the further commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy
---
  include/block/reqlist.h | 8 
  block/reqlist.c | 8 
  2 files changed, 16 insertions(+)

diff --git a/include/block/reqlist.h b/include/block/reqlist.h
index b904d80216..4695623bb3 100644
--- a/include/block/reqlist.h
+++ b/include/block/reqlist.h
@@ -53,6 +53,14 @@ BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t 
offset,
  bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset,
 int64_t bytes, CoMutex *lock);
  
+/*

+ * Wait for all intersecting requests. It just calls reqlist_wait_one() in a
+ * loop, caller is responsible to stop producing new requests in this region
+ * in parallel, otherwise reqlist_wait_all() may never return.
+ */
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,
+   int64_t bytes, CoMutex *lock);
+
  /*
   * Shrink request and wake all waiting coroutines (may be some of them are not
   * intersecting with shrunk request).
diff --git a/block/reqlist.c b/block/reqlist.c
index 5e320ba649..52a362a1d8 100644
--- a/block/reqlist.c
+++ b/block/reqlist.c
@@ -57,6 +57,14 @@ bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, 
int64_t offset,
  return true;
  }
  
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,

+   int64_t bytes, CoMutex *lock)
+{
+while (reqlist_wait_one(reqs, offset, bytes, lock)) {
+/* continue */
+}
+}
+
  void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes)
  {
  if (new_bytes == req->bytes) {



Reviewed-by: Nikita Lapshin


Re: [PATCH v3 07/19] block/dirty-bitmap: introduce bdrv_dirty_bitmap_status()

2022-01-17 Thread Vladimir Sementsov-Ogievskiy

17.01.2022 13:06, Nikta Lapshin wrote:

On 12/22/21 20:40, Vladimir Sementsov-Ogievskiy wrote:


Add a convenient function similar with bdrv_block_status() to get
status of dirty bitmap.

Signed-off-by: Vladimir Sementsov-Ogievskiy
---
  include/block/dirty-bitmap.h |  2 ++
  include/qemu/hbitmap.h   | 11 +++
  block/dirty-bitmap.c |  6 ++
  util/hbitmap.c   | 36 
  4 files changed, 55 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index f95d350b70..2ae7dc3d1d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -115,6 +115,8 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap 
*bitmap, int64_t offset,
  bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
  int64_t start, int64_t end, int64_t max_dirty_count,
  int64_t *dirty_start, int64_t *dirty_count);
+void bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+  int64_t bytes, bool *is_dirty, int64_t *count);
  BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
Error **errp);
  
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h

index 5e71b6d6f7..845fda12db 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -340,6 +340,17 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t 
start, int64_t end,
   int64_t max_dirty_count,
   int64_t *dirty_start, int64_t *dirty_count);
  
+/*

+ * bdrv_dirty_bitmap_status:
+ * @hb: The HBitmap to operate on
+ * @start: the offset to start from
+ * @end: end of requested area
+ * @is_dirty: is bitmap dirty at @offset
+ * @pnum: how many bits has same value starting from @offset
+ */
+void hbitmap_status(const HBitmap *hb, int64_t offset, int64_t bytes,
+bool *is_dirty, int64_t *pnum);
+


I think the description should be changed; there are no start and end
arguments in the function.


  /**
   * hbitmap_iter_next:
   * @hbi: HBitmapIter to operate on.
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 94a0276833..e4a836749a 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -875,6 +875,12 @@ bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap 
*bitmap,
 dirty_start, dirty_count);
  }
  
+void bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,

+  int64_t bytes, bool *is_dirty, int64_t *count)
+{
+hbitmap_status(bitmap->bitmap, offset, bytes, is_dirty, count);
+}
+
  /**
   * bdrv_merge_dirty_bitmap: merge src into dest.
   * Ensures permissions on bitmaps are reasonable; use for public API.
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 305b894a63..ae8d0eb4d2 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -301,6 +301,42 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t 
start, int64_t end,
  return true;
  }
  
+void hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,

+bool *is_dirty, int64_t *pnum)
+{
+int64_t next_dirty, next_zero;
+
+assert(start >= 0);
+assert(count > 0);
+assert(start + count <= hb->orig_size);
+
+next_dirty = hbitmap_next_dirty(hb, start, count);
+if (next_dirty == -1) {
+*pnum = count;
+*is_dirty = false;
+return;
+}
+
+if (next_dirty > start) {
+*pnum = next_dirty - start;
+*is_dirty = false;
+return;
+}
+
+assert(next_dirty == start);
+
+next_zero = hbitmap_next_zero(hb, start, count);
+if (next_zero == -1) {
+*pnum = count;
+*is_dirty = true;
+return;
+}
+
+assert(next_zero > start);
+*pnum = next_zero - start;
+*is_dirty = true;
+}
+


This function finds if this bitmap is dirty and also counts first bits.


Not exactly.

The idea was to have one function that works like block_status: it returns
the status of the bit at offset and counts how many bits after it have the
same status.
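
With it, a caller can walk a bitmap the same way one walks block status; a
minimal sketch (counting dirty bytes in a range, just for illustration, not
part of the series):

static int64_t count_dirty_bytes(BdrvDirtyBitmap *bitmap,
                                 int64_t offset, int64_t bytes)
{
    int64_t dirty = 0;

    while (bytes > 0) {
        bool is_dirty;
        int64_t count;

        /* Status of the bit at offset plus the length of the run it starts */
        bdrv_dirty_bitmap_status(bitmap, offset, bytes, &is_dirty, &count);
        if (is_dirty) {
            dirty += count;
        }
        offset += count;
        bytes -= count;
    }
    return dirty;
}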


I don't think that this is a problem, but maybe it should be divided?


No, I need it as one function, for further commits.




  bool hbitmap_empty(const HBitmap *hb)
  {
  return hb->count == 0;


With corrected description
Reviewed-by: Nikita Lapshin



thanks!

--
Best regards,
Vladimir



Re: Inconsistent block status reply in qemu-nbd in qemu-img-6.2.0

2022-01-17 Thread Kevin Wolf
Am 17.01.2022 um 10:55 hat Hanna Reitz geschrieben:
> On 17.01.22 10:52, Kevin Wolf wrote:
> > Am 17.01.2022 um 09:46 hat Hanna Reitz geschrieben:
> > > On 16.01.22 19:09, Nir Soffer wrote:
> > > > On Sun, Jan 16, 2022 at 1:17 PM Nir Soffer  wrote:
> > > > > Some of our tests started to fail since qemu-img 6.2.0 released.
> > > > > 
> > > > > Here is an example failure:
> > > > > 
> > > > > $ truncate -s1g /data/scratch/empty-1g.raw
> > > > > 
> > > > > $ qemu-nbd -r -t -e8 -k /tmp/nbd.sock -A -f raw 
> > > > > /data/scratch/empty-1g.raw &
> > > > git bisect points to this commit:
> > > > 
> > > > $ git bisect good
> > > > 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2 is the first bad commit
> > > > commit 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
> > > > Author: Hanna Reitz 
> > > > Date:   Thu Aug 12 10:41:44 2021 +0200
> > > > 
> > > >   block: block-status cache for data regions
> > > > 
> > > > The commit message says:
> > > > 
> > > >   (Note that only caching data regions but not zero regions means 
> > > > that
> > > >   returning false information from the cache is not catastrophic: 
> > > > Treating
> > > >   zeroes as data is fine.  While we try to invalidate the cache on 
> > > > zero
> > > >   writes and discards, such incongruences may still occur when 
> > > > there are
> > > >   other processes writing to the image.)
> > > > 
> > > > I don't think it is fine to report zero as data. This can cause severe
> > > > performance
> > > > issues when users copy unneeded zero extents over the network, instead 
> > > > of
> > > > doing no work with zero bandwidth.
> > > You’re right, it was meant as a contrast to whether this might lead to
> > > catastrophic data failure.
> > > 
> > > But it was also meant as a risk estimation.  There wasn’t supposed to be a
> > > situation where the cache would report zeroes as data (other than with
> > > external writers), and so that’s why I thought it’d be fine.  But, well,
> > > things are supposed to be one way, and then you (me) write buggy code...
> > > 
> > > Thanks for the reproducer steps, I can reproduce the bug with your script
> > > (not with nbdinfo fwiw) and the problem seems to be this:
> > > 
> > > diff --git a/block/io.c b/block/io.c
> > > index bb0a254def..83e94faeaf 100644
> > > --- a/block/io.c
> > > +++ b/block/io.c
> > > @@ -2498,7 +2498,8 @@ static int coroutine_fn
> > > bdrv_co_block_status(BlockDriverState *bs,
> > >    * the cache requires an RCU update, so double check here to
> > > avoid
> > >    * such an update if possible.
> > >    */
> > > -    if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
> > > +    if (want_zero &&
> > > +    ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
> > >   QLIST_EMPTY(>children))
> > >   {
> > >   /*
> > > 
> > > We should update the cache only when we have accurate zero information, so
> > > only if want_zero was true.
> > Either that, or store the want_zero value as well and compare it before
> > using the cache.
> > 
> > Depends on whether we need the optimisation for want_zero=false cases,
> > too. I think this mostly means bdrv_is_allocated(_above)(), which seems
> > to be relevant for the commit and stream jobs and 'qemu-img rebase' at
> > least and some less commonly used stuff. There are some other cases of
> > is_allocated (like in mirror) where it doesn't make a difference because
> > we always process the maximum number of bytes with the first call.
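
Roughly, storing the mode could look like this (struct and helper names are
illustrative only, not the actual definitions in block_int.h):

typedef struct ExampleStatusCache {      /* illustrative only */
    bool valid;
    bool want_zero;      /* mode the cached range was obtained with */
    int64_t data_start;
    int64_t data_end;
} ExampleStatusCache;

static bool example_cache_usable(const ExampleStatusCache *c,
                                 bool want_zero, int64_t offset)
{
    /* Reuse the entry only if it was filled in the same mode and
     * actually covers the requested offset. */
    return c->valid &&
           c->want_zero == want_zero &&
           offset >= c->data_start && offset < c->data_end;
}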
> 
> I think we need the cache only for want_zero=true cases, because allocation
> information is handled by the format layer, which doesn’t use the cache at
> all.  Also, the reason for the cache was to skip slow SEEK_HOLE/DATA calls
> on the protocol level, which is a want_zero=true case.

Hm, good point. But we would still call the protocol driver with
want_zero=false for raw images.

Obviously, this removes the commit case because it calls is_allocated
only on the overlay and that can't be raw. However, streaming (or more
specifically the copy-on-read driver) does seem to include the base
image in its bdrv_is_allocated_above(), which may be raw.

So it looks to me as if the optimisation were still relevant when you're
streaming from a raw base image into a qcow2 overlay.

Kevin




Re: [PATCH v3 04/19] block/copy-before-write: add bitmap open parameter

2022-01-17 Thread Vladimir Sementsov-Ogievskiy

14.01.2022 20:47, Hanna Reitz wrote:

On 22.12.21 18:40, Vladimir Sementsov-Ogievskiy wrote:

This brings "incremental" mode to copy-before-write filter: user can
specify bitmap so that filter will copy only "dirty" areas.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  qapi/block-core.json  | 10 +-
  block/copy-before-write.c | 30 +-
  2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1d3dd9cb48..6904daeacf 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4167,11 +4167,19 @@
  #
  # @target: The target for copy-before-write operations.
  #
+# @bitmap: If specified, copy-before-write filter will do
+#  copy-before-write operations only for dirty regions of the
+#  bitmap. Bitmap size must be equal to length of file and
+#  target child of the filter. Note also, that bitmap is used
+#  only to initialize internal bitmap of the process, so further
+#  modifications (or removing) of specified bitmap doesn't
+#  influence the filter.
+#
  # Since: 6.2
  ##
  { 'struct': 'BlockdevOptionsCbw',
    'base': 'BlockdevOptionsGenericFormat',
-  'data': { 'target': 'BlockdevRef' } }
+  'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap' } }
  ##
  # @BlockdevOptions:
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 799223e3fb..4cd90d22df 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -149,6 +149,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  Error **errp)
  {
  BDRVCopyBeforeWriteState *s = bs->opaque;
+    BdrvDirtyBitmap *bitmap = NULL;
  bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -163,6 +164,33 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  return -EINVAL;
  }
+    if (qdict_haskey(options, "bitmap.node") ||
+    qdict_haskey(options, "bitmap.name"))
+    {
+    const char *bitmap_node, *bitmap_name;
+
+    if (!qdict_haskey(options, "bitmap.node")) {
+    error_setg(errp, "bitmap.node is not specified");
+    return -EINVAL;
+    }
+
+    if (!qdict_haskey(options, "bitmap.name")) {
+    error_setg(errp, "bitmap.name is not specified");
+    return -EINVAL;
+    }
+
+    bitmap_node = qdict_get_str(options, "bitmap.node");
+    bitmap_name = qdict_get_str(options, "bitmap.name");
+    qdict_del(options, "bitmap.node");
+    qdict_del(options, "bitmap.name");


I’m not really a fan of this manual parsing, but I can see nothing technically 
wrong with it.

Still, what do you think of using an input visitor, like:

QDict *bitmap_qdict;

qdict_extract_subqdict(options, _qdict, "bitmap.");
if (qdict_size(bitmap_qdict) > 0) {
     BlockDirtyBitmap *bmp_param;
     Visitor *v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp);
     visit_type_BlockDirtyBitmap(v, NULL, _param, errp);
     visit_free(v);
     qobject_unref(bitmap_qdict);

     bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, ...);
     qapi_free_BlockDirtyBitmap(bmp_param);
}

(+ error handling, which is why perhaps the first block should be put into a 
separate function cbw_get_bitmap_param() to simplify error handling)



Will try. Hmm. At some point we should start to generate _marshal_ wrappers
and handle _open() realizations like we do for QMP commands.
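
A rough sketch of such a helper, based on your snippet (untested; a caller
would still have to tell "no bitmap given" apart from an error, e.g. by
checking *errp):

static BdrvDirtyBitmap *cbw_get_bitmap_param(QDict *options, Error **errp)
{
    BlockDirtyBitmap *bmp_param = NULL;
    BdrvDirtyBitmap *bitmap;
    QDict *bitmap_qdict;
    Visitor *v;

    qdict_extract_subqdict(options, &bitmap_qdict, "bitmap.");
    if (qdict_size(bitmap_qdict) == 0) {
        qobject_unref(bitmap_qdict);
        return NULL;                     /* no bitmap requested */
    }

    v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp);
    if (v) {
        visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp);
        visit_free(v);
    }
    qobject_unref(bitmap_qdict);
    if (!bmp_param) {
        return NULL;                     /* *errp is set */
    }

    bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name,
                                       NULL, errp);
    qapi_free_BlockDirtyBitmap(bmp_param);
    return bitmap;                       /* NULL here also means *errp set */
}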


+
+    bitmap = block_dirty_bitmap_lookup(bitmap_node, bitmap_name, NULL,
+   errp);
+    if (!bitmap) {
+    return -EINVAL;
+    }
+    }
+
  bs->total_sectors = bs->file->bs->total_sectors;
  bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
  (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
@@ -170,7 +198,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
   bs->file->bs->supported_zero_flags);
-    s->bcs = block_copy_state_new(bs->file, s->target, NULL, errp);
+    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
  if (!s->bcs) {
  error_prepend(errp, "Cannot create block-copy-state: ");
  return -EINVAL;





--
Best regards,
Vladimir



Re: [PATCH 2/4] build: make check-block a meson test

2022-01-17 Thread Thomas Huth

On 15/01/2022 23.21, Paolo Bonzini wrote:

"meson test" can be asked to run tests verbosely; this makes it usable
also for qemu-iotests's own harness, and it lets "make check-block"
reuse mtest2make.py's infrastructure to find and build test dependencies.

Adjust check-block.sh to use the standard exit code that reports a test
as skipped.  Alternatively, in the future we could make it produce TAP
output, which is consistent with all other "make check" tests.

Signed-off-by: Paolo Bonzini 
---
  meson.build|  6 +++---
  scripts/mtest2make.py  | 10 +-
  tests/Makefile.include | 16 +++-
  tests/check-block.sh   | 28 +---
  tests/meson.build  |  1 +
  tests/qemu-iotests/meson.build | 29 +
  6 files changed, 58 insertions(+), 32 deletions(-)
  create mode 100644 tests/qemu-iotests/meson.build

[...]

diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build
new file mode 100644
index 00..94f161d59c
--- /dev/null
+++ b/tests/qemu-iotests/meson.build
@@ -0,0 +1,29 @@
+if have_tools
+  qemu_iotests_binaries = [qemu_img, qemu_io, qemu_nbd, qsd]
+  qemu_iotests_env = {'PYTHON': python.full_path()}
+  qemu_iotests_formats = {
+'raw': 'quick',
+'qcow2': 'slow',
+'qed': 'thorough',
+'vmdk': 'thorough',
+'vpc': 'thorough'
+  }


I think the default behavior for "quick" should be to test with qcow2 - most 
iotests require that format and that's what we're currently using with "make 
check-block" by default... i.e. please swap raw and qcow2 here.


 Thomas




Re: Inconsistent block status reply in qemu-nbd in qemu-img-6.2.0

2022-01-17 Thread Nir Soffer
On Mon, Jan 17, 2022 at 10:46 AM Hanna Reitz  wrote:
>
> On 16.01.22 19:09, Nir Soffer wrote:
> > On Sun, Jan 16, 2022 at 1:17 PM Nir Soffer  wrote:
> >> Some of our tests started to fail since qemu-img 6.2.0 released.
> >>
> >> Here is an example failure:
> >>
> >> $ truncate -s1g /data/scratch/empty-1g.raw
> >>
> >> $ qemu-nbd -r -t -e8 -k /tmp/nbd.sock -A -f raw /data/scratch/empty-1g.raw 
> >> &
> > git bisect points to this commit:
> >
> > $ git bisect good
> > 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2 is the first bad commit
> > commit 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
> > Author: Hanna Reitz 
> > Date:   Thu Aug 12 10:41:44 2021 +0200
> >
> >  block: block-status cache for data regions
> >
> > The commit message says:
> >
> >  (Note that only caching data regions but not zero regions means that
> >  returning false information from the cache is not catastrophic: 
> > Treating
> >  zeroes as data is fine.  While we try to invalidate the cache on zero
> >  writes and discards, such incongruences may still occur when there are
> >  other processes writing to the image.)
> >
> > I don't think it is fine to report zero as data. This can cause severe
> > performance
> > issues when users copy unneeded zero extents over the network, instead of
> > doing no work with zero bandwidth.
>
> You’re right, it was meant as a contrast to whether this might lead to
> catastrophic data failure.
>
> But it was also meant as a risk estimation.  There wasn’t supposed to be
> a situation where the cache would report zeroes as data (other than with
> external writers), and so that’s why I thought it’d be fine.  But, well,
> things are supposed to be one way, and then you (me) write buggy code...
>
> Thanks for the reproducer steps, I can reproduce the bug with your
> script (not with nbdinfo fwiw) and the problem seems to be this:
>
> diff --git a/block/io.c b/block/io.c
> index bb0a254def..83e94faeaf 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2498,7 +2498,8 @@ static int coroutine_fn
> bdrv_co_block_status(BlockDriverState *bs,
>* the cache requires an RCU update, so double check here
> to avoid
>* such an update if possible.
>*/
> -if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
> +if (want_zero &&
> +ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
>   QLIST_EMPTY(>children))
>   {
>   /*
>
> We should update the cache only when we have accurate zero information,
> so only if want_zero was true.

Indeed if I disable --allocation-depth in qemu-nbd, this issue cannot
be reproduced.

$ cat reproduce.py
import logging
import subprocess
import time

from ovirt_imageio._internal import nbd

logging.basicConfig(level=logging.DEBUG)

qemu_nbd = subprocess.Popen([
"./qemu-nbd",
"--read-only",
"--persistent",
"--shared=8",
##"--allocation-depth",
"--socket=/tmp/nbd.sock",
"--format=raw",
"/data/scratch/empty-1g.raw"
])

time.sleep(0.2)

with nbd.Client(nbd.UnixAddress("/tmp/nbd.sock")) as c1:
first = c1.extents(0, 128*1024**2)
second = c1.extents(0, 128*1024**2)

qemu_nbd.kill()
qemu_nbd.wait()

assert first == second, f"Expected {first}, got {second}"


So this is a result of getting allocation depth after getting base allocation:

2153 static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
2154  uint64_t bytes, NBDExtentArray *ea)
2155 {
2156 while (bytes) {
2157 int64_t num;
2158 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
2159   );

We can mitigate this in oVirt by disabling --allocation-depth for raw images,
since it does not provide any value in this case.

> > Do we have a way to disable this cache, at least for nbd?
>
> No, unfortunately not.  We could implement it, but I think the
> management layer would need to specify it for all protocol nodes where
> it wants the cache to be disabled.
>
> Auto-detecting would be difficult for qemu, because it would basically
> mean detecting whether there are any exports in the block graph
> somewhere above the node in question (not even immediately above it,
> because for example in your example there’s a raw node between the
> export and the protocol node whose block-status information we’re caching).
>
> I assume you’re now very skeptical of the cache, so even if we implement
> a fix for this problem, you’ll probably still want that option to
> disable it, right?

It would be safer if we could consume optimizations like this gradually, so in
case of issues like this, the admin can disable the cache in the
field. But this is
a high cost in testing - you need to test both with and without the cache and
this does not scale.

Nir




Re: [PATCH v3 07/19] block/dirty-bitmap: introduce bdrv_dirty_bitmap_status()

2022-01-17 Thread Nikta Lapshin

On 12/22/21 20:40, Vladimir Sementsov-Ogievskiy wrote:


Add a convenient function similar with bdrv_block_status() to get
status of dirty bitmap.

Signed-off-by: Vladimir Sementsov-Ogievskiy
---
  include/block/dirty-bitmap.h |  2 ++
  include/qemu/hbitmap.h   | 11 +++
  block/dirty-bitmap.c |  6 ++
  util/hbitmap.c   | 36 
  4 files changed, 55 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index f95d350b70..2ae7dc3d1d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -115,6 +115,8 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap 
*bitmap, int64_t offset,
  bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
  int64_t start, int64_t end, int64_t max_dirty_count,
  int64_t *dirty_start, int64_t *dirty_count);
+void bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+  int64_t bytes, bool *is_dirty, int64_t *count);
  BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
Error **errp);
  
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h

index 5e71b6d6f7..845fda12db 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -340,6 +340,17 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t 
start, int64_t end,
   int64_t max_dirty_count,
   int64_t *dirty_start, int64_t *dirty_count);
  
+/*

+ * bdrv_dirty_bitmap_status:
+ * @hb: The HBitmap to operate on
+ * @start: the offset to start from
+ * @end: end of requested area
+ * @is_dirty: is bitmap dirty at @offset
+ * @pnum: how many bits has same value starting from @offset
+ */
+void hbitmap_status(const HBitmap *hb, int64_t offset, int64_t bytes,
+bool *is_dirty, int64_t *pnum);
+


I think the description should be changed; there are no start and end
arguments in the function.


  /**
   * hbitmap_iter_next:
   * @hbi: HBitmapIter to operate on.
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 94a0276833..e4a836749a 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -875,6 +875,12 @@ bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap 
*bitmap,
 dirty_start, dirty_count);
  }
  
+void bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,

+  int64_t bytes, bool *is_dirty, int64_t *count)
+{
+hbitmap_status(bitmap->bitmap, offset, bytes, is_dirty, count);
+}
+
  /**
   * bdrv_merge_dirty_bitmap: merge src into dest.
   * Ensures permissions on bitmaps are reasonable; use for public API.
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 305b894a63..ae8d0eb4d2 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -301,6 +301,42 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t 
start, int64_t end,
  return true;
  }
  
+void hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,

+bool *is_dirty, int64_t *pnum)
+{
+int64_t next_dirty, next_zero;
+
+assert(start >= 0);
+assert(count > 0);
+assert(start + count <= hb->orig_size);
+
+next_dirty = hbitmap_next_dirty(hb, start, count);
+if (next_dirty == -1) {
+*pnum = count;
+*is_dirty = false;
+return;
+}
+
+if (next_dirty > start) {
+*pnum = next_dirty - start;
+*is_dirty = false;
+return;
+}
+
+assert(next_dirty == start);
+
+next_zero = hbitmap_next_zero(hb, start, count);
+if (next_zero == -1) {
+*pnum = count;
+*is_dirty = true;
+return;
+}
+
+assert(next_zero > start);
+*pnum = next_zero - start;
+*is_dirty = true;
+}
+


This function finds if this bitmap is dirty and also counts first bits.
I don't think that this is a problem, but maybe it should be divided?


  bool hbitmap_empty(const HBitmap *hb)
  {
  return hb->count == 0;


With corrected description
Reviewed-by: Nikita Lapshin


Re: iotest 040, 041, intermittent failure in netbsd VM

2022-01-17 Thread Kevin Wolf
Am 10.01.2022 um 16:55 hat Peter Maydell geschrieben:
> Just saw this failure of iotests in a netbsd VM (the in-tree
> tests/vm stuff). Pretty sure it's an intermittent failure, as the
> pullreq being tested has nothing IO- or block-related.
> 
> 
>   TEST   iotest-qcow2: 036
>   TEST   iotest-qcow2: 037
>   TEST   iotest-qcow2: 038 [not run]
>   TEST   iotest-qcow2: 039 [not run]
>   TEST   iotest-qcow2: 040 [fail]
> QEMU  --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-system-aarch64"
> -nodefaults -display none -accel qtest -machine
> virt
> QEMU_IMG  --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-img"
> QEMU_IO   --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-io"
> --cache writeback --aio threads -f qcow2
> QEMU_NBD  --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-nbd"
> IMGFMT-- qcow2
> IMGPROTO  -- file
> PLATFORM  -- NetBSD/amd64 localhost 9.2
> TEST_DIR  -- /home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/scratch
> SOCK_DIR  -- /tmp/tmpuniuicbi
> GDB_OPTIONS   --
> VALGRIND_QEMU --
> PRINT_QEMU_OUTPUT --
> 
> --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040.out
> fcntl(): Invalid argument
> +++ 040.out.bad
> @@ -1,5 +1,30 @@
> -.
> +ERROR:qemu.aqmp.qmp_client.qemu-7648:Failed
> to establish connection: concurrent.futures._base.CancelledError
> +E
> +==
> +ERROR: test_top_is_default_active (__main__.TestSingleDrive)
> +--
> +Traceback (most recent call last):
> +  File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040", line
> 94, in setUp
> +self.vm.launch()
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> line 399, in launch
> +self._launch()
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> line 434, in _launch
> +self._post_launch()
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/qtest.py",
> line 147, in _post_launch
> +super()._post_launch()
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> line 340, in _post_launch
> +self._qmp.accept(self._qmp_timer)
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py",
> line 69, in accept
> +timeout
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py",
> line 42, in _sync
> +asyncio.wait_for(future, timeout=timeout)
> +  File "/usr/pkg/lib/python3.7/asyncio/base_events.py", line 587, in
> run_until_complete
> +return future.result()
> +  File "/usr/pkg/lib/python3.7/asyncio/tasks.py", line 449, in wait_for
> +raise futures.TimeoutError()
> +concurrent.futures._base.TimeoutError
> +
>  --
>  Ran 65 tests
> 
> -OK
> +FAILED (errors=1)
>   TEST   iotest-qcow2: 041 [fail]
> QEMU  --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-system-aarch64"
> -nodefaults -display none -accel qtest -machine virt
> QEMU_IMG  --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-img"
> QEMU_IO   --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-io"
> --cache writeback --aio threads -f qcow2
> QEMU_NBD  --
> "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-nbd"
> IMGFMT-- qcow2
> IMGPROTO  -- file
> PLATFORM  -- NetBSD/amd64 localhost 9.2
> TEST_DIR  -- /home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/scratch
> SOCK_DIR  -- /tmp/tmpuniuicbi
> GDB_OPTIONS   --
> VALGRIND_QEMU --
> PRINT_QEMU_OUTPUT --
> 
> --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041.out
> +++ 041.out.bad
> @@ -1,5 +1,32 @@
> -...
> +..ERROR:qemu.aqmp.qmp_client.qemu-15252:Failed
> to establish connection: concurrent.futures._base.CancelledError
> +E
> +==
> +ERROR: test_small_buffer (__main__.TestSingleBlockdev)
> +--
> +Traceback (most recent call last):
> +  File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041", line
> 233, in setUp
> +TestSingleDrive.setUp(self)
> +  File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041", line
> 54, in setUp
> +self.vm.launch()
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> line 399, in launch
> +self._launch()
> +  File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py",
> line 434, in _launch
> +  

Re: Inconsistent block status reply in qemu-nbd in qemu-img-6.2.0

2022-01-17 Thread Hanna Reitz

On 17.01.22 10:52, Kevin Wolf wrote:

Am 17.01.2022 um 09:46 hat Hanna Reitz geschrieben:

On 16.01.22 19:09, Nir Soffer wrote:

On Sun, Jan 16, 2022 at 1:17 PM Nir Soffer  wrote:

Some of our tests started to fail since qemu-img 6.2.0 released.

Here is an example failure:

$ truncate -s1g /data/scratch/empty-1g.raw

$ qemu-nbd -r -t -e8 -k /tmp/nbd.sock -A -f raw /data/scratch/empty-1g.raw &

git bisect points to this commit:

$ git bisect good
0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2 is the first bad commit
commit 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
Author: Hanna Reitz 
Date:   Thu Aug 12 10:41:44 2021 +0200

  block: block-status cache for data regions

The commit message says:

  (Note that only caching data regions but not zero regions means that
  returning false information from the cache is not catastrophic: Treating
  zeroes as data is fine.  While we try to invalidate the cache on zero
  writes and discards, such incongruences may still occur when there are
  other processes writing to the image.)

I don't think it is fine to report zero as data. This can cause severe
performance
issues when users copy unneeded zero extents over the network, instead of
doing no work with zero bandwidth.

You’re right, it was meant as a contrast to whether this might lead to
catastrophic data failure.

But it was also meant as a risk estimation.  There wasn’t supposed to be a
situation where the cache would report zeroes as data (other than with
external writers), and so that’s why I thought it’d be fine.  But, well,
things are supposed to be one way, and then you (me) write buggy code...

Thanks for the reproducer steps, I can reproduce the bug with your script
(not with nbdinfo fwiw) and the problem seems to be this:

diff --git a/block/io.c b/block/io.c
index bb0a254def..83e94faeaf 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2498,7 +2498,8 @@ static int coroutine_fn
bdrv_co_block_status(BlockDriverState *bs,
   * the cache requires an RCU update, so double check here to
avoid
   * such an update if possible.
   */
-    if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
+    if (want_zero &&
+    ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
  QLIST_EMPTY(>children))
  {
  /*

We should update the cache only when we have accurate zero information, so
only if want_zero was true.

Either that, or store the want_zero value as well and compare it before
using the cache.

Depends on whether we need the optimisation for want_zero=false cases,
too. I think this mostly means bdrv_is_allocated(_above)(), which seems
to be relevant for the commit and stream jobs and 'qemu-img rebase' at
least and some less commonly used stuff. There are some other cases of
is_allocated (like in mirror) where it doesn't make a difference because
we always process the maximum number of bytes with the first call.


I think we need the cache only for want_zero=true cases, because 
allocation information is handled by the format layer, which doesn’t use 
the cache at all.  Also, the reason for the cache was to skip slow 
SEEK_HOLE/DATA calls on the protocol level, which is a want_zero=true case.



Do we have a way to disable this cache, at least for nbd?

No, unfortunately not.  We could implement it, but I think the management
layer would need to specify it for all protocol nodes where it wants the
cache to be disabled.

Auto-detecting would be difficult for qemu, because it would basically mean
detecting whether there are any exports in the block graph somewhere above
the node in question (not even immediately above it, because for example in
your example there’s a raw node between the export and the protocol node
whose block-status information we’re caching).

I assume you’re now very skeptical of the cache, so even if we implement a
fix for this problem, you’ll probably still want that option to disable it,
right?

I don't think we can realistically provide an option for every
individual optimisation where there could be a bug. We just need to fix
the problem and make sure to return correct data when there is no
external writer.

Kevin






Re: Inconsistent block status reply in qemu-nbd in qemu-img-6.2.0

2022-01-17 Thread Kevin Wolf
Am 17.01.2022 um 09:46 hat Hanna Reitz geschrieben:
> On 16.01.22 19:09, Nir Soffer wrote:
> > On Sun, Jan 16, 2022 at 1:17 PM Nir Soffer  wrote:
> > > Some of our tests started to fail since qemu-img 6.2.0 released.
> > > 
> > > Here is an example failure:
> > > 
> > > $ truncate -s1g /data/scratch/empty-1g.raw
> > > 
> > > $ qemu-nbd -r -t -e8 -k /tmp/nbd.sock -A -f raw 
> > > /data/scratch/empty-1g.raw &
> > git bisect points to this commit:
> > 
> > $ git bisect good
> > 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2 is the first bad commit
> > commit 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
> > Author: Hanna Reitz 
> > Date:   Thu Aug 12 10:41:44 2021 +0200
> > 
> >  block: block-status cache for data regions
> > 
> > The commit message says:
> > 
> >  (Note that only caching data regions but not zero regions means that
> >  returning false information from the cache is not catastrophic: 
> > Treating
> >  zeroes as data is fine.  While we try to invalidate the cache on zero
> >  writes and discards, such incongruences may still occur when there are
> >  other processes writing to the image.)
> > 
> > I don't think it is fine to report zero as data. This can cause severe
> > performance
> > issues when users copy unneeded zero extents over the network, instead of
> > doing no work with zero bandwidth.
> 
> You’re right, it was meant as a contrast to whether this might lead to
> catastrophic data failure.
> 
> But it was also meant as a risk estimation.  There wasn’t supposed to be a
> situation where the cache would report zeroes as data (other than with
> external writers), and so that’s why I thought it’d be fine.  But, well,
> things are supposed to be one way, and then you (me) write buggy code...
> 
> Thanks for the reproducer steps, I can reproduce the bug with your script
> (not with nbdinfo fwiw) and the problem seems to be this:
> 
> diff --git a/block/io.c b/block/io.c
> index bb0a254def..83e94faeaf 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2498,7 +2498,8 @@ static int coroutine_fn
> bdrv_co_block_status(BlockDriverState *bs,
>   * the cache requires an RCU update, so double check here to
> avoid
>   * such an update if possible.
>   */
> -    if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
> +    if (want_zero &&
> +    ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
>  QLIST_EMPTY(>children))
>  {
>  /*
> 
> We should update the cache only when we have accurate zero information, so
> only if want_zero was true.

Either that, or store the want_zero value as well and compare it before
using the cache.

Depends on whether we need the optimisation for want_zero=false cases,
too. I think this mostly means bdrv_is_allocated(_above)(), which seems
to be relevant for the commit and stream jobs and 'qemu-img rebase' at
least and some less commonly used stuff. There are some other cases of
is_allocated (like in mirror) where it doesn't make a difference because
we always process the maximum number of bytes with the first call.

> > Do we have a way to disable this cache, at least for nbd?
> 
> No, unfortunately not.  We could implement it, but I think the management
> layer would need to specify it for all protocol nodes where it wants the
> cache to be disabled.
> 
> Auto-detecting would be difficult for qemu, because it would basically mean
> detecting whether there are any exports in the block graph somewhere above
> the node in question (not even immediately above it, because for example in
> your example there’s a raw node between the export and the protocol node
> whose block-status information we’re caching).
> 
> I assume you’re now very skeptical of the cache, so even if we implement a
> fix for this problem, you’ll probably still want that option to disable it,
> right?

I don't think we can realistically provide an option for every
individual optimisation where there could be a bug. We just need to fix
the problem and make sure to return correct data when there is no
external writer.

Kevin




Re: [PATCH v2 2/4] scripts/qapi/commands: gen_commands(): add add_trace_points argument

2022-01-17 Thread Kevin Wolf
Am 17.01.2022 um 09:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 12.01.2022 13:50, Stefan Hajnoczi wrote:
> > On Tue, Jan 11, 2022 at 07:32:52PM -0500, John Snow wrote:
> > > On Tue, Jan 11, 2022 at 6:53 PM John Snow  wrote:
> > > > 
> > > > On Thu, Dec 23, 2021 at 6:08 AM Vladimir Sementsov-Ogievskiy
> > > >  wrote:
> > > > > 
> > > > > Add possibility to generate trace points for each qmp command.
> > > > > 
> > > > > We should generate both trace points and trace-events file, for 
> > > > > further
> > > > > trace point code generation.
> > > > > 
> > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy 
> > > > > ---
> > > > >   scripts/qapi/commands.py | 84 
> > > > > ++--
> > > > >   1 file changed, 73 insertions(+), 11 deletions(-)
> > > > > 
> > > > > diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
> > > > > index 21001bbd6b..9691c11f96 100644
> > > > > --- a/scripts/qapi/commands.py
> > > > > +++ b/scripts/qapi/commands.py
> > > > > @@ -53,7 +53,8 @@ def gen_command_decl(name: str,
> > > > >   def gen_call(name: str,
> > > > >arg_type: Optional[QAPISchemaObjectType],
> > > > >boxed: bool,
> > > > > - ret_type: Optional[QAPISchemaType]) -> str:
> > > > > + ret_type: Optional[QAPISchemaType],
> > > > > + add_trace_points: bool) -> str:
> > > > >   ret = ''
> > > > > 
> > > > >   argstr = ''
> > > > > @@ -71,21 +72,65 @@ def gen_call(name: str,
> > > > >   if ret_type:
> > > > >   lhs = 'retval = '
> > > > > 
> > > > > -ret = mcgen('''
> > > > > +qmp_name = f'qmp_{c_name(name)}'
> > > > > +upper = qmp_name.upper()
> > > > > +
> > > > > +if add_trace_points:
> > > > > +ret += mcgen('''
> > > > > +
> > > > > +if (trace_event_get_state_backends(TRACE_%(upper)s)) {
> > > > > +g_autoptr(GString) req_json = qobject_to_json(QOBJECT(args));
> > > > > +trace_%(qmp_name)s("", req_json->str);
> > > > > +}
> > > > > +''',
> > > > > + upper=upper, qmp_name=qmp_name)
> > > > > +
> > > > > +ret += mcgen('''
> > > > > 
> > > > >   %(lhs)sqmp_%(c_name)s(%(args)s);
> > > > > -error_propagate(errp, err);
> > > > >   ''',
> > > > >   c_name=c_name(name), args=argstr, lhs=lhs)
> > > > > -if ret_type:
> > > > > -ret += mcgen('''
> > > > > +
> > > > > +ret += mcgen('''
> > > > >   if (err) {
> > > > > +''')
> > > > > +
> > > > > +if add_trace_points:
> > > > > +ret += mcgen('''
> > > > > +trace_%(qmp_name)s("FAIL: ", error_get_pretty(err));
> > > > > +''',
> > > > > + qmp_name=qmp_name)
> > > > > +
> > > > > +ret += mcgen('''
> > > > > +error_propagate(errp, err);
> > > > >   goto out;
> > > > >   }
> > > > > +''')
> > > > > +
> > > > > +if ret_type:
> > > > > +ret += mcgen('''
> > > > > 
> > > > >   qmp_marshal_output_%(c_name)s(retval, ret, errp);
> > > > >   ''',
> > > > >c_name=ret_type.c_name())
> > > > > +
> > > > > +if add_trace_points:
> > > > > +if ret_type:
> > > > > +ret += mcgen('''
> > > > > +
> > > > > +if (trace_event_get_state_backends(TRACE_%(upper)s)) {
> > > > > +g_autoptr(GString) ret_json = qobject_to_json(*ret);
> > > > > +trace_%(qmp_name)s("RET:", ret_json->str);
> > > > > +}
> > > > > +''',
> > > > > + upper=upper, qmp_name=qmp_name)
> > > > > +else:
> > > > > +ret += mcgen('''
> > > > > +
> > > > > +trace_%(qmp_name)s("SUCCESS", "");
> > > > > +''',
> > > > > + qmp_name=qmp_name)
> > > > > +
> > > > >   return ret
> > > > > 
> > > > > 
> > > > > @@ -122,10 +167,14 @@ def gen_marshal_decl(name: str) -> str:
> > > > >proto=build_marshal_proto(name))
> > > > > 
> > > > > 
> > > > > +def gen_trace(name: str) -> str:
> > > > > +return f'qmp_{c_name(name)}(const char *tag, const char *json) 
> > > > > "%s%s"\n'
> > > > > +
> > > > >   def gen_marshal(name: str,
> > > > >   arg_type: Optional[QAPISchemaObjectType],
> > > > >   boxed: bool,
> > > > > -ret_type: Optional[QAPISchemaType]) -> str:
> > > > > +ret_type: Optional[QAPISchemaType],
> > > > > +add_trace_points: bool) -> str:
> > > > >   have_args = boxed or (arg_type and not arg_type.is_empty())
> > > > >   if have_args:
> > > > >   assert arg_type is not None
> > > > > @@ -180,7 +229,7 @@ def gen_marshal(name: str,
> > > > >   }
> > > > >   ''')
> > > > > 
> > > > > -ret += gen_call(name, arg_type, boxed, ret_type)
> > > > > +ret += gen_call(name, arg_type, boxed, ret_type, 
> > > > > add_trace_points)
> > > > > 
> > > > >   ret += mcgen('''
> > > > > 
> > > > > @@ -238,11 +287,12 @@ def gen_register_command(name: str,
> > > 

Re: [PATCH v2 3/4] scripts/qapi-gen.py: add --add-trace-points option

2022-01-17 Thread Vladimir Sementsov-Ogievskiy

12.01.2022 03:38, John Snow wrote:

On Thu, Dec 23, 2021 at 6:08 AM Vladimir Sementsov-Ogievskiy
 wrote:


Add an option to generate trace points. We should generate both trace
points and trace-events files for further trace point code generation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Philippe Mathieu-Daudé 
---
  scripts/qapi/gen.py  | 13 ++---
  scripts/qapi/main.py | 10 +++---
  2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py
index 995a97d2b8..605b3fe68a 100644
--- a/scripts/qapi/gen.py
+++ b/scripts/qapi/gen.py
@@ -251,7 +251,7 @@ def __init__(self,
  self._builtin_blurb = builtin_blurb
  self._pydoc = pydoc
  self._current_module: Optional[str] = None
-self._module: Dict[str, Tuple[QAPIGenC, QAPIGenH]] = {}
+self._module: Dict[str, Tuple[QAPIGenC, QAPIGenH, QAPIGen]] = {}
  self._main_module: Optional[str] = None

  @property
@@ -264,6 +264,11 @@ def _genh(self) -> QAPIGenH:
  assert self._current_module is not None
  return self._module[self._current_module][1]

+@property
+def _gent(self) -> QAPIGen:
+assert self._current_module is not None
+return self._module[self._current_module][2]
+
  @staticmethod
  def _module_dirname(name: str) -> str:
  if QAPISchemaModule.is_user_module(name):
@@ -293,7 +298,8 @@ def _add_module(self, name: str, blurb: str) -> None:
  basename = self._module_filename(self._what, name)
  genc = QAPIGenC(basename + '.c', blurb, self._pydoc)
  genh = QAPIGenH(basename + '.h', blurb, self._pydoc)
-self._module[name] = (genc, genh)
+gent = QAPIGen(basename + '.trace-events')
+self._module[name] = (genc, genh, gent)
  self._current_module = name

  @contextmanager
@@ -304,11 +310,12 @@ def _temp_module(self, name: str) -> Iterator[None]:
  self._current_module = old_module

  def write(self, output_dir: str, opt_builtins: bool = False) -> None:
-for name, (genc, genh) in self._module.items():
+for name, (genc, genh, gent) in self._module.items():
  if QAPISchemaModule.is_builtin_module(name) and not opt_builtins:
  continue
  genc.write(output_dir)
  genh.write(output_dir)
+gent.write(output_dir)

  def _begin_builtin_module(self) -> None:
  pass
diff --git a/scripts/qapi/main.py b/scripts/qapi/main.py
index f2ea6e0ce4..3adf0319cf 100644
--- a/scripts/qapi/main.py
+++ b/scripts/qapi/main.py
@@ -32,7 +32,8 @@ def generate(schema_file: str,
   output_dir: str,
   prefix: str,
   unmask: bool = False,
- builtins: bool = False) -> None:
+ builtins: bool = False,
+ add_trace_points: bool = False) -> None:
  """
  Generate C code for the given schema into the target directory.

@@ -49,7 +50,7 @@ def generate(schema_file: str,
  schema = QAPISchema(schema_file)
  gen_types(schema, output_dir, prefix, builtins)
  gen_visit(schema, output_dir, prefix, builtins)
-gen_commands(schema, output_dir, prefix)
+gen_commands(schema, output_dir, prefix, add_trace_points)
  gen_events(schema, output_dir, prefix)
  gen_introspect(schema, output_dir, prefix, unmask)

@@ -74,6 +75,8 @@ def main() -> int:
  parser.add_argument('-u', '--unmask-non-abi-names', action='store_true',
  dest='unmask',
  help="expose non-ABI names in introspection")
+parser.add_argument('--add-trace-points', action='store_true',
+help="add trace points to qmp marshals")


"Add trace events to generated marshaling functions." maybe?


  parser.add_argument('schema', action='store')
  args = parser.parse_args()

@@ -88,7 +91,8 @@ def main() -> int:
   output_dir=args.output_dir,
   prefix=args.prefix,
   unmask=args.unmask,
- builtins=args.builtins)
+ builtins=args.builtins,
+ add_trace_points=args.add_trace_points)
  except QAPIError as err:
  print(f"{sys.argv[0]}: {str(err)}", file=sys.stderr)
  return 1
--
2.31.1



I suppose the flag is so that non-QEMU invocations of the QAPI
generator (for tests, etc) will compile correctly without tracepoint
definitions, yeah?



Yes, I had trouble with tests and some other code units, so I decided to do
less and not touch things I'm not interested in :) If needed, it may be done
separately.

--
Best regards,
Vladimir



Re: Inconsistent block status reply in qemu-nbd in qemu-img-6.2.0

2022-01-17 Thread Hanna Reitz

On 16.01.22 19:09, Nir Soffer wrote:

On Sun, Jan 16, 2022 at 1:17 PM Nir Soffer  wrote:

Some of our tests started to fail since qemu-img 6.2.0 released.

Here is an example failure:

$ truncate -s1g /data/scratch/empty-1g.raw

$ qemu-nbd -r -t -e8 -k /tmp/nbd.sock -A -f raw /data/scratch/empty-1g.raw &

git bisect points to this commit:

$ git bisect good
0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2 is the first bad commit
commit 0bc329fbb009f8601cec23bf2bc48ead0c5a5fa2
Author: Hanna Reitz 
Date:   Thu Aug 12 10:41:44 2021 +0200

 block: block-status cache for data regions

The commit message says:

 (Note that only caching data regions but not zero regions means that
 returning false information from the cache is not catastrophic: Treating
 zeroes as data is fine.  While we try to invalidate the cache on zero
 writes and discards, such incongruences may still occur when there are
 other processes writing to the image.)

I don't think it is fine to report zero as data. This can cause severe
performance
issues when users copy unneeded zero extents over the network, instead of
doing no work with zero bandwidth.


You’re right, it was meant as a contrast to whether this might lead to 
catastrophic data failure.


But it was also meant as a risk estimation.  There wasn’t supposed to be 
a situation where the cache would report zeroes as data (other than with 
external writers), and so that’s why I thought it’d be fine.  But, well, 
things are supposed to be one way, and then you (me) write buggy code...


Thanks for the reproducer steps, I can reproduce the bug with your 
script (not with nbdinfo fwiw) and the problem seems to be this:


diff --git a/block/io.c b/block/io.c
index bb0a254def..83e94faeaf 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2498,7 +2498,8 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
  * the cache requires an RCU update, so double check here 
to avoid

  * such an update if possible.
  */
-    if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
+    if (want_zero &&
+    ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
 QLIST_EMPTY(>children))
 {
 /*

We should update the cache only when we have accurate zero information, 
so only if want_zero was true.



Do we have a way to disable this cache, at least for nbd?


No, unfortunately not.  We could implement it, but I think the 
management layer would need to specify it for all protocol nodes where 
it wants the cache to be disabled.


Auto-detecting would be difficult for qemu, because it would basically 
mean detecting whether there are any exports in the block graph 
somewhere above the node in question (not even immediately above it, 
because for example in your example there’s a raw node between the 
export and the protocol node whose block-status information we’re caching).


I assume you’re now very skeptical of the cache, so even if we implement 
a fix for this problem, you’ll probably still want that option to 
disable it, right?


Hanna


Nir





$ nbdinfo --map nbd+unix:///?socket=/tmp/nbd.sock
  0  10737418243  hole,zero

$ python
Python 3.10.1 (main, Dec  9 2021, 00:00:00) [GCC 11.2.1 20211203 (Red
Hat 11.2.1-7)] on linux
Type "help", "copyright", "credits" or "license" for more information.

Enable debug logs so we can see all nbd commands and replies...


import logging
logging.basicConfig(level=logging.DEBUG)

Connect to qemu-nbd:


from ovirt_imageio._internal import nbd
c = nbd.Client(nbd.UnixAddress("/tmp/nbd.sock"))

DEBUG:nbd:Connecting address='/tmp/nbd.sock' export_name='' dirty=False
DEBUG:nbd:Received server flags: 3
DEBUG:nbd:Sending client flags: 1
DEBUG:nbd:Sending option: 8 data: b''
DEBUG:nbd:Received reply magic=3e889045565a9 option=8 type=1 len=0
DEBUG:nbd:Structured reply enabled
DEBUG:nbd:Sending option: 10 data:
bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x0fbase:allocation\x00\x00\x00\x15qemu:allocation-depth')
DEBUG:nbd:Received reply magic=3e889045565a9 option=10 type=4 len=19
DEBUG:nbd:Meta context base:allocation is available id=0
DEBUG:nbd:Received reply magic=3e889045565a9 option=10 type=4 len=25
DEBUG:nbd:Meta context qemu:allocation-depth is available id=1
DEBUG:nbd:Received reply magic=3e889045565a9 option=10 type=1 len=0
DEBUG:nbd:Sending option: 7 data: bytearray(b'\x00\x00\x00\x00\x00\x00')
DEBUG:nbd:Received reply magic=3e889045565a9 option=7 type=3 len=14
DEBUG:nbd:Received block size info minimum=1 preferred=4096 maximum=33554432
DEBUG:nbd:Received reply magic=3e889045565a9 option=7 type=3 len=12
DEBUG:nbd:Received export info size=1073741824 flags=1423
DEBUG:nbd:Received reply magic=3e889045565a9 option=7 type=1 len=0
DEBUG:nbd:Ready for transmission

Get extents (first call):


c.extents(0, 1024**3)

DEBUG:nbd:Sending NBD_CMD_BLOCK_STATUS handle=0 offset=0
length=1073741824 flags=0

Re: [PATCH v2 2/4] scripts/qapi/commands: gen_commands(): add add_trace_points argument

2022-01-17 Thread Vladimir Sementsov-Ogievskiy

10.01.2022 19:22, Stefan Hajnoczi wrote:

On Thu, Dec 23, 2021 at 12:07:54PM +0100, Vladimir Sementsov-Ogievskiy wrote:

Add possibility to generate trace points for each qmp command.

We should generate both trace points and trace-events file, for further
trace point code generation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  scripts/qapi/commands.py | 84 ++--
  1 file changed, 73 insertions(+), 11 deletions(-)

diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 21001bbd6b..9691c11f96 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py
@@ -53,7 +53,8 @@ def gen_command_decl(name: str,
  def gen_call(name: str,
   arg_type: Optional[QAPISchemaObjectType],
   boxed: bool,
- ret_type: Optional[QAPISchemaType]) -> str:
+ ret_type: Optional[QAPISchemaType],
+ add_trace_points: bool) -> str:


Please use the term "trace events" instead of "trace points". That's the
term that docs/devel/tracing.rst uses.


@@ -122,10 +167,14 @@ def gen_marshal_decl(name: str) -> str:
   proto=build_marshal_proto(name))
  
  
+def gen_trace(name: str) -> str:

+return f'qmp_{c_name(name)}(const char *tag, const char *json) "%s%s"\n'


This trace event is emitted in 3 different ways:
1. For arguments before calling a QMP command.
2. For the error message when the QMP command fails.
3. For the return value when a QMP command succeeds.

This makes parsing the trace awkward because you get two events in
succession for a single call and they both have the same name.

Please generate 2 trace events:
1. qmp_enter_ 
2. qmp_exit_  

(That's similar to how the syscalls Linux kernel trace events work.)

Scripts processing the trace can easily differentiate between enter
(args) and exit (return value) events without parsing or keeping state
to count the second event.



OK, reasonable. This also makes patch 01 not needed.
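
Something like this, as a rough sketch of what the generator could emit for a
hypothetical 'example' command (names and exact format still to be decided;
argument visiting and cleanup are elided):

/* Matching trace-events lines could be something like:
 *   qmp_enter_example(const char *req) "%s"
 *   qmp_exit_example(const char *result, bool succeeded) "%s %d"
 */
void qmp_marshal_example(QDict *args, QObject **ret, Error **errp)
{
    Error *err = NULL;
    int64_t retval;

    if (trace_event_get_state_backends(TRACE_QMP_ENTER_EXAMPLE)) {
        g_autoptr(GString) req_json = qobject_to_json(QOBJECT(args));

        trace_qmp_enter_example(req_json->str);
    }

    retval = qmp_example(&err);
    if (err) {
        trace_qmp_exit_example(error_get_pretty(err), false);
        error_propagate(errp, err);
        return;
    }

    qmp_marshal_output_int(retval, ret, errp);

    if (trace_event_get_state_backends(TRACE_QMP_EXIT_EXAMPLE)) {
        g_autoptr(GString) ret_json = qobject_to_json(*ret);

        trace_qmp_exit_example(ret_json->str, true);
    }
}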


--
Best regards,
Vladimir



Re: [PATCH v2 2/4] scripts/qapi/commands: gen_commands(): add add_trace_points argument

2022-01-17 Thread Vladimir Sementsov-Ogievskiy

12.01.2022 13:50, Stefan Hajnoczi wrote:

On Tue, Jan 11, 2022 at 07:32:52PM -0500, John Snow wrote:

On Tue, Jan 11, 2022 at 6:53 PM John Snow  wrote:


On Thu, Dec 23, 2021 at 6:08 AM Vladimir Sementsov-Ogievskiy
 wrote:


Add possibility to generate trace points for each qmp command.

We should generate both trace points and trace-events file, for further
trace point code generation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  scripts/qapi/commands.py | 84 ++--
  1 file changed, 73 insertions(+), 11 deletions(-)

diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 21001bbd6b..9691c11f96 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py
@@ -53,7 +53,8 @@ def gen_command_decl(name: str,
  def gen_call(name: str,
   arg_type: Optional[QAPISchemaObjectType],
   boxed: bool,
- ret_type: Optional[QAPISchemaType]) -> str:
+ ret_type: Optional[QAPISchemaType],
+ add_trace_points: bool) -> str:
  ret = ''

  argstr = ''
@@ -71,21 +72,65 @@ def gen_call(name: str,
  if ret_type:
  lhs = 'retval = '

-ret = mcgen('''
+qmp_name = f'qmp_{c_name(name)}'
+upper = qmp_name.upper()
+
+if add_trace_points:
+ret += mcgen('''
+
+if (trace_event_get_state_backends(TRACE_%(upper)s)) {
+g_autoptr(GString) req_json = qobject_to_json(QOBJECT(args));
+trace_%(qmp_name)s("", req_json->str);
+}
+''',
+ upper=upper, qmp_name=qmp_name)
+
+ret += mcgen('''

  %(lhs)sqmp_%(c_name)s(%(args)s);
-error_propagate(errp, err);
  ''',
  c_name=c_name(name), args=argstr, lhs=lhs)
-if ret_type:
-ret += mcgen('''
+
+ret += mcgen('''
  if (err) {
+''')
+
+if add_trace_points:
+ret += mcgen('''
+trace_%(qmp_name)s("FAIL: ", error_get_pretty(err));
+''',
+ qmp_name=qmp_name)
+
+ret += mcgen('''
+error_propagate(errp, err);
  goto out;
  }
+''')
+
+if ret_type:
+ret += mcgen('''

  qmp_marshal_output_%(c_name)s(retval, ret, errp);
  ''',
   c_name=ret_type.c_name())
+
+if add_trace_points:
+if ret_type:
+ret += mcgen('''
+
+if (trace_event_get_state_backends(TRACE_%(upper)s)) {
+g_autoptr(GString) ret_json = qobject_to_json(*ret);
+trace_%(qmp_name)s("RET:", ret_json->str);
+}
+''',
+ upper=upper, qmp_name=qmp_name)
+else:
+ret += mcgen('''
+
+trace_%(qmp_name)s("SUCCESS", "");
+''',
+ qmp_name=qmp_name)
+
  return ret


@@ -122,10 +167,14 @@ def gen_marshal_decl(name: str) -> str:
   proto=build_marshal_proto(name))


+def gen_trace(name: str) -> str:
+return f'qmp_{c_name(name)}(const char *tag, const char *json) "%s%s"\n'
+
  def gen_marshal(name: str,
  arg_type: Optional[QAPISchemaObjectType],
  boxed: bool,
-ret_type: Optional[QAPISchemaType]) -> str:
+ret_type: Optional[QAPISchemaType],
+add_trace_points: bool) -> str:
  have_args = boxed or (arg_type and not arg_type.is_empty())
  if have_args:
  assert arg_type is not None
@@ -180,7 +229,7 @@ def gen_marshal(name: str,
  }
  ''')

-ret += gen_call(name, arg_type, boxed, ret_type)
+ret += gen_call(name, arg_type, boxed, ret_type, add_trace_points)

  ret += mcgen('''

@@ -238,11 +287,12 @@ def gen_register_command(name: str,


  class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor):
-def __init__(self, prefix: str):
+def __init__(self, prefix: str, add_trace_points: bool):
  super().__init__(
  prefix, 'qapi-commands',
  ' * Schema-defined QAPI/QMP commands', None, __doc__)
  self._visited_ret_types: Dict[QAPIGenC, Set[QAPISchemaType]] = {}
+self.add_trace_points = add_trace_points

  def _begin_user_module(self, name: str) -> None:
  self._visited_ret_types[self._genc] = set()
@@ -261,6 +311,15 @@ def _begin_user_module(self, name: str) -> None:

  ''',
   commands=commands, visit=visit))
+
+if self.add_trace_points and c_name(commands) != 'qapi_commands':
+self._genc.add(mcgen('''
+#include "trace/trace-qapi.h"
+#include "qapi/qmp/qjson.h"
+#include "trace/trace-%(nm)s_trace_events.h"
+''',
+ nm=c_name(commands)))
+
  self._genh.add(mcgen('''
  #include "%(types)s.h"

@@ -322,7 +381,9 @@ def visit_command(self,
  with ifcontext(ifcond, self._genh, self._genc):
  self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type))
  self._genh.add(gen_marshal_decl(name))
-self._genc.add(gen_marshal(name, arg_type, boxed, ret_type))
+