On Mon, 17 Jan 2022 at 20:35, John Snow <js...@redhat.com> wrote: > > On Mon, Jan 17, 2022 at 5:05 AM Kevin Wolf <kw...@redhat.com> wrote: > > > > Am 10.01.2022 um 16:55 hat Peter Maydell geschrieben: > > > Just saw this failure of iotests in a netbsd VM
> This trace says that we timed out while awaiting a connection from > QEMU during the VM launch phase. i.e. python/qemu/qmp/machine.py line > 340: > > def _post_launch(self) -> None: > if self._qmp_connection: > self._qmp.accept(self._qmp_timer) <-- we timed out here. > > (Note to self: make this traceback look more obvious as to what was > canceled and why. I think I can improve readability here a bit ...) > > Sky's the limit on why QEMU never connected to the socket, but: > > > > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/040.out > > > fcntl(): Invalid argument > > That looks fairly suspicious, and I don't know which process was > responsible for printing it (or when, relative to the other outputs). > I assume you don't see any such output like this on a good run. The NetBSD VM prints those fcntl messages all over the place. I think something in the build system (make? ninja? who knows) triggers them. > > > -OK > > > +FAILED (errors=1) > > > TEST iotest-qcow2: 041 [fail] > > > QEMU -- > > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-system-aarch64" > > > -nodefaults -display none -accel qtest -machine virt > > > QEMU_IMG -- > > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-img" > > > QEMU_IO -- > > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-io" > > > --cache writeback --aio threads -f qcow2 > > > QEMU_NBD -- > > > "/home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/../../qemu-nbd" > > > IMGFMT -- qcow2 > > > IMGPROTO -- file > > > PLATFORM -- NetBSD/amd64 localhost 9.2 > > > TEST_DIR -- > > > /home/qemu/qemu-test.MPWquy/build/tests/qemu-iotests/scratch > > > SOCK_DIR -- /tmp/tmpuniuicbi > > > GDB_OPTIONS -- > > > VALGRIND_QEMU -- > > > PRINT_QEMU_OUTPUT -- > > > > > > --- /home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041.out > > > +++ 041.out.bad > > > @@ -1,5 +1,32 @@ > > > -........................................................................................................... > > > +..........................................ERROR:qemu.aqmp.qmp_client.qemu-15252:Failed > > > to establish connection: concurrent.futures._base.CancelledError > > > +E................................................................ > > > +====================================================================== > > > +ERROR: test_small_buffer (__main__.TestSingleBlockdev) > > > +---------------------------------------------------------------------- > > > +Traceback (most recent call last): > > > + File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041", line > > > 233, in setUp > > > + TestSingleDrive.setUp(self) > > > + File "/home/qemu/qemu-test.MPWquy/src/tests/qemu-iotests/041", line > > > 54, in setUp > > > + self.vm.launch() > > > + File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py", > > > line 399, in launch > > > + self._launch() > > > + File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py", > > > line 434, in _launch > > > + self._post_launch() > > > + File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/qtest.py", > > > line 147, in _post_launch > > > + super()._post_launch() > > > + File "/home/qemu/qemu-test.MPWquy/src/python/qemu/machine/machine.py", > > > line 340, in _post_launch > > > + self._qmp.accept(self._qmp_timer) > > > + File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py", > > > line 69, in accept > > > + timeout > > > + File "/home/qemu/qemu-test.MPWquy/src/python/qemu/aqmp/legacy.py", > > > line 42, in _sync > > > + asyncio.wait_for(future, timeout=timeout) > > > + File "/usr/pkg/lib/python3.7/asyncio/base_events.py", line 587, in > > > run_until_complete > > > + return future.result() > > > + File "/usr/pkg/lib/python3.7/asyncio/tasks.py", line 449, in wait_for > > > + raise futures.TimeoutError() > > > +concurrent.futures._base.TimeoutError > > Same problem here, except I don't see any output from QEMU to blame. > As far as the Python code knows, it just never got a connection on the > socket, so it timed out and died. I think the NetBSD VM does for some reason get a bit slow to do stuff. I've never worked out why. In the past we've had to bump up various overoptimistic timeouts to help it out. > I do expect this to print more information on failure than it > currently is, though (bug somewhere in machine.py, I think). > Can you please try applying this temporary patch and running `./check > -qcow2 040 041` until you see a breakage and show me the output from > that? > > diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py > index 67ab06ca2b..ca49e6fcd2 100644 > --- a/python/qemu/machine/machine.py > +++ b/python/qemu/machine/machine.py > @@ -403,16 +403,19 @@ def launch(self) -> None: > # Assume the VM didn't launch or is exiting. > # If we don't wait for the process, exitcode() may still be > # 'None' by the time control is ceded back to the caller. > + LOG.error('Error launching VM') > if self._launched: > + LOG.error('Process was forked, waiting on it') > self.wait() > else: > + LOG.error('Process was not forked, just cleaning up') > self._post_shutdown() > > - LOG.debug('Error launching VM') > if self._qemu_full_args: > - LOG.debug('Command: %r', ' '.join(self._qemu_full_args)) > + LOG.error('Command: %r', ' '.join(self._qemu_full_args)) > if self._iolog: > - LOG.debug('Output: %r', self._iolog) > + LOG.error('Output: %r', self._iolog) > + LOG.error('exitcode: %s', str(self.exitcode())) > raise > > def _launch(self) -> None: Yeah, I should be able to test this. -- PMM