Re: [PATCH v2 03/15] net/vhost-vdpa: Fix device compatibility check

2021-10-13 Thread Jason Wang



在 2021/10/8 下午9:34, Kevin Wolf 写道:

vhost-vdpa works only with specific devices. At startup, it second
guesses what the command line option handling will do and error out if
it thinks a non-virtio device will attach to them.

This second guessing is not only ugly, it can lead to wrong error
messages ('-device floppy,netdev=foo' should complain about an unknown
property, not about the wrong kind of network device being attached) and
completely ignores hotplugging.

Drop the old checks and implement .check_peer_type() instead to fix
this. As a nice side effect, it also removes one more dependency on the
legacy QemuOpts infrastructure and even reduces the code size.

Signed-off-by: Kevin Wolf 



Acked-by: Jason Wang 



---
  net/vhost-vdpa.c | 37 ++---
  1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 912686457c..6dc68d8677 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -147,12 +147,26 @@ static bool vhost_vdpa_has_ufo(NetClientState *nc)
  
  }
  
+static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,

+   Error **errp)
+{
+const char *driver = object_class_get_name(oc);
+
+if (!g_str_has_prefix(driver, "virtio-net-")) {
+error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
+return false;
+}
+
+return true;
+}
+
  static NetClientInfo net_vhost_vdpa_info = {
  .type = NET_CLIENT_DRIVER_VHOST_VDPA,
  .size = sizeof(VhostVDPAState),
  .cleanup = vhost_vdpa_cleanup,
  .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
  .has_ufo = vhost_vdpa_has_ufo,
+.check_peer_type = vhost_vdpa_check_peer_type,
  };
  
  static int net_vhost_vdpa_init(NetClientState *peer, const char *device,

@@ -179,24 +193,6 @@ static int net_vhost_vdpa_init(NetClientState *peer, const 
char *device,
  return ret;
  }
  
-static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)

-{
-const char *name = opaque;
-const char *driver, *netdev;
-
-driver = qemu_opt_get(opts, "driver");
-netdev = qemu_opt_get(opts, "netdev");
-if (!driver || !netdev) {
-return 0;
-}
-if (strcmp(netdev, name) == 0 &&
-!g_str_has_prefix(driver, "virtio-net-")) {
-error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
-return -1;
-}
-return 0;
-}
-
  int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
  NetClientState *peer, Error **errp)
  {
@@ -204,10 +200,5 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
  
  assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);

  opts = >u.vhost_vdpa;
-/* verify net frontend */
-if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
-  (char *)name, errp)) {
-return -1;
-}
  return net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, opts->vhostdev);
  }





Re: [PATCH v2 02/15] net/vhost-user: Fix device compatibility check

2021-10-13 Thread Jason Wang



在 2021/10/8 下午9:34, Kevin Wolf 写道:

vhost-user works only with specific devices. At startup, it second
guesses what the command line option handling will do and error out if
it thinks a non-virtio device will attach to them.

This second guessing is not only ugly, it can lead to wrong error
messages ('-device floppy,netdev=foo' should complain about an unknown
property, not about the wrong kind of network device being attached) and
completely ignores hotplugging.

Drop the old checks and implement .check_peer_type() instead to fix
this. As a nice side effect, it also removes one more dependency on the
legacy QemuOpts infrastructure and even reduces the code size.

Signed-off-by: Kevin Wolf 



Acked-by: Jason Wang 



---
  net/vhost-user.c | 41 ++---
  1 file changed, 14 insertions(+), 27 deletions(-)

diff --git a/net/vhost-user.c b/net/vhost-user.c
index 4a939124d2..b1a0247b59 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -198,6 +198,19 @@ static bool vhost_user_has_ufo(NetClientState *nc)
  return true;
  }
  
+static bool vhost_user_check_peer_type(NetClientState *nc, ObjectClass *oc,

+   Error **errp)
+{
+const char *driver = object_class_get_name(oc);
+
+if (!g_str_has_prefix(driver, "virtio-net-")) {
+error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
+return false;
+}
+
+return true;
+}
+
  static NetClientInfo net_vhost_user_info = {
  .type = NET_CLIENT_DRIVER_VHOST_USER,
  .size = sizeof(NetVhostUserState),
@@ -207,6 +220,7 @@ static NetClientInfo net_vhost_user_info = {
  .has_ufo = vhost_user_has_ufo,
  .set_vnet_be = vhost_user_set_vnet_endianness,
  .set_vnet_le = vhost_user_set_vnet_endianness,
+.check_peer_type = vhost_user_check_peer_type,
  };
  
  static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond,

@@ -397,27 +411,6 @@ static Chardev *net_vhost_claim_chardev(
  return chr;
  }
  
-static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)

-{
-const char *name = opaque;
-const char *driver, *netdev;
-
-driver = qemu_opt_get(opts, "driver");
-netdev = qemu_opt_get(opts, "netdev");
-
-if (!driver || !netdev) {
-return 0;
-}
-
-if (strcmp(netdev, name) == 0 &&
-!g_str_has_prefix(driver, "virtio-net-")) {
-error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
-return -1;
-}
-
-return 0;
-}
-
  int net_init_vhost_user(const Netdev *netdev, const char *name,
  NetClientState *peer, Error **errp)
  {
@@ -433,12 +426,6 @@ int net_init_vhost_user(const Netdev *netdev, const char 
*name,
  return -1;
  }
  
-/* verify net frontend */

-if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
-  (char *)name, errp)) {
-return -1;
-}
-
  queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
  if (queues < 1 || queues > MAX_QUEUE_NUM) {
  error_setg(errp,





Re: [PATCH v2 01/15] net: Introduce NetClientInfo.check_peer_type()

2021-10-13 Thread Jason Wang



在 2021/10/8 下午9:34, Kevin Wolf 写道:

Some network backends (vhost-user and vhost-vdpa) work only with
specific devices. At startup, they second guess what the command line
option handling will do and error out if they think a non-virtio device
will attach to them.

This second guessing is not only ugly, it can lead to wrong error
messages ('-device floppy,netdev=foo' should complain about an unknown
property, not about the wrong kind of network device being attached) and
completely ignores hotplugging.

Add a callback where backends can check compatibility with a device when
it actually tries to attach, even on hotplug.

Signed-off-by: Kevin Wolf 



Acked-by: Jason Wang 



---
  include/net/net.h| 2 ++
  hw/core/qdev-properties-system.c | 6 ++
  2 files changed, 8 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index 5d1508081f..986288eb07 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -62,6 +62,7 @@ typedef struct SocketReadState SocketReadState;
  typedef void (SocketReadStateFinalize)(SocketReadState *rs);
  typedef void (NetAnnounce)(NetClientState *);
  typedef bool (SetSteeringEBPF)(NetClientState *, int);
+typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **);
  
  typedef struct NetClientInfo {

  NetClientDriver type;
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
  SetVnetBE *set_vnet_be;
  NetAnnounce *announce;
  SetSteeringEBPF *set_steering_ebpf;
+NetCheckPeerType *check_peer_type;
  } NetClientInfo;
  
  struct NetClientState {

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index e71f5d64d1..a91f60567a 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -431,6 +431,12 @@ static void set_netdev(Object *obj, Visitor *v, const char 
*name,
  goto out;
  }
  
+if (peers[i]->info->check_peer_type) {

+if (!peers[i]->info->check_peer_type(peers[i], obj->class, errp)) {
+goto out;
+}
+}
+
  ncs[i] = peers[i];
  ncs[i]->queue_index = i;
  }





[PATCH v4 2/8] python/machine: Handle QMP errors on close more meticulously

2021-10-13 Thread John Snow
To use the AQMP backend, Machine just needs to be a little more diligent
about what happens when closing a QMP connection. The operation is no
longer a freebie in the async world; it may return errors encountered in
the async bottom half on incoming message receipt, etc.

(AQMP's disconnect, ultimately, serves as the quiescence point where all
async contexts are gathered together, and any final errors reported at
that point.)

Because async QMP continues to check for messages asynchronously, it's
almost certainly likely that the loop will have exited due to EOF after
issuing the last 'quit' command. That error will ultimately be bubbled
up when attempting to close the QMP connection. The manager class here
then is free to discard it -- if it was expected.

Signed-off-by: John Snow 
Reviewed-by: Hanna Reitz 
---
 python/qemu/machine/machine.py | 48 +-
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 0bd40bc2f76..a0cf69786b4 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -342,9 +342,15 @@ def _post_shutdown(self) -> None:
 # Comprehensive reset for the failed launch case:
 self._early_cleanup()
 
-if self._qmp_connection:
-self._qmp.close()
-self._qmp_connection = None
+try:
+self._close_qmp_connection()
+except Exception as err:  # pylint: disable=broad-except
+LOG.warning(
+"Exception closing QMP connection: %s",
+str(err) if str(err) else type(err).__name__
+)
+finally:
+assert self._qmp_connection is None
 
 self._close_qemu_log_file()
 
@@ -420,6 +426,31 @@ def _launch(self) -> None:
close_fds=False)
 self._post_launch()
 
+def _close_qmp_connection(self) -> None:
+"""
+Close the underlying QMP connection, if any.
+
+Dutifully report errors that occurred while closing, but assume
+that any error encountered indicates an abnormal termination
+process and not a failure to close.
+"""
+if self._qmp_connection is None:
+return
+
+try:
+self._qmp.close()
+except EOFError:
+# EOF can occur as an Exception here when using the Async
+# QMP backend. It indicates that the server closed the
+# stream. If we successfully issued 'quit' at any point,
+# then this was expected. If the remote went away without
+# our permission, it's worth reporting that as an abnormal
+# shutdown case.
+if not (self._user_killed or self._quit_issued):
+raise
+finally:
+self._qmp_connection = None
+
 def _early_cleanup(self) -> None:
 """
 Perform any cleanup that needs to happen before the VM exits.
@@ -460,9 +491,14 @@ def _soft_shutdown(self, timeout: Optional[int]) -> None:
 self._early_cleanup()
 
 if self._qmp_connection:
-if not self._quit_issued:
-# Might raise ConnectionReset
-self.qmp('quit')
+try:
+if not self._quit_issued:
+# May raise ExecInterruptedError or StateError if the
+# connection dies or has *already* died.
+self.qmp('quit')
+finally:
+# Regardless, we want to quiesce the connection.
+self._close_qmp_connection()
 
 # May raise subprocess.TimeoutExpired
 self._subp.wait(timeout=timeout)
-- 
2.31.1




[PATCH v4 7/8] python/aqmp: Create sync QMP wrapper for iotests

2021-10-13 Thread John Snow
This is a wrapper around the async QMPClient that mimics the old,
synchronous QEMUMonitorProtocol class. It is designed to be
interchangeable with the old implementation.

It does not, however, attempt to mimic Exception compatibility.

Signed-off-by: John Snow 
Acked-by: Hanna Reitz 
---
 python/qemu/aqmp/legacy.py | 138 +
 1 file changed, 138 insertions(+)
 create mode 100644 python/qemu/aqmp/legacy.py

diff --git a/python/qemu/aqmp/legacy.py b/python/qemu/aqmp/legacy.py
new file mode 100644
index 000..9e7b9fb80b9
--- /dev/null
+++ b/python/qemu/aqmp/legacy.py
@@ -0,0 +1,138 @@
+"""
+Sync QMP Wrapper
+
+This class pretends to be qemu.qmp.QEMUMonitorProtocol.
+"""
+
+import asyncio
+from typing import (
+Awaitable,
+List,
+Optional,
+TypeVar,
+Union,
+)
+
+import qemu.qmp
+from qemu.qmp import QMPMessage, QMPReturnValue, SocketAddrT
+
+from .qmp_client import QMPClient
+
+
+# pylint: disable=missing-docstring
+
+
+class QEMUMonitorProtocol(qemu.qmp.QEMUMonitorProtocol):
+def __init__(self, address: SocketAddrT,
+ server: bool = False,
+ nickname: Optional[str] = None):
+
+# pylint: disable=super-init-not-called
+self._aqmp = QMPClient(nickname)
+self._aloop = asyncio.get_event_loop()
+self._address = address
+self._timeout: Optional[float] = None
+
+_T = TypeVar('_T')
+
+def _sync(
+self, future: Awaitable[_T], timeout: Optional[float] = None
+) -> _T:
+return self._aloop.run_until_complete(
+asyncio.wait_for(future, timeout=timeout)
+)
+
+def _get_greeting(self) -> Optional[QMPMessage]:
+if self._aqmp.greeting is not None:
+# pylint: disable=protected-access
+return self._aqmp.greeting._asdict()
+return None
+
+# __enter__ and __exit__ need no changes
+# parse_address needs no changes
+
+def connect(self, negotiate: bool = True) -> Optional[QMPMessage]:
+self._aqmp.await_greeting = negotiate
+self._aqmp.negotiate = negotiate
+
+self._sync(
+self._aqmp.connect(self._address)
+)
+return self._get_greeting()
+
+def accept(self, timeout: Optional[float] = 15.0) -> QMPMessage:
+self._aqmp.await_greeting = True
+self._aqmp.negotiate = True
+
+self._sync(
+self._aqmp.accept(self._address),
+timeout
+)
+
+ret = self._get_greeting()
+assert ret is not None
+return ret
+
+def cmd_obj(self, qmp_cmd: QMPMessage) -> QMPMessage:
+return dict(
+self._sync(
+# pylint: disable=protected-access
+
+# _raw() isn't a public API, because turning off
+# automatic ID assignment is discouraged. For
+# compatibility with iotests *only*, do it anyway.
+self._aqmp._raw(qmp_cmd, assign_id=False),
+self._timeout
+)
+)
+
+# Default impl of cmd() delegates to cmd_obj
+
+def command(self, cmd: str, **kwds: object) -> QMPReturnValue:
+return self._sync(
+self._aqmp.execute(cmd, kwds),
+self._timeout
+)
+
+def pull_event(self,
+   wait: Union[bool, float] = False) -> Optional[QMPMessage]:
+if not wait:
+# wait is False/0: "do not wait, do not except."
+if self._aqmp.events.empty():
+return None
+
+# If wait is 'True', wait forever. If wait is False/0, the events
+# queue must not be empty; but it still needs some real amount
+# of time to complete.
+timeout = None
+if wait and isinstance(wait, float):
+timeout = wait
+
+return dict(
+self._sync(
+self._aqmp.events.get(),
+timeout
+)
+)
+
+def get_events(self, wait: Union[bool, float] = False) -> List[QMPMessage]:
+events = [dict(x) for x in self._aqmp.events.clear()]
+if events:
+return events
+
+event = self.pull_event(wait)
+return [event] if event is not None else []
+
+def clear_events(self) -> None:
+self._aqmp.events.clear()
+
+def close(self) -> None:
+self._sync(
+self._aqmp.disconnect()
+)
+
+def settimeout(self, timeout: Optional[float]) -> None:
+self._timeout = timeout
+
+def send_fd_scm(self, fd: int) -> None:
+self._aqmp.send_fd_scm(fd)
-- 
2.31.1




[PATCH v4 8/8] python, iotests: replace qmp with aqmp

2021-10-13 Thread John Snow
Swap out the synchronous QEMUMonitorProtocol from qemu.qmp with the sync
wrapper from qemu.aqmp instead.

Add an escape hatch in the form of the environment variable
QEMU_PYTHON_LEGACY_QMP which allows you to cajole QEMUMachine into using
the old implementation, proving that both implementations work
concurrently.

Signed-off-by: John Snow 
---
 python/qemu/machine/machine.py | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index a0cf69786b4..a487c397459 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -41,7 +41,6 @@
 )
 
 from qemu.qmp import (  # pylint: disable=import-error
-QEMUMonitorProtocol,
 QMPMessage,
 QMPReturnValue,
 SocketAddrT,
@@ -50,6 +49,12 @@
 from . import console_socket
 
 
+if os.environ.get('QEMU_PYTHON_LEGACY_QMP'):
+from qemu.qmp import QEMUMonitorProtocol
+else:
+from qemu.aqmp.legacy import QEMUMonitorProtocol
+
+
 LOG = logging.getLogger(__name__)
 
 
-- 
2.31.1




[PATCH v4 4/8] iotests: Accommodate async QMP Exception classes

2021-10-13 Thread John Snow
(But continue to support the old ones for now, too.)

There are very few cases of any user of QEMUMachine or a subclass
thereof relying on a QMP Exception type. If you'd like to check for
yourself, you want to grep for all of the derivatives of QMPError,
excluding 'AQMPError' and its derivatives. That'd be these:

- QMPError
- QMPConnectError
- QMPCapabilitiesError
- QMPTimeoutError
- QMPProtocolError
- QMPResponseError
- QMPBadPortError


Signed-off-by: John Snow 
Reviewed-by: Hanna Reitz 
---
 scripts/simplebench/bench_block_job.py| 3 ++-
 tests/qemu-iotests/tests/mirror-top-perms | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/simplebench/bench_block_job.py 
b/scripts/simplebench/bench_block_job.py
index 4f03c121697..a403c35b08f 100755
--- a/scripts/simplebench/bench_block_job.py
+++ b/scripts/simplebench/bench_block_job.py
@@ -28,6 +28,7 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
 from qemu.machine import QEMUMachine
 from qemu.qmp import QMPConnectError
+from qemu.aqmp import ConnectError
 
 
 def bench_block_job(cmd, cmd_args, qemu_args):
@@ -49,7 +50,7 @@ def bench_block_job(cmd, cmd_args, qemu_args):
 vm.launch()
 except OSError as e:
 return {'error': 'popen failed: ' + str(e)}
-except (QMPConnectError, socket.timeout):
+except (QMPConnectError, ConnectError, socket.timeout):
 return {'error': 'qemu failed: ' + str(vm.get_log())}
 
 try:
diff --git a/tests/qemu-iotests/tests/mirror-top-perms 
b/tests/qemu-iotests/tests/mirror-top-perms
index 3d475aa3a54..a2d5c269d7a 100755
--- a/tests/qemu-iotests/tests/mirror-top-perms
+++ b/tests/qemu-iotests/tests/mirror-top-perms
@@ -21,8 +21,9 @@
 
 import os
 
-from qemu import qmp
+from qemu.aqmp import ConnectError
 from qemu.machine import machine
+from qemu.qmp import QMPConnectError
 
 import iotests
 from iotests import qemu_img
@@ -102,7 +103,7 @@ class TestMirrorTopPerms(iotests.QMPTestCase):
 self.vm_b.launch()
 print('ERROR: VM B launched successfully, this should not have '
   'happened')
-except qmp.QMPConnectError:
+except (QMPConnectError, ConnectError):
 assert 'Is another process using the image' in self.vm_b.get_log()
 
 result = self.vm.qmp('block-job-cancel',
-- 
2.31.1




[PATCH v4 3/8] python/aqmp: Remove scary message

2021-10-13 Thread John Snow
The scary message interferes with the iotests output. Coincidentally, if
iotests works by removing this, then it's good evidence that we don't
really need to scare people away from using it.

Signed-off-by: John Snow 
Acked-by: Hanna Reitz 
---
 python/qemu/aqmp/__init__.py | 12 
 1 file changed, 12 deletions(-)

diff --git a/python/qemu/aqmp/__init__.py b/python/qemu/aqmp/__init__.py
index d1b0e4dc3d3..880d5b6fa7f 100644
--- a/python/qemu/aqmp/__init__.py
+++ b/python/qemu/aqmp/__init__.py
@@ -22,7 +22,6 @@
 # the COPYING file in the top-level directory.
 
 import logging
-import warnings
 
 from .error import AQMPError
 from .events import EventListener
@@ -31,17 +30,6 @@
 from .qmp_client import ExecInterruptedError, ExecuteError, QMPClient
 
 
-_WMSG = """
-
-The Asynchronous QMP library is currently in development and its API
-should be considered highly fluid and subject to change. It should
-not be used by any other scripts checked into the QEMU tree.
-
-Proceed with caution!
-"""
-
-warnings.warn(_WMSG, FutureWarning)
-
 # Suppress logging unless an application engages it.
 logging.getLogger('qemu.aqmp').addHandler(logging.NullHandler())
 
-- 
2.31.1




[PATCH v4 5/8] iotests: Conditionally silence certain AQMP errors

2021-10-13 Thread John Snow
AQMP likes to be very chatty about errors it encounters. In general,
this is good because it allows us to get good diagnostic information for
otherwise complex async failures.

For example, during a failed QMP connection attempt, we might see:

+ERROR:qemu.aqmp.qmp_client.qemub-2536319:Negotiation failed: EOFError
+ERROR:qemu.aqmp.qmp_client.qemub-2536319:Failed to establish session: EOFError

This might be nice in iotests output, because failure scenarios
involving the new QMP library will be spelled out plainly in the output
diffs.

For tests that are intentionally causing this scenario though, filtering
that log output could be a hassle. For now, add a context manager that
simply lets us toggle this output off during a critical region.

(Additionally, a forthcoming patch allows the use of either legacy or
async QMP to be toggled with an environment variable. In this
circumstance, we can't amend the iotest output to just always expect the
error message, either. Just suppress it for now. More rigorous log
filtering can be investigated later if/when it is deemed safe to
permanently replace the legacy QMP library.)

Signed-off-by: John Snow 
Reviewed-by: Hanna Reitz 
---
 tests/qemu-iotests/iotests.py | 20 +++-
 tests/qemu-iotests/tests/mirror-top-perms | 12 
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e5fff6ddcfc..e2f9d873ada 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -30,7 +30,7 @@
 import subprocess
 import sys
 import time
-from typing import (Any, Callable, Dict, Iterable,
+from typing import (Any, Callable, Dict, Iterable, Iterator,
 List, Optional, Sequence, TextIO, Tuple, Type, TypeVar)
 import unittest
 
@@ -114,6 +114,24 @@
 sample_img_dir = os.environ['SAMPLE_IMG_DIR']
 
 
+@contextmanager
+def change_log_level(
+logger_name: str, level: int = logging.CRITICAL) -> Iterator[None]:
+"""
+Utility function for temporarily changing the log level of a logger.
+
+This can be used to silence errors that are expected or uninteresting.
+"""
+_logger = logging.getLogger(logger_name)
+current_level = _logger.level
+_logger.setLevel(level)
+
+try:
+yield
+finally:
+_logger.setLevel(current_level)
+
+
 def unarchive_sample_image(sample, fname):
 sample_fname = os.path.join(sample_img_dir, sample + '.bz2')
 with bz2.open(sample_fname) as f_in, open(fname, 'wb') as f_out:
diff --git a/tests/qemu-iotests/tests/mirror-top-perms 
b/tests/qemu-iotests/tests/mirror-top-perms
index a2d5c269d7a..0a51a613f39 100755
--- a/tests/qemu-iotests/tests/mirror-top-perms
+++ b/tests/qemu-iotests/tests/mirror-top-perms
@@ -26,7 +26,7 @@ from qemu.machine import machine
 from qemu.qmp import QMPConnectError
 
 import iotests
-from iotests import qemu_img
+from iotests import change_log_level, qemu_img
 
 
 image_size = 1 * 1024 * 1024
@@ -100,9 +100,13 @@ class TestMirrorTopPerms(iotests.QMPTestCase):
 self.vm_b.add_blockdev(f'file,node-name=drive0,filename={source}')
 self.vm_b.add_device('virtio-blk,drive=drive0,share-rw=on')
 try:
-self.vm_b.launch()
-print('ERROR: VM B launched successfully, this should not have '
-  'happened')
+# Silence AQMP errors temporarily.
+# TODO: Remove this and just allow the errors to be logged when
+# AQMP fully replaces QMP.
+with change_log_level('qemu.aqmp'):
+self.vm_b.launch()
+print('ERROR: VM B launched successfully, '
+  'this should not have happened')
 except (QMPConnectError, ConnectError):
 assert 'Is another process using the image' in self.vm_b.get_log()
 
-- 
2.31.1




[PATCH v4 6/8] iotests/300: avoid abnormal shutdown race condition

2021-10-13 Thread John Snow
Wait for the destination VM to close itself instead of racing to shut it
down first, which produces different error log messages from AQMP
depending on precisely when we tried to shut it down.

(For example: We may try to issue 'quit' immediately prior to the target
VM closing its QMP socket, which will cause an ECONNRESET error to be
logged. Waiting for the VM to exit itself avoids the race on shutdown
behavior.)

Reported-by: Hanna Reitz 
Signed-off-by: John Snow 
---
 tests/qemu-iotests/300 | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
index 10f9f2a8da6..bbea7248005 100755
--- a/tests/qemu-iotests/300
+++ b/tests/qemu-iotests/300
@@ -24,8 +24,6 @@ import random
 import re
 from typing import Dict, List, Optional
 
-from qemu.machine import machine
-
 import iotests
 
 
@@ -461,12 +459,10 @@ class 
TestBlockBitmapMappingErrors(TestDirtyBitmapMigration):
   f"'{self.src_node_name}': Name is longer than 255 bytes",
   log)
 
-# Expect abnormal shutdown of the destination VM because of
-# the failed migration
-try:
-self.vm_b.shutdown()
-except machine.AbnormalShutdown:
-pass
+# Destination VM will terminate w/ error of its own accord
+# due to the failed migration.
+self.vm_b.wait()
+assert self.vm_b.exitcode() > 0
 
 def test_aliased_bitmap_name_too_long(self) -> None:
 # Longer than the maximum for bitmap names
-- 
2.31.1




[PATCH v4 0/8] Switch iotests to using Async QMP

2021-10-13 Thread John Snow
GitLab: https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-iotest-wrapper
CI: https://gitlab.com/jsnow/qemu/-/pipelines/387972757

Hiya,

This series continues where the last two AQMP series left off and adds a
synchronous 'legacy' wrapper around the new AQMP interface, then drops
it straight into iotests to prove that AQMP is functional and totally
cool and fine. The disruption and churn to iotests is pretty minimal.

In the event that a regression happens and I am not physically proximate
to inflict damage upon, one may set the QEMU_PYTHON_LEGACY_QMP variable
to any non-empty string as it pleases you to engage the QMP machinery
you are used to.

I'd like to try and get this committed early in the 6.2 development
cycle to give ample time to smooth over any possible regressions. I've
tested it locally and via gitlab CI, across Python versions 3.6 through
3.10, and "worksforme". If something bad happens, we can revert the
actual switch-flip very trivially.

V4:

001/8:[] [--] 'python/machine: remove has_quit argument'
002/8:[] [--] 'python/machine: Handle QMP errors on close more meticulously'
003/8:[] [--] 'python/aqmp: Remove scary message'
004/8:[] [--] 'iotests: Accommodate async QMP Exception classes'
005/8:[] [--] 'iotests: Conditionally silence certain AQMP errors'
006/8:[down] 'iotests/300: avoid abnormal shutdown race condition'
007/8:[] [--] 'python/aqmp: Create sync QMP wrapper for iotests'
008/8:[] [--] 'python, iotests: replace qmp with aqmp'

006: Added to address a race condition in iotest 300.
 (Thanks for the repro, Hanna)

V3:

001/7:[] [--] 'python/machine: remove has_quit argument'
002/7:[0002] [FC] 'python/machine: Handle QMP errors on close more meticulously'
003/7:[] [--] 'python/aqmp: Remove scary message'
004/7:[0006] [FC] 'iotests: Accommodate async QMP Exception classes'
005/7:[0003] [FC] 'iotests: Conditionally silence certain AQMP errors'
006/7:[0009] [FC] 'python/aqmp: Create sync QMP wrapper for iotests'
007/7:[] [--] 'python, iotests: replace qmp with aqmp'

002: Account for force-kill cases, too.
003: Shuffled earlier into the series to prevent a mid-series regression.
004: Rewrite the imports to be less "heterogeneous" ;)
005: Add in a TODO for me to trip over in the future.
006: Fix a bug surfaced by a new iotest where waiting with pull_event for a
 timeout of 0.0 will cause a timeout exception to be raised even if there
 was an event ready to be read.

V2: A distant dream, half-remembered.
V1: Apocrypha.

John Snow (8):
  python/machine: remove has_quit argument
  python/machine: Handle QMP errors on close more meticulously
  python/aqmp: Remove scary message
  iotests: Accommodate async QMP Exception classes
  iotests: Conditionally silence certain AQMP errors
  iotests/300: avoid abnormal shutdown race condition
  python/aqmp: Create sync QMP wrapper for iotests
  python, iotests: replace qmp with aqmp

 python/qemu/aqmp/__init__.py  |  12 --
 python/qemu/aqmp/legacy.py| 138 ++
 python/qemu/machine/machine.py|  85 +
 scripts/simplebench/bench_block_job.py|   3 +-
 tests/qemu-iotests/040|   7 +-
 tests/qemu-iotests/218|   2 +-
 tests/qemu-iotests/255|   2 +-
 tests/qemu-iotests/300|  12 +-
 tests/qemu-iotests/iotests.py |  20 +++-
 tests/qemu-iotests/tests/mirror-top-perms |  17 ++-
 10 files changed, 242 insertions(+), 56 deletions(-)
 create mode 100644 python/qemu/aqmp/legacy.py

-- 
2.31.1





[PATCH v4 1/8] python/machine: remove has_quit argument

2021-10-13 Thread John Snow
If we spy on the QMP commands instead, we don't need callers to remember
to pass it. Seems like a fair trade-off.

The one slightly weird bit is overloading this instance variable for
wait(), where we use it to mean "don't issue the qmp 'quit'
command". This means that wait() will "fail" if the QEMU process does
not terminate of its own accord.

In most cases, we probably did already actually issue quit -- some
iotests do this -- but in some others, we may be waiting for QEMU to
terminate for some other reason, such as a test wherein we tell the
guest (directly) to shut down.

Signed-off-by: John Snow 
Reviewed-by: Hanna Reitz 
---
 python/qemu/machine/machine.py | 34 +++---
 tests/qemu-iotests/040 |  7 +--
 tests/qemu-iotests/218 |  2 +-
 tests/qemu-iotests/255 |  2 +-
 4 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 056d340e355..0bd40bc2f76 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -170,6 +170,7 @@ def __init__(self,
 self._console_socket: Optional[socket.socket] = None
 self._remove_files: List[str] = []
 self._user_killed = False
+self._quit_issued = False
 
 def __enter__(self: _T) -> _T:
 return self
@@ -368,6 +369,7 @@ def _post_shutdown(self) -> None:
 command = ''
 LOG.warning(msg, -int(exitcode), command)
 
+self._quit_issued = False
 self._user_killed = False
 self._launched = False
 
@@ -443,15 +445,13 @@ def _hard_shutdown(self) -> None:
 self._subp.kill()
 self._subp.wait(timeout=60)
 
-def _soft_shutdown(self, timeout: Optional[int],
-   has_quit: bool = False) -> None:
+def _soft_shutdown(self, timeout: Optional[int]) -> None:
 """
 Perform early cleanup, attempt to gracefully shut down the VM, and wait
 for it to terminate.
 
 :param timeout: Timeout in seconds for graceful shutdown.
 A value of None is an infinite wait.
-:param has_quit: When True, don't attempt to issue 'quit' QMP command
 
 :raise ConnectionReset: On QMP communication errors
 :raise subprocess.TimeoutExpired: When timeout is exceeded waiting for
@@ -460,21 +460,19 @@ def _soft_shutdown(self, timeout: Optional[int],
 self._early_cleanup()
 
 if self._qmp_connection:
-if not has_quit:
+if not self._quit_issued:
 # Might raise ConnectionReset
-self._qmp.cmd('quit')
+self.qmp('quit')
 
 # May raise subprocess.TimeoutExpired
 self._subp.wait(timeout=timeout)
 
-def _do_shutdown(self, timeout: Optional[int],
- has_quit: bool = False) -> None:
+def _do_shutdown(self, timeout: Optional[int]) -> None:
 """
 Attempt to shutdown the VM gracefully; fallback to a hard shutdown.
 
 :param timeout: Timeout in seconds for graceful shutdown.
 A value of None is an infinite wait.
-:param has_quit: When True, don't attempt to issue 'quit' QMP command
 
 :raise AbnormalShutdown: When the VM could not be shut down gracefully.
 The inner exception will likely be ConnectionReset or
@@ -482,13 +480,13 @@ def _do_shutdown(self, timeout: Optional[int],
 may result in its own exceptions, likely subprocess.TimeoutExpired.
 """
 try:
-self._soft_shutdown(timeout, has_quit)
+self._soft_shutdown(timeout)
 except Exception as exc:
 self._hard_shutdown()
 raise AbnormalShutdown("Could not perform graceful shutdown") \
 from exc
 
-def shutdown(self, has_quit: bool = False,
+def shutdown(self,
  hard: bool = False,
  timeout: Optional[int] = 30) -> None:
 """
@@ -498,7 +496,6 @@ def shutdown(self, has_quit: bool = False,
 If the VM has not yet been launched, or shutdown(), wait(), or kill()
 have already been called, this method does nothing.
 
-:param has_quit: When true, do not attempt to issue 'quit' QMP command.
 :param hard: When true, do not attempt graceful shutdown, and
  suppress the SIGKILL warning log message.
 :param timeout: Optional timeout in seconds for graceful shutdown.
@@ -512,7 +509,7 @@ def shutdown(self, has_quit: bool = False,
 self._user_killed = True
 self._hard_shutdown()
 else:
-self._do_shutdown(timeout, has_quit)
+self._do_shutdown(timeout)
 finally:
 self._post_shutdown()
 
@@ -529,7 +526,8 @@ def wait(self, timeout: Optional[int] = 30) -> None:
 :param timeout: Optional timeout in seconds. Default 30 seconds.
  

iotest 030 SIGSEGV

2021-10-13 Thread John Snow
In trying to replace the QMP library backend, I have now twice stumbled
upon a SIGSEGV in iotest 030 in the last three weeks or so.

I didn't have debug symbols on at the time, so I've got only this stack
trace:

(gdb) thread apply all bt

Thread 8 (Thread 0x7f0a6b8c4640 (LWP 1873554)):
#0  0x7f0a748a53ff in poll () at /lib64/libc.so.6
#1  0x7f0a759bfa36 in g_main_context_iterate.constprop () at
/lib64/libglib-2.0.so.0
#2  0x7f0a7596d163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
#3  0x557dac31d121 in iothread_run (opaque=opaque@entry=0x557dadd98800)
at ../../iothread.c:73
#4  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a6b8c3650) at
../../util/qemu-thread-posix.c:557
#5  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#6  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 7 (Thread 0x7f0a6b000640 (LWP 1873555)):
#0  0x7f0a747ed7d2 in sigtimedwait () at /lib64/libc.so.6
#1  0x7f0a74b72cdc in sigwait () at /lib64/libpthread.so.0
#2  0x557dac2e403b in dummy_cpu_thread_fn (arg=arg@entry=0x557dae041c10)
at ../../accel/dummy-cpus.c:46
#3  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a6afff650) at
../../util/qemu-thread-posix.c:557
#4  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#5  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 6 (Thread 0x7f0a56afa640 (LWP 1873582)):
#0  0x7f0a74b71308 in do_futex_wait.constprop () at
/lib64/libpthread.so.0
#1  0x7f0a74b71433 in __new_sem_wait_slow.constprop.0 () at
/lib64/libpthread.so.0
#2  0x557dac4d8f1f in qemu_sem_timedwait (sem=sem@entry=0x557dadd62878,
ms=ms@entry=1) at ../../util/qemu-thread-posix.c:327
#3  0x557dac4f5ac4 in worker_thread (opaque=opaque@entry=0x557dadd62800)
at ../../util/thread-pool.c:91
#4  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a56af9650) at
../../util/qemu-thread-posix.c:557
#5  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#6  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 5 (Thread 0x7f0a57dff640 (LWP 1873580)):
#0  0x7f0a74b71308 in do_futex_wait.constprop () at
/lib64/libpthread.so.0
#1  0x7f0a74b71433 in __new_sem_wait_slow.constprop.0 () at
/lib64/libpthread.so.0
#2  0x557dac4d8f1f in qemu_sem_timedwait (sem=sem@entry=0x557dadd62878,
ms=ms@entry=1) at ../../util/qemu-thread-posix.c:327
#3  0x557dac4f5ac4 in worker_thread (opaque=opaque@entry=0x557dadd62800)
at ../../util/thread-pool.c:91
#4  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a57dfe650) at
../../util/qemu-thread-posix.c:557
#5  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#6  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 4 (Thread 0x7f0a572fb640 (LWP 1873581)):
#0  0x7f0a74b7296f in pread64 () at /lib64/libpthread.so.0
#1  0x557dac39f18f in pread64 (__offset=,
__nbytes=, __buf=, __fd=) at
/usr/include/bits/unistd.h:105
#2  handle_aiocb_rw_linear (aiocb=aiocb@entry=0x7f0a573fc150,
buf=0x7f0a6a47e000 '\377' ...) at
../../block/file-posix.c:1481
#3  0x557dac39f664 in handle_aiocb_rw (opaque=0x7f0a573fc150) at
../../block/file-posix.c:1521
#4  0x557dac4f5b54 in worker_thread (opaque=opaque@entry=0x557dadd62800)
at ../../util/thread-pool.c:104
#5  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a572fa650) at
../../util/qemu-thread-posix.c:557
#6  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#7  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 3 (Thread 0x7f0a714e8640 (LWP 1873552)):
#0  0x7f0a748aaedd in syscall () at /lib64/libc.so.6
#1  0x557dac4d916a in qemu_futex_wait (val=,
f=) at /home/jsnow/src/qemu/include/qemu/futex.h:29
#2  qemu_event_wait (ev=ev@entry=0x557dace1f1e8 ) at
../../util/qemu-thread-posix.c:480
#3  0x557dac4e189a in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:258
#4  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a714e7650) at
../../util/qemu-thread-posix.c:557
#5  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#6  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 2 (Thread 0x7f0a70ae5640 (LWP 1873553)):
#0  0x7f0a74b71308 in do_futex_wait.constprop () at
/lib64/libpthread.so.0
#1  0x7f0a74b71433 in __new_sem_wait_slow.constprop.0 () at
/lib64/libpthread.so.0
#2  0x557dac4d8f1f in qemu_sem_timedwait (sem=sem@entry=0x557dadd62878,
ms=ms@entry=1) at ../../util/qemu-thread-posix.c:327
#3  0x557dac4f5ac4 in worker_thread (opaque=opaque@entry=0x557dadd62800)
at ../../util/thread-pool.c:91
#4  0x557dac4d7f89 in qemu_thread_start (args=0x7f0a70ae4650) at
../../util/qemu-thread-posix.c:557
#5  0x7f0a74b683f9 in start_thread () at /lib64/libpthread.so.0
#6  0x7f0a748b04c3 in clone () at /lib64/libc.so.6

Thread 1 (Thread 0x7f0a714ebec0 (LWP 1873551)):
#0  bdrv_inherits_from_recursive (parent=parent@entry=0x557dadfb5050,
child=0xafafafafafafafaf, child@entry=0x557dae857010) at ../../block.c:3124
#1  

Re: [PATCH v2 09/15] softmmu/qdev-monitor: add error handling in qdev_set_id

2021-10-13 Thread Eric Blake
On Wed, Oct 13, 2021 at 03:10:38PM +0200, Damien Hedde wrote:
> > > @@ -691,7 +703,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error 
> > > **errp)
> > >   }
> > >   }
> > > -qdev_set_id(dev, g_strdup(qemu_opts_id(opts)));
> > > +/*
> > > + * set dev's parent and register its id.
> > > + * If it fails it means the id is already taken.
> > > + */
> > > +if (!qdev_set_id(dev, g_strdup(qemu_opts_id(opts)), errp)) {
> > > +goto err_del_dev;
> > 
> > ...nor on this, which means the g_strdup() leaks on failure.
> > 
> 
> Since we strdup the id just before calling qdev_set_id.
> Maybe we should do the the strdup in qdev_set_id (and free things on error
> there too). It seems simplier than freeing things on the callee side just in
> case of an error.

Indeed.  If we expected qdev_set_id() to be passed something that it
can later free, we would have used 'char *'; but because we used
'const char *' for that parameter, it really does make more sense for
the callers to pass in any string and for qdev_set_id() to do the
necessary strdup()ing, as well as clean up on error.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2 00/15] qdev: Add JSON -device

2021-10-13 Thread Kevin Wolf
Am 13.10.2021 um 17:30 hat Michael S. Tsirkin geschrieben:
> On Fri, Oct 08, 2021 at 03:34:27PM +0200, Kevin Wolf wrote:
> > It's still a long way until we'll have QAPIfied devices, but there are
> > some improvements that we can already make now to make the future switch
> > easier.
> > 
> > One important part of this is having code paths without QemuOpts, which
> > we want to get rid of and replace with the keyval parser in the long
> > run. This series adds support for JSON syntax to -device, which bypasses
> > QemuOpts.
> > 
> > While we're not using QAPI yet, devices are based on QOM, so we already
> > do have type checks and an implied schema. JSON syntax supported now can
> > be supported by QAPI later and regarding command line compatibility,
> > actually switching to it becomes an implementation detail this way (of
> > course, it will still add valuable user-visible features like
> > introspection and documentation).
> > 
> > Apart from making things more future proof, this also immediately adds
> > a way to do non-scalar properties on the command line. nvme could have
> > used list support recently, and the lack of it in -device led to some
> > rather unnatural solution in the first version (doing the relationship
> > between a device and objects backwards) and loss of features in the
> > following. With this series, using a list as a device property should be
> > possible without any weird tricks.
> > 
> > Unfortunately, even QMP device_add goes through QemuOpts before this
> > series, which destroys any type safety QOM provides and also can't
> > support non-scalar properties. This is a bug, but it turns out that
> > libvirt actually relies on it and passes only strings for everything.
> > So this series still leaves device_add alone until libvirt is fixed.
> 
> 
> Reviewed-by: Michael S. Tsirkin 
> 
> I assume you are merging this?

Yes, I can merge it through my tree. Thanks for the review!

Kevin




Re: [PATCH v3 0/7] Switch iotests to using Async QMP

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 10:49 AM Hanna Reitz  wrote:

> On 13.10.21 16:00, John Snow wrote:
> >
> >
> > On Wed, Oct 13, 2021 at 8:51 AM John Snow  wrote:
> >
> >
> >
> > On Wed, Oct 13, 2021 at 4:45 AM Hanna Reitz 
> wrote:
> >
> > On 13.10.21 00:34, John Snow wrote:
> > > Based-on: <20211012214152.802483-1-js...@redhat.com>
> > >[PULL 00/10] Python patches
> > > GitLab:
> >
> https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-iotest-wrapper
> > > CI: https://gitlab.com/jsnow/qemu/-/pipelines/387210591
> > >
> > > Hiya,
> > >
> > > This series continues where the last two AQMP series left
> > off and adds a
> > > synchronous 'legacy' wrapper around the new AQMP interface,
> > then drops
> > > it straight into iotests to prove that AQMP is functional
> > and totally
> > > cool and fine. The disruption and churn to iotests is pretty
> > minimal.
> > >
> > > In the event that a regression happens and I am not
> > physically proximate
> > > to inflict damage upon, one may set the
> > QEMU_PYTHON_LEGACY_QMP variable
> > > to any non-empty string as it pleases you to engage the QMP
> > machinery
> > > you are used to.
> > >
> > > I'd like to try and get this committed early in the 6.2
> > development
> > > cycle to give ample time to smooth over any possible
> > regressions. I've
> > > tested it locally and via gitlab CI, across Python versions
> > 3.6 through
> > > 3.10, and "worksforme". If something bad happens, we can
> > revert the
> > > actual switch-flip very trivially.
> >
> > So running iotests locally, I got one failure:
> >
> > $ TEST_DIR=/tmp/vdi-tests ./check -c writethrough -vdi 300
> > [...]
> > 300 fail   [10:28:06] [10:28:11]
> > 5.1s output mismatch (see 300.out.bad)
> > --- /home/maxx/projects/qemu/tests/qemu-iotests/300.out
> > +++ 300.out.bad
> > @@ -1,4 +1,5 @@
> > -...
> >
>  +..ERROR:qemu.aqmp.qmp_client.qemu-b-222963:Task.Reader:
> >
> > ConnectionResetError: [Errno 104] Connection reset by peer
> > +.
> >
>--
> >   Ran 39 tests
> > [...]
> >
> >
> > Oh, unfortunate.
> >
> >
> > I’m afraid I can’t really give a reproducer or anything.  It
> > feels like
> >
> >
> > Thank you for the report!
> >
> > just some random spurious timing-related error. Although then
> > again,
> > 300 does have an `except machine.AbnormalShutdown` clause at one
> > point...  So perhaps that’s the culprit, and we need to
> > disable logging
> > there.
> >
> >
> > I'll investigate!
> >
> >
> > Unfortunately, even in a loop some 150 times I couldn't reproduce this
> > one. As you point out, it appears to be just a failure caused by
> > logging. The test logic itself completes as expected.
> >
> > Still, I would expect, on a "clean" shutdown of the destination host
> > (where the destination process fails to load the migration stream and
> > voluntarily exits with an error code) to end with a FIN/ACK for TCP or
> > ... uh, whatever happens for a UNIX socket. Where's the Connection
> > Reset coming from? Did the destination VM process *crash*?
> >
> > I'm not so sure that I *should* silence this error, but I also can't
> > reproduce it at all to answer these questions, so uh. uhhh. I guess I
> > will just hammer it on a loop a few hundred times more and see if I
> > get lucky.
>
> I could reproduce it, by running 20 instances concurrently.  (Needs a
> change to testrunner.py, so that the reference outputs don’t collide:
>
> diff --git a/tests/qemu-iotests/testrunner.py
> b/tests/qemu-iotests/testrunner.py
> index a56b6da396..fd0a3a1eeb 100644
> --- a/tests/qemu-iotests/testrunner.py
> +++ b/tests/qemu-iotests/testrunner.py
> @@ -221,7 +221,7 @@ def find_reference(self, test: str) -> str:
>
>   def do_run_test(self, test: str) -> TestResult:
>   f_test = Path(test)
> -f_bad = Path(f_test.name + '.out.bad')
> +f_bad = Path(f'{os.getpid()}-{f_test.name}.out.bad')
>   f_notrun = Path(f_test.name + '.notrun')
>   f_casenotrun = Path(f_test.name + '.casenotrun')
>   f_reference = Path(self.find_reference(test))
>
> )
>
> And then:
>
> $ while TEST_DIR=/tmp/vdi-$$ ./check -vdi 300; do; done
>
> Which pretty quickly shows the error in at least one of those loops
> (under a minute).
>
> As far as I can tell, changing the log level in 300 does indeed fix it:
>
> diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
> 

Re: [PATCH 09/13] iotests: split linters.py out from 297

2021-10-13 Thread Hanna Reitz

On 13.10.21 17:07, John Snow wrote:



On Wed, Oct 13, 2021 at 7:50 AM Hanna Reitz  wrote:

On 04.10.21 23:04, John Snow wrote:
> Now, 297 is just the iotests-specific incantations and
linters.py is as
> minimal as I can think to make it. The only remaining element in
here
> that ought to be configuration and not code is the list of skip
files,

Yeah...

> but they're still numerous enough that repeating them for mypy and
> pylint configurations both would be ... a hassle.

I agree.

> Signed-off-by: John Snow 
> ---
>   tests/qemu-iotests/297        | 72 +++---
>   tests/qemu-iotests/linters.py | 83
+++
>   2 files changed, 88 insertions(+), 67 deletions(-)
>   create mode 100644 tests/qemu-iotests/linters.py

I’d like to give an A-b because now the statuscode-returning
function is
in a library.  But I already gave an A-b on the last patch precisely
because of the interface, and I shouldn’t be so grumpy.

Reviewed-by: Hanna Reitz 


I'm not entirely sure I understand your dislike(?) of status codes. 
I'm not trying to ignore the feedback, but I don't think I understand 
it fully.


It’s the fact that we only use status codes because they are part of the 
interface of shell commands.  A python function isn’t a shell command, 
so I find it weird to use that interface there.  Returning True/False 
would make more sense, for example.


I understand we have the same thing with qemu* commands in iotests.py, 
so I shouldn’t be so stuck on it...


Would it be better if I removed check=False and allowed the functions 
to raise an Exception on non-zero subprocess return? (Possibly having 
the function itself print the stdout on the error case before re-raising.)


Yes, I would like that better! :)

Hanna




Re: [PATCH v2 00/15] qdev: Add JSON -device

2021-10-13 Thread Michael S. Tsirkin
On Fri, Oct 08, 2021 at 03:34:27PM +0200, Kevin Wolf wrote:
> It's still a long way until we'll have QAPIfied devices, but there are
> some improvements that we can already make now to make the future switch
> easier.
> 
> One important part of this is having code paths without QemuOpts, which
> we want to get rid of and replace with the keyval parser in the long
> run. This series adds support for JSON syntax to -device, which bypasses
> QemuOpts.
> 
> While we're not using QAPI yet, devices are based on QOM, so we already
> do have type checks and an implied schema. JSON syntax supported now can
> be supported by QAPI later and regarding command line compatibility,
> actually switching to it becomes an implementation detail this way (of
> course, it will still add valuable user-visible features like
> introspection and documentation).
> 
> Apart from making things more future proof, this also immediately adds
> a way to do non-scalar properties on the command line. nvme could have
> used list support recently, and the lack of it in -device led to some
> rather unnatural solution in the first version (doing the relationship
> between a device and objects backwards) and loss of features in the
> following. With this series, using a list as a device property should be
> possible without any weird tricks.
> 
> Unfortunately, even QMP device_add goes through QemuOpts before this
> series, which destroys any type safety QOM provides and also can't
> support non-scalar properties. This is a bug, but it turns out that
> libvirt actually relies on it and passes only strings for everything.
> So this series still leaves device_add alone until libvirt is fixed.


Reviewed-by: Michael S. Tsirkin 

I assume you are merging this?

> v2:
> - Drop type safe QMP device_add because libvirt gets it wrong, too
> - More network patches to eliminate dependencies on the legacy QemuOpts
>   data structures which would not contain all devices any more after
>   this series. Fix some bugs there as a side effect.
> - Remove an unnecessary use of ERRP_GUARD()
> - Replaced error handling patch for qdev_set_id() with Damien's
> - Drop the deprecation patch until libvirt uses the new JSON syntax
> 
> Damien Hedde (1):
>   softmmu/qdev-monitor: add error handling in qdev_set_id
> 
> Kevin Wolf (14):
>   net: Introduce NetClientInfo.check_peer_type()
>   net/vhost-user: Fix device compatibility check
>   net/vhost-vdpa: Fix device compatibility check
>   qom: Reduce use of error_propagate()
>   iotests/245: Fix type for iothread property
>   iotests/051: Fix typo
>   qdev: Avoid using string visitor for properties
>   qdev: Make DeviceState.id independent of QemuOpts
>   qemu-option: Allow deleting opts during qemu_opts_foreach()
>   qdev: Add Error parameter to hide_device() callbacks
>   virtio-net: Store failover primary opts pointer locally
>   virtio-net: Avoid QemuOpts in failover_find_primary_device()
>   qdev: Base object creation on QDict rather than QemuOpts
>   vl: Enable JSON syntax for -device
> 
>  qapi/qdev.json  | 15 +++--
>  include/hw/qdev-core.h  | 15 +++--
>  include/hw/virtio/virtio-net.h  |  2 +
>  include/monitor/qdev.h  | 27 +++-
>  include/net/net.h   |  2 +
>  hw/arm/virt.c   |  2 +-
>  hw/core/qdev-properties-system.c|  6 ++
>  hw/core/qdev.c  | 11 +++-
>  hw/net/virtio-net.c | 85 -
>  hw/pci-bridge/pci_expander_bridge.c |  2 +-
>  hw/ppc/e500.c   |  2 +-
>  hw/vfio/pci.c   |  4 +-
>  hw/xen/xen-legacy-backend.c |  3 +-
>  net/vhost-user.c| 41 
>  net/vhost-vdpa.c| 37 ---
>  qom/object.c|  7 +-
>  qom/object_interfaces.c | 19 ++
>  softmmu/qdev-monitor.c  | 99 +++--
>  softmmu/vl.c| 63 --
>  util/qemu-option.c  |  4 +-
>  tests/qemu-iotests/051  |  2 +-
>  tests/qemu-iotests/051.pc.out   |  4 +-
>  tests/qemu-iotests/245  |  4 +-
>  23 files changed, 278 insertions(+), 178 deletions(-)
> 
> -- 
> 2.31.1




Re: [PATCH 10/13] iotests/linters: Add entry point for linting via Python CI

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 8:11 AM Hanna Reitz  wrote:

> On 04.10.21 23:05, John Snow wrote:
> > We need at least a tiny little shim here to join test file discovery
> > with test invocation. This logic could conceivably be hosted somewhere
> > in python/, but I felt it was strictly the least-rude thing to keep the
> > test logic here in iotests/, even if this small function isn't itself an
> > iotest.
> >
> > Note that we don't actually even need the executable bit here, we'll be
> > relying on the ability to run this module as a script using Python CLI
> > arguments. No chance it gets misunderstood as an actual iotest that way.
> >
> > (It's named, not in tests/, doesn't have the execute bit, and doesn't
> > have an execution shebang.)
> >
> > Signed-off-by: John Snow 
> >
> > ---
> >
> > (1) I think that the test file discovery logic and skip list belong
> together,
> >  and that those items belong in iotests/. I think they also belong in
> >  whichever directory pylintrc and mypy.ini are in, also in iotests/.
>
> Agreed.
>
> > (2) Moving this logic into python/tests/ is challenging because I'd have
> >  to import iotests code from elsewhere in the source tree, which just
> >  inverts an existing problem I have been trying to rid us of --
> >  needing to muck around with PYTHONPATH or sys.path hacking in python
> >  scripts. I'm keen to avoid this.
>
> OK.
>
> > (3) If we moved all python tests into tests/ and gave them *.py
> >  extensions, we wouldn't even need the test discovery functions
> >  anymore, and all of linters.py could be removed entirely, including
> >  this execution shim. We could rely on mypy/pylint's own file
> >  discovery mechanisms at that point. More work than I'm up for with
> >  just this series, but I could be coaxed into doing it if there was
> >  some promise of not rejecting all that busywork ;)
>
> I believe the only real value this would gain is that the tests would
> have nicer names and we would have to delint them.  If we find those
> goals to justify the effort, then we can put in the effort; and we can
> do that slowly, test by test.  I don’t think we must do it now in one
> big lump just to get rid of the file discovery functions.
>
>
Yeah, I agree -- just do it over time and as-needed. I'm sure I will be
bothered by something-or-other sooner-or-later and I'll wind up doing it
anyway. Just maybe not this week!


> > Signed-off-by: John Snow 
> > ---
> >   tests/qemu-iotests/linters.py | 18 ++
> >   1 file changed, 18 insertions(+)
> >
> > diff --git a/tests/qemu-iotests/linters.py
> b/tests/qemu-iotests/linters.py
> > index f6a2dc139fd..191df173064 100644
> > --- a/tests/qemu-iotests/linters.py
> > +++ b/tests/qemu-iotests/linters.py
> > @@ -16,6 +16,7 @@
> >   import os
> >   import re
> >   import subprocess
> > +import sys
> >   from typing import List, Mapping, Optional
> >
> >
> > @@ -81,3 +82,20 @@ def run_linter(
> >
> >   return p.returncode
> >
> > +
> > +def main() -> int:
> > +"""
> > +Used by the Python CI system as an entry point to run these linters.
> > +"""
> > +files = get_test_files()
> > +
> > +if sys.argv[1] == '--pylint':
> > +return run_linter('pylint', files)
> > +elif sys.argv[1] == '--mypy':
> > +return run_linter('mypy', files)
>
> So I can run it as `python linters.py --pylint foo bar` and it won’t
> complain? :)
>
> I don’t feel like it’s important, but, well, it isn’t right either.
>
>
Alright. I hacked it together to be "minimal" in terms of SLOC, but I can
make it ... less minimal.


Re: [PATCH 09/13] iotests: split linters.py out from 297

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 7:50 AM Hanna Reitz  wrote:

> On 04.10.21 23:04, John Snow wrote:
> > Now, 297 is just the iotests-specific incantations and linters.py is as
> > minimal as I can think to make it. The only remaining element in here
> > that ought to be configuration and not code is the list of skip files,
>
> Yeah...
>
> > but they're still numerous enough that repeating them for mypy and
> > pylint configurations both would be ... a hassle.
>
> I agree.
>
> > Signed-off-by: John Snow 
> > ---
> >   tests/qemu-iotests/297| 72 +++---
> >   tests/qemu-iotests/linters.py | 83 +++
> >   2 files changed, 88 insertions(+), 67 deletions(-)
> >   create mode 100644 tests/qemu-iotests/linters.py
>
> I’d like to give an A-b because now the statuscode-returning function is
> in a library.  But I already gave an A-b on the last patch precisely
> because of the interface, and I shouldn’t be so grumpy.
>
> Reviewed-by: Hanna Reitz 
>
>
I'm not entirely sure I understand your dislike(?) of status codes. I'm not
trying to ignore the feedback, but I don't think I understand it fully.

Would it be better if I removed check=False and allowed the functions to
raise an Exception on non-zero subprocess return? (Possibly having the
function itself print the stdout on the error case before re-raising.)

--js


Re: [PATCH v3 0/7] Switch iotests to using Async QMP

2021-10-13 Thread Hanna Reitz

On 13.10.21 16:00, John Snow wrote:



On Wed, Oct 13, 2021 at 8:51 AM John Snow  wrote:



On Wed, Oct 13, 2021 at 4:45 AM Hanna Reitz  wrote:

On 13.10.21 00:34, John Snow wrote:
> Based-on: <20211012214152.802483-1-js...@redhat.com>
>            [PULL 00/10] Python patches
> GitLab:
https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-iotest-wrapper
> CI: https://gitlab.com/jsnow/qemu/-/pipelines/387210591
>
> Hiya,
>
> This series continues where the last two AQMP series left
off and adds a
> synchronous 'legacy' wrapper around the new AQMP interface,
then drops
> it straight into iotests to prove that AQMP is functional
and totally
> cool and fine. The disruption and churn to iotests is pretty
minimal.
>
> In the event that a regression happens and I am not
physically proximate
> to inflict damage upon, one may set the
QEMU_PYTHON_LEGACY_QMP variable
> to any non-empty string as it pleases you to engage the QMP
machinery
> you are used to.
>
> I'd like to try and get this committed early in the 6.2
development
> cycle to give ample time to smooth over any possible
regressions. I've
> tested it locally and via gitlab CI, across Python versions
3.6 through
> 3.10, and "worksforme". If something bad happens, we can
revert the
> actual switch-flip very trivially.

So running iotests locally, I got one failure:

$ TEST_DIR=/tmp/vdi-tests ./check -c writethrough -vdi 300
[...]
300 fail   [10:28:06] [10:28:11]
5.1s output mismatch (see 300.out.bad)
--- /home/maxx/projects/qemu/tests/qemu-iotests/300.out
+++ 300.out.bad
@@ -1,4 +1,5 @@
-...
+..ERROR:qemu.aqmp.qmp_client.qemu-b-222963:Task.Reader:

ConnectionResetError: [Errno 104] Connection reset by peer
+.
  --
  Ran 39 tests
[...]


Oh, unfortunate.


I’m afraid I can’t really give a reproducer or anything.  It
feels like


Thank you for the report!

just some random spurious timing-related error. Although then
again,
300 does have an `except machine.AbnormalShutdown` clause at one
point...  So perhaps that’s the culprit, and we need to
disable logging
there.


I'll investigate!


Unfortunately, even in a loop some 150 times I couldn't reproduce this 
one. As you point out, it appears to be just a failure caused by 
logging. The test logic itself completes as expected.


Still, I would expect, on a "clean" shutdown of the destination host 
(where the destination process fails to load the migration stream and 
voluntarily exits with an error code) to end with a FIN/ACK for TCP or 
... uh, whatever happens for a UNIX socket. Where's the Connection 
Reset coming from? Did the destination VM process *crash*?


I'm not so sure that I *should* silence this error, but I also can't 
reproduce it at all to answer these questions, so uh. uhhh. I guess I 
will just hammer it on a loop a few hundred times more and see if I 
get lucky.


I could reproduce it, by running 20 instances concurrently.  (Needs a 
change to testrunner.py, so that the reference outputs don’t collide:


diff --git a/tests/qemu-iotests/testrunner.py 
b/tests/qemu-iotests/testrunner.py

index a56b6da396..fd0a3a1eeb 100644
--- a/tests/qemu-iotests/testrunner.py
+++ b/tests/qemu-iotests/testrunner.py
@@ -221,7 +221,7 @@ def find_reference(self, test: str) -> str:

 def do_run_test(self, test: str) -> TestResult:
 f_test = Path(test)
-    f_bad = Path(f_test.name + '.out.bad')
+    f_bad = Path(f'{os.getpid()}-{f_test.name}.out.bad')
 f_notrun = Path(f_test.name + '.notrun')
 f_casenotrun = Path(f_test.name + '.casenotrun')
 f_reference = Path(self.find_reference(test))

)

And then:

$ while TEST_DIR=/tmp/vdi-$$ ./check -vdi 300; do; done

Which pretty quickly shows the error in at least one of those loops 
(under a minute).


As far as I can tell, changing the log level in 300 does indeed fix it:

diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
index 10f9f2a8da..096f5dabf0 100755
--- a/tests/qemu-iotests/300
+++ b/tests/qemu-iotests/300
@@ -27,6 +27,7 @@ from typing import Dict, List, Optional
 from qemu.machine import machine

 import iotests
+from iotests import change_log_level


 BlockBitmapMapping = List[Dict[str, object]]
@@ -464,7 +465,8 @@ class 
TestBlockBitmapMappingErrors(TestDirtyBitmapMigration):

 # Expect abnormal shutdown of the destination VM because of

Re: [PATCH 05/13] iotests/297: Create main() function

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 7:03 AM Hanna Reitz  wrote:

> On 04.10.21 23:04, John Snow wrote:
> > Instead of running "run_linters" directly, create a main() function that
> > will be responsible for environment setup, leaving run_linters()
> > responsible only for execution of the linters.
> >
> > (That environment setup will be moved over in forthcoming commits.)
> >
> > Signed-off-by: John Snow 
> > Reviewed-by: Vladimir Sementsov-Ogievskiy 
> > Reviewed-by: Philippe Mathieu-Daudé 
> > Reviewed-by: Hanna Reitz 
> > ---
> >   tests/qemu-iotests/297 | 12 
> >   1 file changed, 8 insertions(+), 4 deletions(-)
> >
> > diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
> > index 65b1e7058c2..f9fcb039e27 100755
> > --- a/tests/qemu-iotests/297
> > +++ b/tests/qemu-iotests/297
> > @@ -89,8 +89,12 @@ def run_linters():
> >   print(p.stdout)
> >
> >
> > -for linter in ('pylint-3', 'mypy'):
> > -if shutil.which(linter) is None:
> > -iotests.notrun(f'{linter} not found')
> > +def main() -> None:
> > +for linter in ('pylint-3', 'mypy'):
> > +if shutil.which(linter) is None:
> > +iotests.notrun(f'{linter} not found')
>
> Now that I see it here: Given patch 4, shouldn’t we replace
> `shutil.which()` by some other check?
>
>
Yeah, ideally. Sorry, I was lazy and didn't do that part yet. Nobody had
asked O:-)

I'll bolster the previous patch for the next go-around. (Or maybe I'll send
a fixup patch to the list, depending on what the rest of your replies look
like.)

--js


Re: [PATCH 02/13] iotests/297: Split mypy configuration out into mypy.ini

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 6:53 AM Hanna Reitz  wrote:

> On 04.10.21 23:04, John Snow wrote:
> > More separation of code and configuration.
> >
> > Signed-off-by: John Snow 
> > ---
> >   tests/qemu-iotests/297  | 14 +-
> >   tests/qemu-iotests/mypy.ini | 12 
> >   2 files changed, 13 insertions(+), 13 deletions(-)
> >   create mode 100644 tests/qemu-iotests/mypy.ini
>
> Reviewed-by: Hanna Reitz 
>
> > diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
> > index bc3a0ceb2aa..b8101e6024a 100755
> > --- a/tests/qemu-iotests/297
> > +++ b/tests/qemu-iotests/297
> > @@ -73,19 +73,7 @@ def run_linters():
> >   sys.stdout.flush()
> >
> >   env['MYPYPATH'] = env['PYTHONPATH']
> > -p = subprocess.run(('mypy',
> > -'--warn-unused-configs',
> > -'--disallow-subclassing-any',
> > -'--disallow-any-generics',
> > -'--disallow-incomplete-defs',
> > -'--disallow-untyped-decorators',
> > -'--no-implicit-optional',
> > -'--warn-redundant-casts',
> > -'--warn-unused-ignores',
> > -'--no-implicit-reexport',
> > -'--namespace-packages',
> > -'--scripts-are-modules',
> > -*files),
> > +p = subprocess.run(('mypy', *files),
> >  env=env,
> >  check=False,
> >  stdout=subprocess.PIPE,
> > diff --git a/tests/qemu-iotests/mypy.ini b/tests/qemu-iotests/mypy.ini
> > new file mode 100644
> > index 000..4c0339f5589
> > --- /dev/null
> > +++ b/tests/qemu-iotests/mypy.ini
> > @@ -0,0 +1,12 @@
> > +[mypy]
> > +disallow_any_generics = True
> > +disallow_incomplete_defs = True
> > +disallow_subclassing_any = True
> > +disallow_untyped_decorators = True
> > +implicit_reexport = False
>
> Out of curiosity: Any reason you chose to invert this one, but none of
> the rest?  (i.e. no_implicit_optional = True -> implicit_optional =
> False; or disallow* = True -> allow* = False)
>
>
Anything written as '--no-xxx' I wrote as 'xxx = False', but
"implicit_optional = False" isn't a supported option, so that one kept the
"no" in it.

--js


Re: [PATCH v3 7/7] python, iotests: replace qmp with aqmp

2021-10-13 Thread Eric Blake
On Tue, Oct 12, 2021 at 06:34:45PM -0400, John Snow wrote:
> Swap out the synchronous QEMUMonitorProtocol from qemu.qmp with the sync
> wrapper from qemu.aqmp instead.
> 
> Add an escape hatch in the form of the environment variable
> QEMU_PYTHON_LEGACY_QMP which allows you to cajole QEMUMachine into using
> the old implementatin, proving that both implementations work

implementation

> concurrently.
> 
> Signed-off-by: John Snow 
> Reviewed-by: Hanna Reitz 
> Tested-by: Hanna Reitz 
> ---
>  python/qemu/machine/machine.py | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v3 0/7] Switch iotests to using Async QMP

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 8:51 AM John Snow  wrote:

>
>
> On Wed, Oct 13, 2021 at 4:45 AM Hanna Reitz  wrote:
>
>> On 13.10.21 00:34, John Snow wrote:
>> > Based-on: <20211012214152.802483-1-js...@redhat.com>
>> >[PULL 00/10] Python patches
>> > GitLab:
>> https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-iotest-wrapper
>> > CI: https://gitlab.com/jsnow/qemu/-/pipelines/387210591
>> >
>> > Hiya,
>> >
>> > This series continues where the last two AQMP series left off and adds a
>> > synchronous 'legacy' wrapper around the new AQMP interface, then drops
>> > it straight into iotests to prove that AQMP is functional and totally
>> > cool and fine. The disruption and churn to iotests is pretty minimal.
>> >
>> > In the event that a regression happens and I am not physically proximate
>> > to inflict damage upon, one may set the QEMU_PYTHON_LEGACY_QMP variable
>> > to any non-empty string as it pleases you to engage the QMP machinery
>> > you are used to.
>> >
>> > I'd like to try and get this committed early in the 6.2 development
>> > cycle to give ample time to smooth over any possible regressions. I've
>> > tested it locally and via gitlab CI, across Python versions 3.6 through
>> > 3.10, and "worksforme". If something bad happens, we can revert the
>> > actual switch-flip very trivially.
>>
>> So running iotests locally, I got one failure:
>>
>> $ TEST_DIR=/tmp/vdi-tests ./check -c writethrough -vdi 300
>> [...]
>> 300 fail   [10:28:06] [10:28:11]
>> 5.1s output mismatch (see 300.out.bad)
>> --- /home/maxx/projects/qemu/tests/qemu-iotests/300.out
>> +++ 300.out.bad
>> @@ -1,4 +1,5 @@
>> -...
>> +..ERROR:qemu.aqmp.qmp_client.qemu-b-222963:Task.Reader:
>> ConnectionResetError: [Errno 104] Connection reset by peer
>> +.
>>   --
>>   Ran 39 tests
>> [...]
>>
>>
> Oh, unfortunate.
>
>
>>
>> I’m afraid I can’t really give a reproducer or anything.  It feels like
>>
>
> Thank you for the report!
>
>
>> just some random spurious timing-related error.  Although then again,
>> 300 does have an `except machine.AbnormalShutdown` clause at one
>> point...  So perhaps that’s the culprit, and we need to disable logging
>> there.
>>
>>
> I'll investigate!
>

Unfortunately, even in a loop some 150 times I couldn't reproduce this one.
As you point out, it appears to be just a failure caused by logging. The
test logic itself completes as expected.

Still, I would expect, on a "clean" shutdown of the destination host (where
the destination process fails to load the migration stream and voluntarily
exits with an error code) to end with a FIN/ACK for TCP or ... uh, whatever
happens for a UNIX socket. Where's the Connection Reset coming from? Did
the destination VM process *crash*?

I'm not so sure that I *should* silence this error, but I also can't
reproduce it at all to answer these questions, so uh. uhhh. I guess I will
just hammer it on a loop a few hundred times more and see if I get lucky.


Re: [PATCH v2 08/15] qdev: Make DeviceState.id independent of QemuOpts

2021-10-13 Thread Damien Hedde




On 10/8/21 15:34, Kevin Wolf wrote:

DeviceState.id is a pointer to a string that is stored in the QemuOpts
object DeviceState.opts and freed together with it. We want to create
devices without going through QemuOpts in the future, so make this a
separately allocated string.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 



Reviewed-by: Damien Hedde 



Re: [PATCH v2 02/15] net/vhost-user: Fix device compatibility check

2021-10-13 Thread Damien Hedde




On 10/8/21 15:34, Kevin Wolf wrote:

vhost-user works only with specific devices. At startup, it second
guesses what the command line option handling will do and error out if
it thinks a non-virtio device will attach to them.

This second guessing is not only ugly, it can lead to wrong error
messages ('-device floppy,netdev=foo' should complain about an unknown
property, not about the wrong kind of network device being attached) and
completely ignores hotplugging.

Drop the old checks and implement .check_peer_type() instead to fix
this. As a nice side effect, it also removes one more dependency on the
legacy QemuOpts infrastructure and even reduces the code size.

Signed-off-by: Kevin Wolf 


Reviewed-by: Damien Hedde 



Re: [PATCH v2 03/15] net/vhost-vdpa: Fix device compatibility check

2021-10-13 Thread Damien Hedde




On 10/8/21 15:34, Kevin Wolf wrote:

vhost-vdpa works only with specific devices. At startup, it second
guesses what the command line option handling will do and error out if
it thinks a non-virtio device will attach to them.

This second guessing is not only ugly, it can lead to wrong error
messages ('-device floppy,netdev=foo' should complain about an unknown
property, not about the wrong kind of network device being attached) and
completely ignores hotplugging.

Drop the old checks and implement .check_peer_type() instead to fix
this. As a nice side effect, it also removes one more dependency on the
legacy QemuOpts infrastructure and even reduces the code size.

Signed-off-by: Kevin Wolf 


Reviewed-by: Damien Hedde 



Re: [PATCH v2 01/15] net: Introduce NetClientInfo.check_peer_type()

2021-10-13 Thread Damien Hedde




On 10/8/21 15:34, Kevin Wolf wrote:

Some network backends (vhost-user and vhost-vdpa) work only with
specific devices. At startup, they second guess what the command line
option handling will do and error out if they think a non-virtio device
will attach to them.

This second guessing is not only ugly, it can lead to wrong error
messages ('-device floppy,netdev=foo' should complain about an unknown
property, not about the wrong kind of network device being attached) and
completely ignores hotplugging.

Add a callback where backends can check compatibility with a device when
it actually tries to attach, even on hotplug.

Signed-off-by: Kevin Wolf 


Reviewed-by: Damien Hedde 

---
  include/net/net.h| 2 ++
  hw/core/qdev-properties-system.c | 6 ++
  2 files changed, 8 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index 5d1508081f..986288eb07 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -62,6 +62,7 @@ typedef struct SocketReadState SocketReadState;
  typedef void (SocketReadStateFinalize)(SocketReadState *rs);
  typedef void (NetAnnounce)(NetClientState *);
  typedef bool (SetSteeringEBPF)(NetClientState *, int);
+typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **);
  
  typedef struct NetClientInfo {

  NetClientDriver type;
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
  SetVnetBE *set_vnet_be;
  NetAnnounce *announce;
  SetSteeringEBPF *set_steering_ebpf;
+NetCheckPeerType *check_peer_type;
  } NetClientInfo;
  
  struct NetClientState {

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index e71f5d64d1..a91f60567a 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -431,6 +431,12 @@ static void set_netdev(Object *obj, Visitor *v, const char 
*name,
  goto out;
  }
  
+if (peers[i]->info->check_peer_type) {

+if (!peers[i]->info->check_peer_type(peers[i], obj->class, errp)) {
+goto out;
+}
+}
+
  ncs[i] = peers[i];
  ncs[i]->queue_index = i;
  }





Re: [PATCH v2 09/15] softmmu/qdev-monitor: add error handling in qdev_set_id

2021-10-13 Thread Damien Hedde




On 10/11/21 23:00, Eric Blake wrote:

On Fri, Oct 08, 2021 at 03:34:36PM +0200, Kevin Wolf wrote:

From: Damien Hedde 

qdev_set_id() is mostly used when the user adds a device (using
-device cli option or device_add qmp command). This commit adds
an error parameter to handle the case where the given id is
already taken.

Also document the function and add a return value in order to
be able to capture success/failure: the function now returns the
id in case of success, or NULL in case of failure.

The commit modifies the 2 calling places (qdev-monitor and
xen-legacy-backend) to add the error object parameter.

Note that the id is, right now, guaranteed to be unique because
all ids came from the "device" QemuOptsList where the id is used
as key. This addition is a preparation for a future commit which
will relax the uniqueness.

Signed-off-by: Damien Hedde 
Signed-off-by: Kevin Wolf 
---



+} else {
+error_setg(errp, "Duplicate device ID '%s'", id);
+return NULL;


id is not freed on this error path...



  
  DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)

@@ -691,7 +703,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
  }
  }
  
-qdev_set_id(dev, g_strdup(qemu_opts_id(opts)));

+/*
+ * set dev's parent and register its id.
+ * If it fails it means the id is already taken.
+ */
+if (!qdev_set_id(dev, g_strdup(qemu_opts_id(opts)), errp)) {
+goto err_del_dev;


...nor on this, which means the g_strdup() leaks on failure.



Since we strdup the id just before calling qdev_set_id.
Maybe we should do the the strdup in qdev_set_id (and free things on 
error there too). It seems simplier than freeing things on the callee 
side just in case of an error.



Damien







Re: [PATCH v3 0/7] Switch iotests to using Async QMP

2021-10-13 Thread John Snow
On Wed, Oct 13, 2021 at 4:45 AM Hanna Reitz  wrote:

> On 13.10.21 00:34, John Snow wrote:
> > Based-on: <20211012214152.802483-1-js...@redhat.com>
> >[PULL 00/10] Python patches
> > GitLab:
> https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-iotest-wrapper
> > CI: https://gitlab.com/jsnow/qemu/-/pipelines/387210591
> >
> > Hiya,
> >
> > This series continues where the last two AQMP series left off and adds a
> > synchronous 'legacy' wrapper around the new AQMP interface, then drops
> > it straight into iotests to prove that AQMP is functional and totally
> > cool and fine. The disruption and churn to iotests is pretty minimal.
> >
> > In the event that a regression happens and I am not physically proximate
> > to inflict damage upon, one may set the QEMU_PYTHON_LEGACY_QMP variable
> > to any non-empty string as it pleases you to engage the QMP machinery
> > you are used to.
> >
> > I'd like to try and get this committed early in the 6.2 development
> > cycle to give ample time to smooth over any possible regressions. I've
> > tested it locally and via gitlab CI, across Python versions 3.6 through
> > 3.10, and "worksforme". If something bad happens, we can revert the
> > actual switch-flip very trivially.
>
> So running iotests locally, I got one failure:
>
> $ TEST_DIR=/tmp/vdi-tests ./check -c writethrough -vdi 300
> [...]
> 300 fail   [10:28:06] [10:28:11]
> 5.1s output mismatch (see 300.out.bad)
> --- /home/maxx/projects/qemu/tests/qemu-iotests/300.out
> +++ 300.out.bad
> @@ -1,4 +1,5 @@
> -...
> +..ERROR:qemu.aqmp.qmp_client.qemu-b-222963:Task.Reader:
> ConnectionResetError: [Errno 104] Connection reset by peer
> +.
>   --
>   Ran 39 tests
> [...]
>
>
Oh, unfortunate.


>
> I’m afraid I can’t really give a reproducer or anything.  It feels like
>

Thank you for the report!


> just some random spurious timing-related error.  Although then again,
> 300 does have an `except machine.AbnormalShutdown` clause at one
> point...  So perhaps that’s the culprit, and we need to disable logging
> there.
>
>
I'll investigate!


Re: [PATCH 13/13] iotests: [RFC] drop iotest 297

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:05, John Snow wrote:

(This is highlighting a what-if, which might make it clear why any
special infrastructure is still required at all. It's not intended to
actually be merged at this step -- running JUST the iotest linters from
e.g. 'make check' is not yet accommodated, so there's no suitable
replacement for 297 for block test authors.)


OK, acknowledged. :)

Hanna


Drop 297. As a consequence, we no longer need to pass an environment
variable to the mypy/pylint invocations, so that can be dropped. We also
now no longer need to hide output-except-on-error, so that can be
dropped as well.

The only thing that necessitates any special running logic anymore is
the skip list and the python-test-detection code. Without those, we
could easily codify the tests as simply:

[pylint|mypy] *.py tests/*.py

... and drop this entire file. We're not quite there yet, though.

Signed-off-by: John Snow 
---
  tests/qemu-iotests/297| 52 ---
  tests/qemu-iotests/297.out|  2 --
  tests/qemu-iotests/linters.py | 20 ++
  3 files changed, 2 insertions(+), 72 deletions(-)
  delete mode 100755 tests/qemu-iotests/297
  delete mode 100644 tests/qemu-iotests/297.out





Re: [PATCH 12/13] python: Add iotest linters to test suite

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:05, John Snow wrote:

Run mypy and pylint on the iotests files directly from the Python CI
test infrastructure. This ensures that any accidental breakages to the
qemu.[qmp|aqmp|machine|utils] packages will be caught by that test
suite.

It also ensures that these linters are run with well-known versions and
test against a wide variety of python versions, which helps to find
accidental cross-version python compatibility issues.

Signed-off-by: John Snow 
---
  python/tests/iotests-mypy.sh   | 4 
  python/tests/iotests-pylint.sh | 4 
  2 files changed, 8 insertions(+)
  create mode 100755 python/tests/iotests-mypy.sh
  create mode 100755 python/tests/iotests-pylint.sh


Reviewed-by: Hanna Reitz 




Re: [PATCH 11/13] iotests/linters: Add workaround for mypy bug #9852

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:05, John Snow wrote:

This one is insidious: if you write an import as "from {namespace}
import {subpackage}" as mirror-top-perms (now) does, mypy will fail on
every-other invocation *if* the package being imported is a typed,
installed, namespace-scoped package.

Upsettingly, that's exactly what 'qemu.[aqmp|qmp|machine]' et al are in
the context of Python CI tests.

Now, I could just edit mirror-top-perms to avoid this invocation, but
since I tripped on a landmine, I might as well head it off at the pass
and make sure nobody else trips on that same landmine.

It seems to have something to do with the order in which files are
checked as well, meaning the random order in which set(os.listdir())
produces the list of files to test will cause problems intermittently
and not just strictly "every other run".

This will be fixed in mypy >= 0.920, which is not released yet. The
workaround for now is to disable incremental checking, which avoids the
issue.

Note: This workaround is not applied when running iotest 297 directly,
because the bug does not surface there! Given the nature of CI jobs not
starting with any stale cache to begin with, this really only has a
half-second impact on manual runs of the Python test suite when executed
directly by a developer on their local machine. The workaround may be
removed when the Python package requirements can stipulate mypy 0.920 or
higher, which can happen as soon as it is released. (Barring any
unforseen compatibility issues that 0.920 may bring with it.)


See also:
  https://github.com/python/mypy/issues/11010
  https://github.com/python/mypy/issues/9852

Signed-off-by: John Snow 
---
  tests/qemu-iotests/linters.py | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH 10/13] iotests/linters: Add entry point for linting via Python CI

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:05, John Snow wrote:

We need at least a tiny little shim here to join test file discovery
with test invocation. This logic could conceivably be hosted somewhere
in python/, but I felt it was strictly the least-rude thing to keep the
test logic here in iotests/, even if this small function isn't itself an
iotest.

Note that we don't actually even need the executable bit here, we'll be
relying on the ability to run this module as a script using Python CLI
arguments. No chance it gets misunderstood as an actual iotest that way.

(It's named, not in tests/, doesn't have the execute bit, and doesn't
have an execution shebang.)

Signed-off-by: John Snow 

---

(1) I think that the test file discovery logic and skip list belong together,
 and that those items belong in iotests/. I think they also belong in
 whichever directory pylintrc and mypy.ini are in, also in iotests/.


Agreed.


(2) Moving this logic into python/tests/ is challenging because I'd have
 to import iotests code from elsewhere in the source tree, which just
 inverts an existing problem I have been trying to rid us of --
 needing to muck around with PYTHONPATH or sys.path hacking in python
 scripts. I'm keen to avoid this.


OK.


(3) If we moved all python tests into tests/ and gave them *.py
 extensions, we wouldn't even need the test discovery functions
 anymore, and all of linters.py could be removed entirely, including
 this execution shim. We could rely on mypy/pylint's own file
 discovery mechanisms at that point. More work than I'm up for with
 just this series, but I could be coaxed into doing it if there was
 some promise of not rejecting all that busywork ;)


I believe the only real value this would gain is that the tests would 
have nicer names and we would have to delint them.  If we find those 
goals to justify the effort, then we can put in the effort; and we can 
do that slowly, test by test.  I don’t think we must do it now in one 
big lump just to get rid of the file discovery functions.



Signed-off-by: John Snow 
---
  tests/qemu-iotests/linters.py | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/tests/qemu-iotests/linters.py b/tests/qemu-iotests/linters.py
index f6a2dc139fd..191df173064 100644
--- a/tests/qemu-iotests/linters.py
+++ b/tests/qemu-iotests/linters.py
@@ -16,6 +16,7 @@
  import os
  import re
  import subprocess
+import sys
  from typing import List, Mapping, Optional
  
  
@@ -81,3 +82,20 @@ def run_linter(
  
  return p.returncode
  
+

+def main() -> int:
+"""
+Used by the Python CI system as an entry point to run these linters.
+"""
+files = get_test_files()
+
+if sys.argv[1] == '--pylint':
+return run_linter('pylint', files)
+elif sys.argv[1] == '--mypy':
+return run_linter('mypy', files)


So I can run it as `python linters.py --pylint foo bar` and it won’t 
complain? :)


I don’t feel like it’s important, but, well, it isn’t right either.

Hanna


+
+raise ValueError(f"Unrecognized argument(s): '{sys.argv[1:]}'")
+
+
+if __name__ == '__main__':
+sys.exit(main())





Re: [PATCH 09/13] iotests: split linters.py out from 297

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

Now, 297 is just the iotests-specific incantations and linters.py is as
minimal as I can think to make it. The only remaining element in here
that ought to be configuration and not code is the list of skip files,


Yeah...


but they're still numerous enough that repeating them for mypy and
pylint configurations both would be ... a hassle.


I agree.


Signed-off-by: John Snow 
---
  tests/qemu-iotests/297| 72 +++---
  tests/qemu-iotests/linters.py | 83 +++
  2 files changed, 88 insertions(+), 67 deletions(-)
  create mode 100644 tests/qemu-iotests/linters.py


I’d like to give an A-b because now the statuscode-returning function is 
in a library.  But I already gave an A-b on the last patch precisely 
because of the interface, and I shouldn’t be so grumpy.


Reviewed-by: Hanna Reitz 




Re: [PATCH 08/13] iotests/297: refactor run_[mypy|pylint] as generic execution shim

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

There's virtually nothing special here anymore; we can combine these
into a single, rather generic function.

Signed-off-by: John Snow 
---
  tests/qemu-iotests/297 | 46 +++---
  1 file changed, 25 insertions(+), 21 deletions(-)


Acked-by: Hanna Reitz 




Re: [PATCH 07/13] iotests/297: Split run_linters apart into run_pylint and run_mypy

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

Signed-off-by: John Snow 

---

Note, this patch really ought to be squashed with the next one,


Yes, it should be.


but I am
performing a move known as "Hedging my bets."
It's easier to squash than de-squash :)


True.  Still, should be squashed. ;)


Signed-off-by: John Snow 
---
  tests/qemu-iotests/297 | 19 ---
  1 file changed, 12 insertions(+), 7 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH 06/13] iotests/297: Separate environment setup from test execution

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

Move environment setup into main(), leaving pure test execution behind
in run_linters().

Signed-off-by: John Snow 
---
  tests/qemu-iotests/297 | 23 ++-
  1 file changed, 14 insertions(+), 9 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH 05/13] iotests/297: Create main() function

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

Instead of running "run_linters" directly, create a main() function that
will be responsible for environment setup, leaving run_linters()
responsible only for execution of the linters.

(That environment setup will be moved over in forthcoming commits.)

Signed-off-by: John Snow 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Hanna Reitz 
---
  tests/qemu-iotests/297 | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 65b1e7058c2..f9fcb039e27 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -89,8 +89,12 @@ def run_linters():
  print(p.stdout)
  
  
-for linter in ('pylint-3', 'mypy'):

-if shutil.which(linter) is None:
-iotests.notrun(f'{linter} not found')
+def main() -> None:
+for linter in ('pylint-3', 'mypy'):
+if shutil.which(linter) is None:
+iotests.notrun(f'{linter} not found')


Now that I see it here: Given patch 4, shouldn’t we replace 
`shutil.which()` by some other check?


Hanna

  
-iotests.script_main(run_linters)

+run_linters()
+
+
+iotests.script_main(main)





Re: [PATCH 03/13] iotests/297: Add get_files() function

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

Split out file discovery into its own method to begin separating out
configuration/setup and test execution.

Signed-off-by: John Snow 
---
  tests/qemu-iotests/297 | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH 02/13] iotests/297: Split mypy configuration out into mypy.ini

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

More separation of code and configuration.

Signed-off-by: John Snow 
---
  tests/qemu-iotests/297  | 14 +-
  tests/qemu-iotests/mypy.ini | 12 
  2 files changed, 13 insertions(+), 13 deletions(-)
  create mode 100644 tests/qemu-iotests/mypy.ini


Reviewed-by: Hanna Reitz 


diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index bc3a0ceb2aa..b8101e6024a 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -73,19 +73,7 @@ def run_linters():
  sys.stdout.flush()
  
  env['MYPYPATH'] = env['PYTHONPATH']

-p = subprocess.run(('mypy',
-'--warn-unused-configs',
-'--disallow-subclassing-any',
-'--disallow-any-generics',
-'--disallow-incomplete-defs',
-'--disallow-untyped-decorators',
-'--no-implicit-optional',
-'--warn-redundant-casts',
-'--warn-unused-ignores',
-'--no-implicit-reexport',
-'--namespace-packages',
-'--scripts-are-modules',
-*files),
+p = subprocess.run(('mypy', *files),
 env=env,
 check=False,
 stdout=subprocess.PIPE,
diff --git a/tests/qemu-iotests/mypy.ini b/tests/qemu-iotests/mypy.ini
new file mode 100644
index 000..4c0339f5589
--- /dev/null
+++ b/tests/qemu-iotests/mypy.ini
@@ -0,0 +1,12 @@
+[mypy]
+disallow_any_generics = True
+disallow_incomplete_defs = True
+disallow_subclassing_any = True
+disallow_untyped_decorators = True
+implicit_reexport = False


Out of curiosity: Any reason you chose to invert this one, but none of 
the rest?  (i.e. no_implicit_optional = True -> implicit_optional = 
False; or disallow* = True -> allow* = False)


Hanna


+namespace_packages = True
+no_implicit_optional = True
+scripts_are_modules = True
+warn_redundant_casts = True
+warn_unused_configs = True
+warn_unused_ignores = True





Re: [PATCH 01/13] iotests/297: Move pylint config into pylintrc

2021-10-13 Thread Hanna Reitz

On 04.10.21 23:04, John Snow wrote:

Move --score=n and --notes=XXX,FIXME into pylintrc. This pulls
configuration out of code, which I think is probably a good thing in
general.

Signed-off-by: John Snow 
---
  tests/qemu-iotests/297  |  4 +---
  tests/qemu-iotests/pylintrc | 16 
  2 files changed, 17 insertions(+), 3 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH] block/vpc: Add a sanity check that fixed-size images have the right type

2021-10-13 Thread Hanna Reitz

On 12.10.21 10:27, Thomas Huth wrote:

The code in vpc.c uses BDRVVPCState->footer.type in various places
to decide whether the image is a fixed-size (VHD_FIXED) or a dynamic
(VHD_DYNAMIC) image. However, we never check that this field really
contains VHD_FIXED if we detected a fixed size image in vpc_open(),
so a wrong value here could cause quite some trouble during runtime.

Suggested-by: Kevin Wolf 
Signed-off-by: Thomas Huth 
---
  block/vpc.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block/

Hanna




Re: [PATCH 04/15] pcie: Add callback preceding SR-IOV VFs update

2021-10-13 Thread Michael S. Tsirkin
On Tue, Oct 12, 2021 at 06:06:46PM +0200, Lukasz Maniak wrote:
> On Tue, Oct 12, 2021 at 03:25:12AM -0400, Michael S. Tsirkin wrote:
> > On Thu, Oct 07, 2021 at 06:23:55PM +0200, Lukasz Maniak wrote:
> > > PCIe devices implementing SR-IOV may need to perform certain actions
> > > before the VFs are unrealized or vice versa.
> > > 
> > > Signed-off-by: Lukasz Maniak 
> > 
> > Callbacks are annoying and easy to misuse though.
> > VFs are enabled through a config cycle, we generally just
> > have devices invoke the capability handler.
> > E.g.
> > 
> > static void pci_bridge_dev_write_config(PCIDevice *d,
> > uint32_t address, uint32_t val, int 
> > len)
> > {
> > pci_bridge_write_config(d, address, val, len);
> > if (msi_present(d)) {
> > msi_write_config(d, address, val, len);
> > }
> > }
> > 
> > this makes it easy to do whatever you want before/after
> > the write. You can also add a helper to check
> > that SRIOV is being enabled/disabled if necessary.
> > 
> > > ---
> > >  docs/pcie_sriov.txt |  2 +-
> > >  hw/pci/pcie_sriov.c | 14 +-
> > >  include/hw/pci/pcie_sriov.h |  8 +++-
> > >  3 files changed, 21 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
> > > index f5e891e1d4..63ca1a7b8e 100644
> > > --- a/docs/pcie_sriov.txt
> > > +++ b/docs/pcie_sriov.txt
> > > @@ -57,7 +57,7 @@ setting up a BAR for a VF.
> > >/* Add and initialize the SR/IOV capability */
> > >pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
> > > vf_devid, initial_vfs, total_vfs,
> > > -   fun_offset, stride);
> > > +   fun_offset, stride, pre_vfs_update_cb);
> > >  
> > >/* Set up individual VF BARs (parameters as for normal BARs) */
> > >pcie_sriov_pf_init_vf_bar( ... )
> > > diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
> > > index 501a1ff433..cac2aee061 100644
> > > --- a/hw/pci/pcie_sriov.c
> > > +++ b/hw/pci/pcie_sriov.c
> > > @@ -30,7 +30,8 @@ static void unregister_vfs(PCIDevice *dev);
> > >  void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> > >  const char *vfname, uint16_t vf_dev_id,
> > >  uint16_t init_vfs, uint16_t total_vfs,
> > > -uint16_t vf_offset, uint16_t vf_stride)
> > > +uint16_t vf_offset, uint16_t vf_stride,
> > > +SriovVfsUpdate pre_vfs_update)
> > >  {
> > >  uint8_t *cfg = dev->config + offset;
> > >  uint8_t *wmask;
> > > @@ -41,6 +42,7 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> > >  dev->exp.sriov_pf.num_vfs = 0;
> > >  dev->exp.sriov_pf.vfname = g_strdup(vfname);
> > >  dev->exp.sriov_pf.vf = NULL;
> > > +dev->exp.sriov_pf.pre_vfs_update = pre_vfs_update;
> > >  
> > >  pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
> > >  pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride);
> > > @@ -180,6 +182,11 @@ static void register_vfs(PCIDevice *dev)
> > >  assert(dev->exp.sriov_pf.vf);
> > >  
> > >  trace_sriov_register_vfs(SRIOV_ID(dev), num_vfs);
> > > +
> > > +if (dev->exp.sriov_pf.pre_vfs_update) {
> > > +dev->exp.sriov_pf.pre_vfs_update(dev, dev->exp.sriov_pf.num_vfs, 
> > > num_vfs);
> > > +}
> > > +
> > >  for (i = 0; i < num_vfs; i++) {
> > >  dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, 
> > > dev->exp.sriov_pf.vfname, i);
> > >  if (!dev->exp.sriov_pf.vf[i]) {
> > > @@ -198,6 +205,11 @@ static void unregister_vfs(PCIDevice *dev)
> > >  uint16_t i;
> > >  
> > >  trace_sriov_unregister_vfs(SRIOV_ID(dev), num_vfs);
> > > +
> > > +if (dev->exp.sriov_pf.pre_vfs_update) {
> > > +dev->exp.sriov_pf.pre_vfs_update(dev, dev->exp.sriov_pf.num_vfs, 
> > > 0);
> > > +}
> > > +
> > >  for (i = 0; i < num_vfs; i++) {
> > >  PCIDevice *vf = dev->exp.sriov_pf.vf[i];
> > >  object_property_set_bool(OBJECT(vf), "realized", false, 
> > > _err);
> > > diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
> > > index 0974f00054..9ab48b79c0 100644
> > > --- a/include/hw/pci/pcie_sriov.h
> > > +++ b/include/hw/pci/pcie_sriov.h
> > > @@ -13,11 +13,16 @@
> > >  #ifndef QEMU_PCIE_SRIOV_H
> > >  #define QEMU_PCIE_SRIOV_H
> > >  
> > > +typedef void (*SriovVfsUpdate)(PCIDevice *dev, uint16_t prev_num_vfs,
> > > +   uint16_t num_vfs);
> > > +
> > >  struct PCIESriovPF {
> > >  uint16_t num_vfs;   /* Number of virtual functions created */
> > >  uint8_t vf_bar_type[PCI_NUM_REGIONS];  /* Store type for each VF bar 
> > > */
> > >  const char *vfname; /* Reference to the device type used for 
> > > the VFs */
> > >  PCIDevice **vf; /* Pointer to an array of num_vfs VF 
> > > devices */
> > > +
> > > +SriovVfsUpdate 

Re: [PATCH v3 0/7] Switch iotests to using Async QMP

2021-10-13 Thread Hanna Reitz

On 13.10.21 00:34, John Snow wrote:

Based-on: <20211012214152.802483-1-js...@redhat.com>
   [PULL 00/10] Python patches
GitLab: https://gitlab.com/jsnow/qemu/-/commits/python-aqmp-iotest-wrapper
CI: https://gitlab.com/jsnow/qemu/-/pipelines/387210591

Hiya,

This series continues where the last two AQMP series left off and adds a
synchronous 'legacy' wrapper around the new AQMP interface, then drops
it straight into iotests to prove that AQMP is functional and totally
cool and fine. The disruption and churn to iotests is pretty minimal.

In the event that a regression happens and I am not physically proximate
to inflict damage upon, one may set the QEMU_PYTHON_LEGACY_QMP variable
to any non-empty string as it pleases you to engage the QMP machinery
you are used to.

I'd like to try and get this committed early in the 6.2 development
cycle to give ample time to smooth over any possible regressions. I've
tested it locally and via gitlab CI, across Python versions 3.6 through
3.10, and "worksforme". If something bad happens, we can revert the
actual switch-flip very trivially.


So running iotests locally, I got one failure:

$ TEST_DIR=/tmp/vdi-tests ./check -c writethrough -vdi 300
[...]
300 fail   [10:28:06] [10:28:11] 
5.1s output mismatch (see 300.out.bad)

--- /home/maxx/projects/qemu/tests/qemu-iotests/300.out
+++ 300.out.bad
@@ -1,4 +1,5 @@
-...
+..ERROR:qemu.aqmp.qmp_client.qemu-b-222963:Task.Reader: 
ConnectionResetError: [Errno 104] Connection reset by peer

+.
 --
 Ran 39 tests
[...]


I’m afraid I can’t really give a reproducer or anything.  It feels like 
just some random spurious timing-related error.  Although then again, 
300 does have an `except machine.AbnormalShutdown` clause at one 
point...  So perhaps that’s the culprit, and we need to disable logging 
there.


Hanna




Re: [PATCH v3 6/7] python/aqmp: Create sync QMP wrapper for iotests

2021-10-13 Thread Hanna Reitz

On 13.10.21 00:34, John Snow wrote:

This is a wrapper around the async QMPClient that mimics the old,
synchronous QEMUMonitorProtocol class. It is designed to be
interchangeable with the old implementation.

It does not, however, attempt to mimic Exception compatibility.

Signed-off-by: John Snow 
---
  python/qemu/aqmp/legacy.py | 138 +
  1 file changed, 138 insertions(+)
  create mode 100644 python/qemu/aqmp/legacy.py


Acked-by: Hanna Reitz 




Re: [PATCH v3 5/7] iotests: Conditionally silence certain AQMP errors

2021-10-13 Thread Hanna Reitz

On 13.10.21 00:34, John Snow wrote:

AQMP likes to be very chatty about errors it encounters. In general,
this is good because it allows us to get good diagnostic information for
otherwise complex async failures.

For example, during a failed QMP connection attempt, we might see:

+ERROR:qemu.aqmp.qmp_client.qemub-2536319:Negotiation failed: EOFError
+ERROR:qemu.aqmp.qmp_client.qemub-2536319:Failed to establish session: EOFError

This might be nice in iotests output, because failure scenarios
involving the new QMP library will be spelled out plainly in the output
diffs.

For tests that are intentionally causing this scenario though, filtering
that log output could be a hassle. For now, add a context manager that
simply lets us toggle this output off during a critical region.

(Additionally, a forthcoming patch allows the use of either legacy or
async QMP to be toggled with an environment variable. In this
circumstance, we can't amend the iotest output to just always expect the
error message, either. Just suppress it for now. More rigorous log
filtering can be investigated later if/when it is deemed safe to
permanently replace the legacy QMP library.)

Signed-off-by: John Snow 
---
  tests/qemu-iotests/iotests.py | 20 +++-
  tests/qemu-iotests/tests/mirror-top-perms | 12 
  2 files changed, 27 insertions(+), 5 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH v3 4/7] iotests: Accommodate async QMP Exception classes

2021-10-13 Thread Hanna Reitz

On 13.10.21 00:34, John Snow wrote:

(But continue to support the old ones for now, too.)

There are very few cases of any user of QEMUMachine or a subclass
thereof relying on a QMP Exception type. If you'd like to check for
yourself, you want to grep for all of the derivatives of QMPError,
excluding 'AQMPError' and its derivatives. That'd be these:

- QMPError
- QMPConnectError
- QMPCapabilitiesError
- QMPTimeoutError
- QMPProtocolError
- QMPResponseError
- QMPBadPortError


Signed-off-by: John Snow 
---
  scripts/simplebench/bench_block_job.py| 3 ++-
  tests/qemu-iotests/tests/mirror-top-perms | 5 +++--
  2 files changed, 5 insertions(+), 3 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH v3 3/7] python/aqmp: Remove scary message

2021-10-13 Thread Hanna Reitz

On 13.10.21 00:34, John Snow wrote:

The scary message interferes with the iotests output. Coincidentally, if
iotests works by removing this, then it's good evidence that we don't
really need to scare people away from using it.

Signed-off-by: John Snow 
---
  python/qemu/aqmp/__init__.py | 12 
  1 file changed, 12 deletions(-)


Acked-by: Hanna Reitz 




Re: [PATCH v3 2/7] python/machine: Handle QMP errors on close more meticulously

2021-10-13 Thread Hanna Reitz

On 13.10.21 00:34, John Snow wrote:

To use the AQMP backend, Machine just needs to be a little more diligent
about what happens when closing a QMP connection. The operation is no
longer a freebie in the async world; it may return errors encountered in
the async bottom half on incoming message receipt, etc.

(AQMP's disconnect, ultimately, serves as the quiescence point where all
async contexts are gathered together, and any final errors reported at
that point.)

Because async QMP continues to check for messages asynchronously, it's
almost certainly likely that the loop will have exited due to EOF after
issuing the last 'quit' command. That error will ultimately be bubbled
up when attempting to close the QMP connection. The manager class here
then is free to discard it -- if it was expected.

Signed-off-by: John Snow 
---
  python/qemu/machine/machine.py | 48 +-
  1 file changed, 42 insertions(+), 6 deletions(-)


Reviewed-by: Hanna Reitz