On 6/23/26 9:27 PM, Maciej S. Szmigiero wrote:
> On 19.06.2026 12:55, Andrey Drobyshev wrote:
>> During CPR migration, instead of reopening /dev/vhost-vsock device on
>> each initialization, let's make sure the already open FD is preserved and
>> properly reused on a subsequent initialization.
>>
>> Signed-off-by: Andrey Drobyshev <[email protected]>
>> ---
>> hw/virtio/vhost-vsock.c | 25 ++++++++++++++++++++-----
>> 1 file changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
>> index da244eb1657..ea9a7d2149d 100644
>> --- a/hw/virtio/vhost-vsock.c
>> +++ b/hw/virtio/vhost-vsock.c
>> @@ -20,6 +20,7 @@
>> #include "hw/core/qdev-properties.h"
>> #include "hw/virtio/vhost-vsock.h"
>> #include "monitor/monitor.h"
>> +#include "migration/cpr.h"
>>
>> static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config)
>> {
>> @@ -126,6 +127,7 @@ static void vhost_vsock_device_realize(DeviceState *dev,
>> Error **errp)
>> VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev);
>> VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> VHostVSock *vsock = VHOST_VSOCK(dev);
>> + DeviceState *proxy = qdev_get_parent_bus(DEVICE(vsock))->parent;
>> int vhostfd;
>> int ret;
>>
>> @@ -141,7 +143,8 @@ static void vhost_vsock_device_realize(DeviceState *dev,
>> Error **errp)
>> }
>>
>> if (vsock->conf.vhostfd) {
>> - vhostfd = monitor_fd_param(monitor_cur(), vsock->conf.vhostfd,
>> errp);
>> + vhostfd = cpr_get_fd_param(proxy->id, vsock->conf.vhostfd, 0,
>
> Is this proxy->id *guaranteed* unique in a QEMU instance?
>
It should be if it exists. But it may not, in which case we'll crash on
cpr_save_fd(NULL) -> strlen(NULL).
I guess we might only cpr_save_fd() this device conditionally, i.e. only
if it has ID set, and then add a migration blocker which would refuse
CPR for ID-less device.
>> + errp);
>> if (vhostfd == -1) {
>> error_prepend(errp, "vhost-vsock: unable to parse vhostfd: ");
>> return;
>> @@ -151,10 +154,20 @@ static void vhost_vsock_device_realize(DeviceState
>> *dev, Error **errp)
>> return;
>> }
>> } else {
>> - vhostfd = open("/dev/vhost-vsock", O_RDWR);
>> - if (vhostfd < 0) {
>> - error_setg_file_open(errp, errno, "/dev/vhost-vsock");
>> - return;
>> + if (cpr_is_incoming()) {
>> + vhostfd = cpr_find_fd(proxy->id, 0);
>> + if (vhostfd < 0) {
>> + error_setg(errp, "vhost-vsock: could not restore vhost FD "
>> + "for %s", proxy->id);
>> + return;
>> + }
>> + } else {
>> + vhostfd = open("/dev/vhost-vsock", O_RDWR);
>> + if (vhostfd < 0) {
>> + error_setg_file_open(errp, errno, "/dev/vhost-vsock");
>> + return;
>> + }
>> + cpr_save_fd(proxy->id, 0, vhostfd);
>> }
>>
>> if (!qemu_set_blocking(vhostfd, false, errp)) {
>> return;
>> }
>
> So if qemu_set_blocking() fails the FD leaks (existing bug) and now also
> CPR has this dangling FD saved?
>
> Furthermore, if calls further down vhost_vsock_device_realize() fail
> AFAIK vhost_vsock_device_unrealize() won't get called so this FD won't
> be removed from CPR - even in something else in vhost stack actually closes
> this FD during error cleanup.
>
Agreed on both. It seems moving cpr_save_fd() to just before the final
return would remedy both. I.e. only register the FD on successful device
registration.
Thanks for pointing out these bugs.
>> @@ -193,10 +206,12 @@ static void vhost_vsock_device_unrealize(DeviceState
>> *dev)
>> {
>> VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev);
>> VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> + DeviceState *proxy = qdev_get_parent_bus(dev)->parent;
>>
>> /* This will stop vhost backend if appropriate. */
>> vhost_vsock_set_status(vdev, 0);
>>
>> + cpr_delete_fd(proxy->id, 0);
>> vhost_dev_cleanup(&vvc->vhost_dev);
>> vhost_vsock_common_unrealize(vdev);
>> }
>
> Thanks,
> Maciej
>