Very excited to see this. High level looks good modulo a few small things.

My major concern is around existing vhost-user-scsi backends which don’t 
support VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD. IMO we should hide the reconnect 
behavior behind a VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD check. We may want to do 
the same for vhost-user-blk.

The question is then what happens if the check is false. IIUC without an 
inflight FD, if a device processes requests out of order, it’s not safe to 
continue execution on reconnect, as there’s no way for the backend to know how 
to replay IO. Should we permanently wedge the device or have QEMU fail out? May 
be nice to have a toggle for this.

> On Jul 21, 2023, at 6:51 AM, Li Feng <fen...@smartx.com> wrote:
> 
> If the backend crashes and restarts, the device is broken.
> This patch adds reconnect for vhost-user-scsi.
> 
> Tested with spdk backend.
> 
> Signed-off-by: Li Feng <fen...@smartx.com>
> ---
> hw/block/vhost-user-blk.c           |   2 -
> hw/scsi/vhost-scsi-common.c         |  27 ++---
> hw/scsi/vhost-user-scsi.c           | 163 +++++++++++++++++++++++++---
> include/hw/virtio/vhost-user-scsi.h |   3 +
> include/hw/virtio/vhost.h           |   2 +
> 5 files changed, 165 insertions(+), 32 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index eecf3f7a81..f250c740b5 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -32,8 +32,6 @@
> #include "sysemu/sysemu.h"
> #include "sysemu/runstate.h"
> 
> -#define REALIZE_CONNECTION_RETRIES 3
> -
> static const int user_feature_bits[] = {
>     VIRTIO_BLK_F_SIZE_MAX,
>     VIRTIO_BLK_F_SEG_MAX,
> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c

Why can’t all the vhost-scsi-common stuff be moved to a separate change?

Especially the stuff introduced for vhost-user-blk in 
1b0063b3048af65dfaae6422a572c87db8575a92 should be moved out.

> index a06f01af26..08801886b8 100644
> --- a/hw/scsi/vhost-scsi-common.c
> +++ b/hw/scsi/vhost-scsi-common.c
> @@ -52,16 +52,22 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
> 
>     vsc->dev.acked_features = vdev->guest_features;
> 
> -    assert(vsc->inflight == NULL);
> -    vsc->inflight = g_new0(struct vhost_inflight, 1);
> -    ret = vhost_dev_get_inflight(&vsc->dev,
> -                                 vs->conf.virtqueue_size,
> -                                 vsc->inflight);
> +    ret = vhost_dev_prepare_inflight(&vsc->dev, vdev);
>     if (ret < 0) {
> -        error_report("Error get inflight: %d", -ret);
> +        error_report("Error setting inflight format: %d", -ret);
>         goto err_guest_notifiers;
>     }
> 
> +    if (!vsc->inflight->addr) {
> +        ret = vhost_dev_get_inflight(&vsc->dev,
> +                                    vs->conf.virtqueue_size,
> +                                    vsc->inflight);
> +        if (ret < 0) {
> +            error_report("Error get inflight: %d", -ret);
> +            goto err_guest_notifiers;
> +        }
> +    }
> +
>     ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight);
>     if (ret < 0) {
>         error_report("Error set inflight: %d", -ret);
> @@ -85,9 +91,6 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc)
>     return ret;
> 
> err_guest_notifiers:
> -    g_free(vsc->inflight);
> -    vsc->inflight = NULL;
> -
>     k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
> err_host_notifiers:
>     vhost_dev_disable_notifiers(&vsc->dev, vdev);
> @@ -111,12 +114,6 @@ void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>     }
>     assert(ret >= 0);
> 

In the vhost-scsi (kernel backend) path, what will cleanup vsc->inflight now?

> -    if (vsc->inflight) {
> -        vhost_dev_free_inflight(vsc->inflight);
> -        g_free(vsc->inflight);
> -        vsc->inflight = NULL;
> -    }
> -
>     vhost_dev_disable_notifiers(&vsc->dev, vdev);
> }
> 
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index ee99b19e7a..e0e88b0c42 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -89,14 +89,126 @@ static void vhost_dummy_handle_output(VirtIODevice 
> *vdev, VirtQueue *vq)
> {
> }
> 
> +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    int ret = 0;
> +
> +    if (s->connected) {
> +        return 0;
> +    }
> +    s->connected = true;
> +
> +    vsc->dev.num_queues = vs->conf.num_queues;
> +    vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
> +    vsc->dev.vqs = s->vhost_vqs;
> +    vsc->dev.vq_index = 0;
> +    vsc->dev.backend_features = 0;
> +
> +    ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 
> 0,
> +                         errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    /* restore vhost state */

Should this use virtio_device_should_start like vhost_user_blk?

> +    if (virtio_device_started(vdev, vdev->status)) {
> +        ret = vhost_scsi_common_start(vsc);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event);
> +
> +static void vhost_user_scsi_disconnect(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +

I don’t think we want to execute vhost_scsi_common_stop() if the device hasn’t 
been started. I remember that caused a number of races with the vhost_user_blk 
connecting/disconnecting on startup.

Let’s add a similar started_vu check?

> +    if (!s->connected) {
> +        return;
> +    }
> +    s->connected = false;
> +
> +    vhost_scsi_common_stop(vsc);
> +
> +    vhost_dev_cleanup(&vsc->dev);
> +
> +    /* Re-instate the event handler for new connections */
> +    qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
> +                             vhost_user_scsi_event, NULL, dev, NULL, true);
> +}
> +
> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
> +{
> +    DeviceState *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    Error *local_err = NULL;
> +
> +    switch (event) {
> +    case CHR_EVENT_OPENED:
> +        if (vhost_user_scsi_connect(dev, &local_err) < 0) {
> +            error_report_err(local_err);
> +            qemu_chr_fe_disconnect(&vs->conf.chardev);
> +            return;
> +        }
> +        break;
> +    case CHR_EVENT_CLOSED:
> +        /* defer close until later to avoid circular close */
> +        vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
> +                               vhost_user_scsi_disconnect);
> +        break;
> +    case CHR_EVENT_BREAK:
> +    case CHR_EVENT_MUX_IN:
> +    case CHR_EVENT_MUX_OUT:
> +        /* Ignore */
> +        break;
> +    }
> +}
> +
> +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp)
> +{
> +    DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    int ret;
> +
> +    s->connected = false;
> +
> +    ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = vhost_user_scsi_connect(dev, errp);
> +    if (ret < 0) {
> +        qemu_chr_fe_disconnect(&vs->conf.chardev);
> +        return ret;
> +    }
> +    assert(s->connected);
> +
> +    return 0;
> +}
> +
> static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
> {
>     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>     VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> -    struct vhost_virtqueue *vqs = NULL;
>     Error *err = NULL;
>     int ret;
> +    int retries = REALIZE_CONNECTION_RETRIES;
> 
>     if (!vs->conf.chardev.chr) {
>         error_setg(errp, "vhost-user-scsi: missing chardev");
> @@ -112,21 +224,31 @@ static void vhost_user_scsi_realize(DeviceState *dev, 
> Error **errp)
>     }
> 
>     if (!vhost_user_init(&s->vhost_user, &vs->conf.chardev, errp)) {

Why execute vhost_user_cleanup() if vhost_user_init() fails?

> -        goto free_virtio;
> +        goto free_vhost;
>     }
> 
> -    vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
> -    vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
> -    vsc->dev.vq_index = 0;
> -    vsc->dev.backend_features = 0;
> -    vqs = vsc->dev.vqs;
> +    vsc->inflight = g_new0(struct vhost_inflight, 1);
> +    s->vhost_vqs = g_new0(struct vhost_virtqueue,
> +                          VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues);
> +
> +    assert(!*errp);
> +    do {
> +        if (*errp) {
> +            error_prepend(errp, "Reconnecting after error: ");
> +            error_report_err(*errp);
> +            *errp = NULL;
> +        }
> +        ret = vhost_user_scsi_realize_connect(s, errp);
> +    } while (ret < 0 && retries--);
> 
> -    ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
> -                         VHOST_BACKEND_TYPE_USER, 0, errp);
>     if (ret < 0) {
> -        goto free_vhost;
> +        goto free_vqs;
>     }
> 
> +    /* we're fully initialized, now we can operate, so add the handler */
> +    qemu_chr_fe_set_handlers(&vs->conf.chardev,  NULL, NULL,
> +                             vhost_user_scsi_event, NULL, (void *)dev,
> +                             NULL, true);
>     /* Channel and lun both are 0 for bootable vhost-user-scsi disk */
>     vsc->channel = 0;
>     vsc->lun = 0;
> @@ -134,10 +256,15 @@ static void vhost_user_scsi_realize(DeviceState *dev, 
> Error **errp)
> 
>     return;
> 
> +free_vqs:
> +    g_free(s->vhost_vqs);
> +    s->vhost_vqs = NULL;
> +    g_free(vsc->inflight);
> +    vsc->inflight = NULL;
> +
> free_vhost:
>     vhost_user_cleanup(&s->vhost_user);
> -    g_free(vqs);
> -free_virtio:
> +
>     virtio_scsi_common_unrealize(dev);
> }
> 
> @@ -146,16 +273,22 @@ static void vhost_user_scsi_unrealize(DeviceState *dev)
>     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>     VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> -    struct vhost_virtqueue *vqs = vsc->dev.vqs;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> 
>     /* This will stop the vhost backend. */
>     vhost_user_scsi_set_status(vdev, 0);
> +    qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, NULL,
> +                             NULL, false);
> 
>     vhost_dev_cleanup(&vsc->dev);
> -    g_free(vqs);

Nit: Why not put vhost_dev_free_inflight next to the remaining inflight cleanup?

> +    vhost_dev_free_inflight(vsc->inflight);
> +    g_free(s->vhost_vqs);
> +    s->vhost_vqs = NULL;
> +    g_free(vsc->inflight);
> +    vsc->inflight = NULL;
> 

Curiosity - why reorder here? Is something in vhost_user_cleanup() dependent on 
state freed in virtio_scsi_common_unrealize()?

If so, should that go as a standalone fix?

> -    virtio_scsi_common_unrealize(dev);
>     vhost_user_cleanup(&s->vhost_user);
> +    virtio_scsi_common_unrealize(dev);
> }
> 
> static Property vhost_user_scsi_properties[] = {
> diff --git a/include/hw/virtio/vhost-user-scsi.h 
> b/include/hw/virtio/vhost-user-scsi.h
> index 521b08e559..c66acc68b7 100644
> --- a/include/hw/virtio/vhost-user-scsi.h
> +++ b/include/hw/virtio/vhost-user-scsi.h
> @@ -29,6 +29,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI)
> struct VHostUserSCSI {
>     VHostSCSICommon parent_obj;
>     VhostUserState vhost_user;

See above - we should probably have started_vu here/

Maybe we should have some shared struct with vhost_user_blk for connectivity 
params?

> +    bool connected;
> +
> +    struct vhost_virtqueue *vhost_vqs;
> };
> 
> #endif /* VHOST_USER_SCSI_H */
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 6a173cb9fa..b904346fe1 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -8,6 +8,8 @@
> #define VHOST_F_DEVICE_IOTLB 63
> #define VHOST_USER_F_PROTOCOL_FEATURES 30
> 

Should the macro name indicate that this is for vhost-user?

VU_REALIZE_CONN_RETRIES? 

> +#define REALIZE_CONNECTION_RETRIES 3
> +
> /* Generic structures common for any vhost based device. */
> 
> struct vhost_inflight {
> -- 
> 2.41.0
> 

Reply via email to