> On 1 Sep 2023, at 8:00 PM, Markus Armbruster <arm...@redhat.com> wrote:
> 
> Li Feng <fen...@smartx.com <mailto:fen...@smartx.com>> writes:
> 
>> If the backend crashes and restarts, the device is broken.
>> This patch adds reconnect for vhost-user-scsi.
>> 
>> Tested with spdk backend.
>> 
>> Signed-off-by: Li Feng <fen...@smartx.com>
>> ---
>> hw/scsi/vhost-user-scsi.c           | 199 +++++++++++++++++++++++++---
>> include/hw/virtio/vhost-user-scsi.h |   4 +
>> 2 files changed, 184 insertions(+), 19 deletions(-)
>> 
>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>> index ee99b19e7a..5bf012461b 100644
>> --- a/hw/scsi/vhost-user-scsi.c
>> +++ b/hw/scsi/vhost-user-scsi.c
>> @@ -43,26 +43,54 @@ enum VhostUserProtocolFeature {
>>     VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>> };
>> 
>> +static int vhost_user_scsi_start(VHostUserSCSI *s)
>> +{
>> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +    int ret;
>> +
>> +    ret = vhost_scsi_common_start(vsc);
>> +    s->started_vu = (ret < 0 ? false : true);
>> +
>> +    return ret;
>> +}
>> +
>> +static void vhost_user_scsi_stop(VHostUserSCSI *s)
>> +{
>> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +
>> +    if (!s->started_vu) {
>> +        return;
>> +    }
>> +    s->started_vu = false;
>> +
>> +    vhost_scsi_common_stop(vsc);
>> +}
>> +
>> static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>     VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>> +    DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
>>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> -    bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
>> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +    bool should_start = virtio_device_should_start(vdev, status);
>> +    int ret;
>> 
>> -    if (vhost_dev_is_started(&vsc->dev) == start) {
>> +    if (!s->connected) {
>>         return;
>>     }
>> 
>> -    if (start) {
>> -        int ret;
>> +    if (vhost_dev_is_started(&vsc->dev) == should_start) {
>> +        return;
>> +    }
>> 
>> -        ret = vhost_scsi_common_start(vsc);
>> +    if (should_start) {
>> +        ret = vhost_user_scsi_start(s);
>>         if (ret < 0) {
>>             error_report("unable to start vhost-user-scsi: %s", 
>> strerror(-ret));
>> -            exit(1);
>> +            qemu_chr_fe_disconnect(&vs->conf.chardev);
>>         }
>>     } else {
>> -        vhost_scsi_common_stop(vsc);
>> +        vhost_user_scsi_stop(s);
>>     }
>> }
>> 
>> @@ -89,14 +117,126 @@ static void vhost_dummy_handle_output(VirtIODevice 
>> *vdev, VirtQueue *vq)
>> {
>> }
>> 
>> +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp)
>> +{
>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
>> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +    int ret = 0;
>> +
>> +    if (s->connected) {
>> +        return 0;
>> +    }
>> +    s->connected = true;
>> +
>> +    vsc->dev.num_queues = vs->conf.num_queues;
>> +    vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
>> +    vsc->dev.vqs = s->vhost_vqs;
>> +    vsc->dev.vq_index = 0;
>> +    vsc->dev.backend_features = 0;
>> +
>> +    ret = vhost_dev_init(&vsc->dev, &s->vhost_user, 
>> VHOST_BACKEND_TYPE_USER, 0,
>> +                         errp);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    /* restore vhost state */
>> +    if (virtio_device_started(vdev, vdev->status)) {
>> +        ret = vhost_user_scsi_start(s);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
> 
> Fails without setting an error, violating the function's (tacit)
> postcondition.  Callers:
> 
> * vhost_user_scsi_event()
> 
>  Dereferences the null error and crashes.
> 
> * vhost_user_scsi_realize_connect()
> 
>  Also fails without setting an error.  Caller:
> 
>  - vhost_user_scsi_realize()
> 
>    1. Doesn't emit the "Reconnecting after error: " message.  See
>       also below.
> 
>    2. Succeeds instead of failing!
Ack.

> 
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event);
>> +
>> +static void vhost_user_scsi_disconnect(DeviceState *dev)
>> +{
>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
>> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +
>> +    if (!s->connected) {
>> +        return;
>> +    }
>> +    s->connected = false;
>> +
>> +    vhost_user_scsi_stop(s);
>> +
>> +    vhost_dev_cleanup(&vsc->dev);
>> +
>> +    /* Re-instate the event handler for new connections */
>> +    qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
>> +                             vhost_user_scsi_event, NULL, dev, NULL, true);
>> +}
>> +
>> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
>> +{
>> +    DeviceState *dev = opaque;
>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
>> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +    Error *local_err = NULL;
>> +
>> +    switch (event) {
>> +    case CHR_EVENT_OPENED:
>> +        if (vhost_user_scsi_connect(dev, &local_err) < 0) {
>> +            error_report_err(local_err);
>> +            qemu_chr_fe_disconnect(&vs->conf.chardev);
>> +            return;
>> +        }
>> +        break;
>> +    case CHR_EVENT_CLOSED:
>> +        /* defer close until later to avoid circular close */
>> +        vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
>> +                               vhost_user_scsi_disconnect);
>> +        break;
>> +    case CHR_EVENT_BREAK:
>> +    case CHR_EVENT_MUX_IN:
>> +    case CHR_EVENT_MUX_OUT:
>> +        /* Ignore */
>> +        break;
>> +    }
>> +}
>> +
>> +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp)
>> +{
>> +    DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
>> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> +    int ret;
>> +
>> +    s->connected = false;
>> +
>> +    ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    ret = vhost_user_scsi_connect(dev, errp);
>> +    if (ret < 0) {
>> +        qemu_chr_fe_disconnect(&vs->conf.chardev);
>> +        return ret;
>> +    }
>> +    assert(s->connected);
>> +
>> +    return 0;
>> +}
>> +
>> static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
>> {
>>     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>>     VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> -    struct vhost_virtqueue *vqs = NULL;
>>     Error *err = NULL;
>>     int ret;
>> +    int retries = VU_REALIZE_CONN_RETRIES;
>> 
>>     if (!vs->conf.chardev.chr) {
>>         error_setg(errp, "vhost-user-scsi: missing chardev");
>> @@ -115,18 +255,28 @@ static void vhost_user_scsi_realize(DeviceState *dev, 
>> Error **errp)
>>         goto free_virtio;
>>     }
>> 
>> -    vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
>> -    vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
>> -    vsc->dev.vq_index = 0;
>> -    vsc->dev.backend_features = 0;
>> -    vqs = vsc->dev.vqs;
>> +    vsc->inflight = g_new0(struct vhost_inflight, 1);
>> +    s->vhost_vqs = g_new0(struct vhost_virtqueue,
>> +                          VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues);
>> +
>> +    assert(!*errp);
> 
> Dereferencing *errp is wrong.  Quoting qapi/error.h's big comment:
> 
> * = Why, when and how to use ERRP_GUARD() =
> *
> * Without ERRP_GUARD(), use of the @errp parameter is restricted:
> * - It must not be dereferenced, because it may be null.
> * - It should not be passed to error_prepend() or
> *   error_append_hint(), because that doesn't work with &error_fatal.
> * ERRP_GUARD() lifts these restrictions.
> 
> See there for how to use ERRP_GUARD().

Thanks, good catch. I have understood this knowledge.
> 
>> +    do {
>> +        if (*errp) {
>> +            error_prepend(errp, "Reconnecting after error: ");
>> +            error_report_err(*errp);
>> +            *errp = NULL;
>> +        }
>> +        ret = vhost_user_scsi_realize_connect(s, errp);
>> +    } while (ret < 0 && retries--);
> 
> Reports "Reconnecting ..." to the HMP monitor when in HMP context, else
> to stderr.  I.e. reports to stderr when we're in QMP context.  Is this
> really what we want?
The vhost-user-blk has the same code, so just keep it the same here is a
good idea?

> 
>> 
>> -    ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
>> -                         VHOST_BACKEND_TYPE_USER, 0, errp);
>>     if (ret < 0) {
>>         goto free_vhost;
>>     }
>> 
>> +    /* we're fully initialized, now we can operate, so add the handler */
>> +    qemu_chr_fe_set_handlers(&vs->conf.chardev,  NULL, NULL,
>> +                             vhost_user_scsi_event, NULL, (void *)dev,
>> +                             NULL, true);
>>     /* Channel and lun both are 0 for bootable vhost-user-scsi disk */
>>     vsc->channel = 0;
>>     vsc->lun = 0;
>> @@ -135,8 +285,12 @@ static void vhost_user_scsi_realize(DeviceState *dev, 
>> Error **errp)
>>     return;
>> 
>> free_vhost:
>> +    g_free(s->vhost_vqs);
>> +    s->vhost_vqs = NULL;
>> +    g_free(vsc->inflight);
>> +    vsc->inflight = NULL;
>>     vhost_user_cleanup(&s->vhost_user);
>> -    g_free(vqs);
>> +
>> free_virtio:
>>     virtio_scsi_common_unrealize(dev);
>> }
>> @@ -146,16 +300,23 @@ static void vhost_user_scsi_unrealize(DeviceState *dev)
>>     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>>     VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> -    struct vhost_virtqueue *vqs = vsc->dev.vqs;
>> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>> 
>>     /* This will stop the vhost backend. */
>>     vhost_user_scsi_set_status(vdev, 0);
>> +    qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, 
>> NULL,
>> +                             NULL, false);
>> 
>>     vhost_dev_cleanup(&vsc->dev);
>> -    g_free(vqs);
>> +    g_free(s->vhost_vqs);
>> +    s->vhost_vqs = NULL;
>> +
>> +    vhost_dev_free_inflight(vsc->inflight);
>> +    g_free(vsc->inflight);
>> +    vsc->inflight = NULL;
>> 
>> -    virtio_scsi_common_unrealize(dev);
>>     vhost_user_cleanup(&s->vhost_user);
>> +    virtio_scsi_common_unrealize(dev);
>> }
>> 
>> static Property vhost_user_scsi_properties[] = {
>> diff --git a/include/hw/virtio/vhost-user-scsi.h 
>> b/include/hw/virtio/vhost-user-scsi.h
>> index 521b08e559..b405ec952a 100644
>> --- a/include/hw/virtio/vhost-user-scsi.h
>> +++ b/include/hw/virtio/vhost-user-scsi.h
>> @@ -29,6 +29,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI)
>> struct VHostUserSCSI {
>>     VHostSCSICommon parent_obj;
>>     VhostUserState vhost_user;
>> +    bool connected;
>> +    bool started_vu;
>> +
>> +    struct vhost_virtqueue *vhost_vqs;
>> };
>> 
>> #endif /* VHOST_USER_SCSI_H */

Reply via email to