> On 1 Sep 2023, at 8:00 PM, Markus Armbruster <arm...@redhat.com> wrote: > > Li Feng <fen...@smartx.com <mailto:fen...@smartx.com>> writes: > >> If the backend crashes and restarts, the device is broken. >> This patch adds reconnect for vhost-user-scsi. >> >> Tested with spdk backend. >> >> Signed-off-by: Li Feng <fen...@smartx.com> >> --- >> hw/scsi/vhost-user-scsi.c | 199 +++++++++++++++++++++++++--- >> include/hw/virtio/vhost-user-scsi.h | 4 + >> 2 files changed, 184 insertions(+), 19 deletions(-) >> >> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c >> index ee99b19e7a..5bf012461b 100644 >> --- a/hw/scsi/vhost-user-scsi.c >> +++ b/hw/scsi/vhost-user-scsi.c >> @@ -43,26 +43,54 @@ enum VhostUserProtocolFeature { >> VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, >> }; >> >> +static int vhost_user_scsi_start(VHostUserSCSI *s) >> +{ >> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> + int ret; >> + >> + ret = vhost_scsi_common_start(vsc); >> + s->started_vu = (ret < 0 ? false : true); >> + >> + return ret; >> +} >> + >> +static void vhost_user_scsi_stop(VHostUserSCSI *s) >> +{ >> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> + >> + if (!s->started_vu) { >> + return; >> + } >> + s->started_vu = false; >> + >> + vhost_scsi_common_stop(vsc); >> +} >> + >> static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) >> { >> VHostUserSCSI *s = (VHostUserSCSI *)vdev; >> + DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj; >> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> - bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running; >> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> + bool should_start = virtio_device_should_start(vdev, status); >> + int ret; >> >> - if (vhost_dev_is_started(&vsc->dev) == start) { >> + if (!s->connected) { >> return; >> } >> >> - if (start) { >> - int ret; >> + if (vhost_dev_is_started(&vsc->dev) == should_start) { >> + return; >> + } >> >> - ret = vhost_scsi_common_start(vsc); >> + if (should_start) { >> + ret = vhost_user_scsi_start(s); >> if (ret < 0) { >> error_report("unable to start vhost-user-scsi: %s", >> strerror(-ret)); >> - exit(1); >> + qemu_chr_fe_disconnect(&vs->conf.chardev); >> } >> } else { >> - vhost_scsi_common_stop(vsc); >> + vhost_user_scsi_stop(s); >> } >> } >> >> @@ -89,14 +117,126 @@ static void vhost_dummy_handle_output(VirtIODevice >> *vdev, VirtQueue *vq) >> { >> } >> >> +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp) >> +{ >> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); >> + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); >> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> + int ret = 0; >> + >> + if (s->connected) { >> + return 0; >> + } >> + s->connected = true; >> + >> + vsc->dev.num_queues = vs->conf.num_queues; >> + vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; >> + vsc->dev.vqs = s->vhost_vqs; >> + vsc->dev.vq_index = 0; >> + vsc->dev.backend_features = 0; >> + >> + ret = vhost_dev_init(&vsc->dev, &s->vhost_user, >> VHOST_BACKEND_TYPE_USER, 0, >> + errp); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + /* restore vhost state */ >> + if (virtio_device_started(vdev, vdev->status)) { >> + ret = vhost_user_scsi_start(s); >> + if (ret < 0) { >> + return ret; >> + } > > Fails without setting an error, violating the function's (tacit) > postcondition. Callers: > > * vhost_user_scsi_event() > > Dereferences the null error and crashes. > > * vhost_user_scsi_realize_connect() > > Also fails without setting an error. Caller: > > - vhost_user_scsi_realize() > > 1. Doesn't emit the "Reconnecting after error: " message. See > also below. > > 2. Succeeds instead of failing! Ack.
> >> + } >> + >> + return 0; >> +} >> + >> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event); >> + >> +static void vhost_user_scsi_disconnect(DeviceState *dev) >> +{ >> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); >> + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); >> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> + >> + if (!s->connected) { >> + return; >> + } >> + s->connected = false; >> + >> + vhost_user_scsi_stop(s); >> + >> + vhost_dev_cleanup(&vsc->dev); >> + >> + /* Re-instate the event handler for new connections */ >> + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, >> + vhost_user_scsi_event, NULL, dev, NULL, true); >> +} >> + >> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) >> +{ >> + DeviceState *dev = opaque; >> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); >> + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); >> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> + Error *local_err = NULL; >> + >> + switch (event) { >> + case CHR_EVENT_OPENED: >> + if (vhost_user_scsi_connect(dev, &local_err) < 0) { >> + error_report_err(local_err); >> + qemu_chr_fe_disconnect(&vs->conf.chardev); >> + return; >> + } >> + break; >> + case CHR_EVENT_CLOSED: >> + /* defer close until later to avoid circular close */ >> + vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev, >> + vhost_user_scsi_disconnect); >> + break; >> + case CHR_EVENT_BREAK: >> + case CHR_EVENT_MUX_IN: >> + case CHR_EVENT_MUX_OUT: >> + /* Ignore */ >> + break; >> + } >> +} >> + >> +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp) >> +{ >> + DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj; >> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> + int ret; >> + >> + s->connected = false; >> + >> + ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + ret = vhost_user_scsi_connect(dev, errp); >> + if (ret < 0) { >> + qemu_chr_fe_disconnect(&vs->conf.chardev); >> + return ret; >> + } >> + assert(s->connected); >> + >> + return 0; >> +} >> + >> static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) >> { >> VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> VHostUserSCSI *s = VHOST_USER_SCSI(dev); >> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> - struct vhost_virtqueue *vqs = NULL; >> Error *err = NULL; >> int ret; >> + int retries = VU_REALIZE_CONN_RETRIES; >> >> if (!vs->conf.chardev.chr) { >> error_setg(errp, "vhost-user-scsi: missing chardev"); >> @@ -115,18 +255,28 @@ static void vhost_user_scsi_realize(DeviceState *dev, >> Error **errp) >> goto free_virtio; >> } >> >> - vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; >> - vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); >> - vsc->dev.vq_index = 0; >> - vsc->dev.backend_features = 0; >> - vqs = vsc->dev.vqs; >> + vsc->inflight = g_new0(struct vhost_inflight, 1); >> + s->vhost_vqs = g_new0(struct vhost_virtqueue, >> + VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues); >> + >> + assert(!*errp); > > Dereferencing *errp is wrong. Quoting qapi/error.h's big comment: > > * = Why, when and how to use ERRP_GUARD() = > * > * Without ERRP_GUARD(), use of the @errp parameter is restricted: > * - It must not be dereferenced, because it may be null. > * - It should not be passed to error_prepend() or > * error_append_hint(), because that doesn't work with &error_fatal. > * ERRP_GUARD() lifts these restrictions. > > See there for how to use ERRP_GUARD(). Thanks, good catch. I have understood this knowledge. > >> + do { >> + if (*errp) { >> + error_prepend(errp, "Reconnecting after error: "); >> + error_report_err(*errp); >> + *errp = NULL; >> + } >> + ret = vhost_user_scsi_realize_connect(s, errp); >> + } while (ret < 0 && retries--); > > Reports "Reconnecting ..." to the HMP monitor when in HMP context, else > to stderr. I.e. reports to stderr when we're in QMP context. Is this > really what we want? The vhost-user-blk has the same code, so just keep it the same here is a good idea? > >> >> - ret = vhost_dev_init(&vsc->dev, &s->vhost_user, >> - VHOST_BACKEND_TYPE_USER, 0, errp); >> if (ret < 0) { >> goto free_vhost; >> } >> >> + /* we're fully initialized, now we can operate, so add the handler */ >> + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, >> + vhost_user_scsi_event, NULL, (void *)dev, >> + NULL, true); >> /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ >> vsc->channel = 0; >> vsc->lun = 0; >> @@ -135,8 +285,12 @@ static void vhost_user_scsi_realize(DeviceState *dev, >> Error **errp) >> return; >> >> free_vhost: >> + g_free(s->vhost_vqs); >> + s->vhost_vqs = NULL; >> + g_free(vsc->inflight); >> + vsc->inflight = NULL; >> vhost_user_cleanup(&s->vhost_user); >> - g_free(vqs); >> + >> free_virtio: >> virtio_scsi_common_unrealize(dev); >> } >> @@ -146,16 +300,23 @@ static void vhost_user_scsi_unrealize(DeviceState *dev) >> VirtIODevice *vdev = VIRTIO_DEVICE(dev); >> VHostUserSCSI *s = VHOST_USER_SCSI(dev); >> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >> - struct vhost_virtqueue *vqs = vsc->dev.vqs; >> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >> >> /* This will stop the vhost backend. */ >> vhost_user_scsi_set_status(vdev, 0); >> + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, >> NULL, >> + NULL, false); >> >> vhost_dev_cleanup(&vsc->dev); >> - g_free(vqs); >> + g_free(s->vhost_vqs); >> + s->vhost_vqs = NULL; >> + >> + vhost_dev_free_inflight(vsc->inflight); >> + g_free(vsc->inflight); >> + vsc->inflight = NULL; >> >> - virtio_scsi_common_unrealize(dev); >> vhost_user_cleanup(&s->vhost_user); >> + virtio_scsi_common_unrealize(dev); >> } >> >> static Property vhost_user_scsi_properties[] = { >> diff --git a/include/hw/virtio/vhost-user-scsi.h >> b/include/hw/virtio/vhost-user-scsi.h >> index 521b08e559..b405ec952a 100644 >> --- a/include/hw/virtio/vhost-user-scsi.h >> +++ b/include/hw/virtio/vhost-user-scsi.h >> @@ -29,6 +29,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI) >> struct VHostUserSCSI { >> VHostSCSICommon parent_obj; >> VhostUserState vhost_user; >> + bool connected; >> + bool started_vu; >> + >> + struct vhost_virtqueue *vhost_vqs; >> }; >> >> #endif /* VHOST_USER_SCSI_H */