Dave Voutila <[email protected]> writes:
> mbuhl@ found an issue where the emulated virtio block device can
> hang. The tl;dr: the emulated pic never injects an interrupt and the
> vioblk(4) driver in the guest starves, waiting to be told to check the
> virtqueue.
>
> Flipping the bit in the i8259 using gdb causes the spice to flow once
> again.
>
> This diff fixes two related things (so could be committed in 2 parts):
>
> 1. the irq deassert on a virtio pci interrupt status register read
> should occur synchronously by the vcpu thread in the vm and not
> async.
>
> 2. always raise the irr bit in the pic, regardless of mask. The mask is
> used when finding an irq to ack and shouldn't be used to determine if
> the irr bit is raised. AFAICT from the old i8259 data sheets, masking
> should have no effect on if the irr bit is set.
>
> This is holding up another diff I want to share that reduces latency in
> interrupts, but it's shown to make this i8259 race condition worse, so
> I'd like to give folks a few days to check if the below diff causes
> regressions. Once this is committed, I'll feel safe sharing the latency
> diff with tech@.
>
> Any ok's pending a few days for folks to test?
Still looking for ok's or test reports (other than from mbuhl@, of
course.)
>
> -dv
>
>
> diff refs/heads/master cdba90a89ee6e77b52c2b6955d36a260a23a3b85
> commit - ad1cd1152fddbf55189657a2df9f2468409698ab
> commit + cdba90a89ee6e77b52c2b6955d36a260a23a3b85
> blob - f7862f5e9d17f96f5260358271fab8f253b26505
> blob + b6891c52c824d7b8c69e67e5323772152b1ed844
> --- usr.sbin/vmd/i8259.c
> +++ usr.sbin/vmd/i8259.c
> @@ -222,20 +222,16 @@ i8259_assert_irq(uint8_t irq)
> {
> mutex_lock(&pic_mtx);
> if (irq <= 7) {
> - if (!ISSET(pics[MASTER].imr, 1 << irq)) {
> - SET(pics[MASTER].irr, 1 << irq);
> - pics[MASTER].asserted = 1;
> - }
> + SET(pics[MASTER].irr, 1 << irq);
> + pics[MASTER].asserted = 1;
> } else {
> irq -= 8;
> - if (!ISSET(pics[SLAVE].imr, 1 << irq)) {
> - SET(pics[SLAVE].irr, 1 << irq);
> - pics[SLAVE].asserted = 1;
> + SET(pics[SLAVE].irr, 1 << irq);
> + pics[SLAVE].asserted = 1;
>
> - /* Assert cascade IRQ on master PIC */
> - SET(pics[MASTER].irr, 1 << 2);
> - pics[MASTER].asserted = 1;
> - }
> + /* Assert cascade IRQ on master PIC */
> + SET(pics[MASTER].irr, 1 << 2);
> + pics[MASTER].asserted = 1;
> }
> mutex_unlock(&pic_mtx);
> }
> blob - 7bc76c4daed427dae82688452ec21985be679bc4
> blob + 4d321fd4ff23514ffa7a4bee0eeb7a9324020d80
> --- usr.sbin/vmd/vioblk.c
> +++ usr.sbin/vmd/vioblk.c
> @@ -39,7 +39,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st
> extern struct vmd_vm *current_vm;
>
> static const char *disk_type(int);
> -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *);
> +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
> + int8_t *);
> static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
> void vioblk_notify_rx(struct vioblk_dev *);
> int vioblk_notifyq(struct vioblk_dev *);
> @@ -723,6 +724,7 @@ handle_sync_io(int fd, short event, void *arg)
> struct viodev_msg msg;
> struct imsg imsg;
> ssize_t n;
> + int8_t intr = INTR_STATE_NOOP;
>
> if (event & EV_READ) {
> if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
> @@ -770,8 +772,9 @@ handle_sync_io(int fd, short event, void *arg)
> }
> case VIODEV_MSG_IO_READ:
> /* Read IO: make sure to send a reply */
> - msg.data = handle_io_read(&msg, dev);
> + msg.data = handle_io_read(&msg, dev, &intr);
> msg.data_valid = 1;
> + msg.state = intr;
> imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
> sizeof(msg));
> break;
> @@ -844,7 +847,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
> }
>
> static uint32_t
> -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
> +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
> {
> struct vioblk_dev *vioblk = &dev->vioblk;
> uint8_t sz = msg->io_sz;
> @@ -1001,7 +1004,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
> case VIRTIO_CONFIG_ISR_STATUS:
> data = vioblk->cfg.isr_status;
> vioblk->cfg.isr_status = 0;
> - virtio_deassert_pic_irq(dev, 0);
> + if (intr != NULL)
> + *intr = INTR_STATE_DEASSERT;
> break;
> default:
> return (0xFFFFFFFF);
> blob - c16ad2635ea622fd7fce48b5145e2199dd451346
> blob + 5c2a943ac7ad02dfbc4793f5caab3200a3ced803
> --- usr.sbin/vmd/vionet.c
> +++ usr.sbin/vmd/vionet.c
> @@ -52,7 +52,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st
>
> static int vionet_rx(struct vionet_dev *);
> static void vionet_rx_event(int, short, void *);
> -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *);
> +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
> + int8_t *);
> static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
> void vionet_notify_rx(struct virtio_dev *);
> int vionet_notifyq(struct virtio_dev *);
> @@ -757,6 +758,7 @@ handle_sync_io(int fd, short event, void *arg)
> struct viodev_msg msg;
> struct imsg imsg;
> ssize_t n;
> + int8_t intr = INTR_STATE_NOOP;
>
> if (event & EV_READ) {
> if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
> @@ -804,8 +806,9 @@ handle_sync_io(int fd, short event, void *arg)
> }
> case VIODEV_MSG_IO_READ:
> /* Read IO: make sure to send a reply */
> - msg.data = handle_io_read(&msg, dev);
> + msg.data = handle_io_read(&msg, dev, &intr);
> msg.data_valid = 1;
> + msg.state = intr;
> imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
> sizeof(msg));
> break;
> @@ -891,7 +894,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
> }
>
> static uint32_t
> -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
> +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
> {
> struct vionet_dev *vionet = &dev->vionet;
> uint32_t data;
> @@ -930,7 +933,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
> case VIRTIO_CONFIG_ISR_STATUS:
> data = vionet->cfg.isr_status;
> vionet->cfg.isr_status = 0;
> - virtio_deassert_pic_irq(dev, 0);
> + if (intr != NULL)
> + *intr = INTR_STATE_DEASSERT;
> break;
> default:
> return (0xFFFFFFFF);