mbuhl@ found an issue where the emulated virtio block device can
hang. The tl;dr: the emulated pic never injects an interrupt and the
vioblk(4) driver in the guest starves, waiting to be told to check the
virtqueue.

Flipping the bit in the i8259 using gdb causes the spice to flow once
again.

This diff fixes two related things (so could be committed in 2 parts):

1. the irq deassert on a virtio pci interrupt status register read
   should occur synchronously by the vcpu thread in the vm and not
   async.

2. always raise the irr bit in the pic, regardless of mask. The mask is
   used when finding an irq to ack and shouldn't be used to determine if
   the irr bit is raised. AFAICT from the old i8259 data sheets, masking
   should have no effect on if the irr bit is set.

This is holding up another diff I want to share that reduces latency in
interrupts, but it's shown to make this i8259 race condition worse, so
I'd like to give folks a few days to check if the below diff causes
regressions. Once this is committed, I'll feel safe sharing the latency
diff with tech@.

Any ok's pending a few days for folks to test?

-dv


diff refs/heads/master cdba90a89ee6e77b52c2b6955d36a260a23a3b85
commit - ad1cd1152fddbf55189657a2df9f2468409698ab
commit + cdba90a89ee6e77b52c2b6955d36a260a23a3b85
blob - f7862f5e9d17f96f5260358271fab8f253b26505
blob + b6891c52c824d7b8c69e67e5323772152b1ed844
--- usr.sbin/vmd/i8259.c
+++ usr.sbin/vmd/i8259.c
@@ -222,20 +222,16 @@ i8259_assert_irq(uint8_t irq)
 {
        mutex_lock(&pic_mtx);
        if (irq <= 7) {
-               if (!ISSET(pics[MASTER].imr, 1 << irq)) {
-                       SET(pics[MASTER].irr, 1 << irq);
-                       pics[MASTER].asserted = 1;
-               }
+               SET(pics[MASTER].irr, 1 << irq);
+               pics[MASTER].asserted = 1;
        } else {
                irq -= 8;
-               if (!ISSET(pics[SLAVE].imr, 1 << irq)) {
-                       SET(pics[SLAVE].irr, 1 << irq);
-                       pics[SLAVE].asserted = 1;
+               SET(pics[SLAVE].irr, 1 << irq);
+               pics[SLAVE].asserted = 1;

-                       /* Assert cascade IRQ on master PIC */
-                       SET(pics[MASTER].irr, 1 << 2);
-                       pics[MASTER].asserted = 1;
-               }
+               /* Assert cascade IRQ on master PIC */
+               SET(pics[MASTER].irr, 1 << 2);
+               pics[MASTER].asserted = 1;
        }
        mutex_unlock(&pic_mtx);
 }
blob - 7bc76c4daed427dae82688452ec21985be679bc4
blob + 4d321fd4ff23514ffa7a4bee0eeb7a9324020d80
--- usr.sbin/vmd/vioblk.c
+++ usr.sbin/vmd/vioblk.c
@@ -39,7 +39,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st
 extern struct vmd_vm *current_vm;

 static const char *disk_type(int);
-static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *);
+static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
+    int8_t *);
 static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
 void vioblk_notify_rx(struct vioblk_dev *);
 int vioblk_notifyq(struct vioblk_dev *);
@@ -723,6 +724,7 @@ handle_sync_io(int fd, short event, void *arg)
        struct viodev_msg msg;
        struct imsg imsg;
        ssize_t n;
+       int8_t intr = INTR_STATE_NOOP;

        if (event & EV_READ) {
                if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
@@ -770,8 +772,9 @@ handle_sync_io(int fd, short event, void *arg)
                        }
                case VIODEV_MSG_IO_READ:
                        /* Read IO: make sure to send a reply */
-                       msg.data = handle_io_read(&msg, dev);
+                       msg.data = handle_io_read(&msg, dev, &intr);
                        msg.data_valid = 1;
+                       msg.state = intr;
                        imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
                            sizeof(msg));
                        break;
@@ -844,7 +847,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
 }

 static uint32_t
-handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
+handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
 {
        struct vioblk_dev *vioblk = &dev->vioblk;
        uint8_t sz = msg->io_sz;
@@ -1001,7 +1004,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
        case VIRTIO_CONFIG_ISR_STATUS:
                data = vioblk->cfg.isr_status;
                vioblk->cfg.isr_status = 0;
-               virtio_deassert_pic_irq(dev, 0);
+               if (intr != NULL)
+                       *intr = INTR_STATE_DEASSERT;
                break;
        default:
                return (0xFFFFFFFF);
blob - c16ad2635ea622fd7fce48b5145e2199dd451346
blob + 5c2a943ac7ad02dfbc4793f5caab3200a3ced803
--- usr.sbin/vmd/vionet.c
+++ usr.sbin/vmd/vionet.c
@@ -52,7 +52,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st

 static int vionet_rx(struct vionet_dev *);
 static void vionet_rx_event(int, short, void *);
-static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *);
+static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
+    int8_t *);
 static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
 void vionet_notify_rx(struct virtio_dev *);
 int vionet_notifyq(struct virtio_dev *);
@@ -757,6 +758,7 @@ handle_sync_io(int fd, short event, void *arg)
        struct viodev_msg msg;
        struct imsg imsg;
        ssize_t n;
+       int8_t intr = INTR_STATE_NOOP;

        if (event & EV_READ) {
                if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
@@ -804,8 +806,9 @@ handle_sync_io(int fd, short event, void *arg)
                        }
                case VIODEV_MSG_IO_READ:
                        /* Read IO: make sure to send a reply */
-                       msg.data = handle_io_read(&msg, dev);
+                       msg.data = handle_io_read(&msg, dev, &intr);
                        msg.data_valid = 1;
+                       msg.state = intr;
                        imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
                            sizeof(msg));
                        break;
@@ -891,7 +894,7 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
 }

 static uint32_t
-handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
+handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
 {
        struct vionet_dev *vionet = &dev->vionet;
        uint32_t data;
@@ -930,7 +933,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d
        case VIRTIO_CONFIG_ISR_STATUS:
                data = vionet->cfg.isr_status;
                vionet->cfg.isr_status = 0;
-               virtio_deassert_pic_irq(dev, 0);
+               if (intr != NULL)
+                       *intr = INTR_STATE_DEASSERT;
                break;
        default:
                return (0xFFFFFFFF);

Reply via email to