Author: grehan
Date: Wed Jul 11 02:57:19 2012
New Revision: 238360
URL: http://svn.freebsd.org/changeset/base/238360

Log:
  Various VirtIO improvements
  
        PCI:
          - Properly handle interrupt fallback from MSIX to MSI to legacy.
            The host may not have sufficient resources to support MSIX,
            so we must be able to fallback to legacy interrupts.
          - Add interface to get the (sub) vendor and device IDs.
          - Rename flags to VTPCI_FLAG_* like other VirtIO drivers.
        Block:
          - No longer allocate vtblk_requests from separate UMA zone.
            malloc(9) from M_DEVBUF is sufficient. Assert segment counts
            at allocation.
          - More verbose error and debug messages.
        Network:
          - Remove stray write once variable.
        Virtqueue:
          - Shuffle code around in preparation of converting the mb()s to
            the appropriate atomic(9) operations.
          - Only walk the descriptor chain when freeing if INVARIANTS is
            defined since the result is only KASSERT()ed.
  
  Submitted by: Bryan Venteicher (bry...@daemoninthecloset.org)

Modified:
  head/sys/dev/virtio/balloon/virtio_balloon.c
  head/sys/dev/virtio/block/virtio_blk.c
  head/sys/dev/virtio/network/if_vtnet.c
  head/sys/dev/virtio/pci/virtio_pci.c
  head/sys/dev/virtio/pci/virtio_pci.h
  head/sys/dev/virtio/virtio.c
  head/sys/dev/virtio/virtio.h
  head/sys/dev/virtio/virtio_ring.h
  head/sys/dev/virtio/virtqueue.c
  head/sys/dev/virtio/virtqueue.h

Modified: head/sys/dev/virtio/balloon/virtio_balloon.c
==============================================================================
--- head/sys/dev/virtio/balloon/virtio_balloon.c        Wed Jul 11 01:04:59 
2012        (r238359)
+++ head/sys/dev/virtio/balloon/virtio_balloon.c        Wed Jul 11 02:57:19 
2012        (r238360)
@@ -412,7 +412,6 @@ vtballoon_send_page_frames(struct vtball
         * interrupt handler will wake us up.
         */
        VTBALLOON_LOCK(sc);
-
        while ((c = virtqueue_dequeue(vq, NULL)) == NULL)
                msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0);
        VTBALLOON_UNLOCK(sc);

Modified: head/sys/dev/virtio/block/virtio_blk.c
==============================================================================
--- head/sys/dev/virtio/block/virtio_blk.c      Wed Jul 11 01:04:59 2012        
(r238359)
+++ head/sys/dev/virtio/block/virtio_blk.c      Wed Jul 11 02:57:19 2012        
(r238360)
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/taskqueue.h>
 
 #include <geom/geom_disk.h>
-#include <vm/uma.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
@@ -119,7 +118,7 @@ static int  vtblk_shutdown(device_t);
 static int     vtblk_open(struct disk *);
 static int     vtblk_close(struct disk *);
 static int     vtblk_ioctl(struct disk *, u_long, void *, int,
-                   struct thread *);
+                   struct thread *);
 static int     vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
 static void    vtblk_strategy(struct bio *);
 
@@ -193,7 +192,7 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_
                                mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
 
 #define VTBLK_DISK_NAME                "vtbd"
-#define        VTBLK_QUIESCE_TIMEOUT   (30 * hz)
+#define VTBLK_QUIESCE_TIMEOUT  (30 * hz)
 
 /*
  * Each block request uses at least two segments - one for the header
@@ -201,8 +200,6 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_
  */
 #define VTBLK_MIN_SEGMENTS     2
 
-static uma_zone_t vtblk_req_zone;
-
 static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
@@ -236,19 +233,8 @@ vtblk_modevent(module_t mod, int type, v
 
        switch (type) {
        case MOD_LOAD:
-               vtblk_req_zone = uma_zcreate("vtblk_request",
-                   sizeof(struct vtblk_request),
-                   NULL, NULL, NULL, NULL, 0, 0);
-               break;
        case MOD_QUIESCE:
        case MOD_UNLOAD:
-               if (uma_zone_get_cur(vtblk_req_zone) > 0)
-                       error = EBUSY;
-               else if (type == MOD_UNLOAD) {
-                       uma_zdestroy(vtblk_req_zone);
-                       vtblk_req_zone = NULL;
-               }
-               break;
        case MOD_SHUTDOWN:
                break;
        default:
@@ -316,7 +302,7 @@ vtblk_attach(device_t dev)
        }
 
        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
-        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
+       if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
                error = EINVAL;
                device_printf(dev, "fewer than minimum number of segments "
                    "allowed: %d\n", sc->vtblk_max_nsegs);
@@ -493,7 +479,6 @@ vtblk_dump(void *arg, void *virtual, vm_
        int error;
 
        dp = arg;
-       error = 0;
 
        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);
@@ -539,7 +524,7 @@ vtblk_strategy(struct bio *bp)
                return;
        }
 
-#ifdef INVARIANTS
+#ifdef INVARIANTS
        /*
         * Prevent read/write buffers spanning too many segments from
         * getting into the queue. This should only trip if d_maxsize
@@ -547,13 +532,13 @@ vtblk_strategy(struct bio *bp)
         */
        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                int nsegs, max_nsegs;
-               
+
                nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
                max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
 
                KASSERT(nsegs <= max_nsegs,
-                   ("bio spanned too many segments: %d, max: %d",
-                   nsegs, max_nsegs));
+                   ("bio %p spanned too many segments: %d, max: %d",
+                   bp, nsegs, max_nsegs));
        }
 #endif
 
@@ -800,27 +785,22 @@ vtblk_execute_request(struct vtblk_softc
        VTBLK_LOCK_ASSERT(sc);
 
        sglist_reset(sg);
-       error = sglist_append(sg, &req->vbr_hdr,
-           sizeof(struct virtio_blk_outhdr));
-       KASSERT(error == 0, ("error adding header to sglist"));
-       KASSERT(sg->sg_nseg == 1,
-           ("header spanned multiple segments: %d", sg->sg_nseg));
+
+       sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
 
        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
-               KASSERT(error == 0, ("error adding buffer to sglist"));
+               if (error || sg->sg_nseg == sg->sg_maxseg)
+                       panic("%s: data buffer too big bio:%p error:%d",
+                           __FUNCTION__, bp, error);
 
                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
-                       writable += sg->sg_nseg - 1;
+                       writable = sg->sg_nseg - 1;
        }
 
-       error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
-       KASSERT(error == 0, ("error adding ack to sglist"));
        writable++;
-
-       KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
-           ("fewer than min segments: %d", sg->sg_nseg));
+       sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
 
        readable = sg->sg_nseg - writable;
 
@@ -995,12 +975,10 @@ vtblk_flush_dump(struct vtblk_softc *sc)
 static int
 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 {
-       device_t dev;
        struct virtqueue *vq;
        struct vtblk_request *r;
        int error;
 
-       dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;
 
        if (!virtqueue_empty(vq))
@@ -1013,12 +991,12 @@ vtblk_poll_request(struct vtblk_softc *s
        virtqueue_notify(vq);
 
        r = virtqueue_poll(vq, NULL);
-       KASSERT(r == req, ("unexpected request response"));
+       KASSERT(r == req, ("unexpected request response: %p/%p", r, req));
 
        error = vtblk_request_error(req);
        if (error && bootverbose) {
-               device_printf(dev, "vtblk_poll_request: IO error: %d\n",
-                   error);
+               device_printf(sc->vtblk_dev,
+                   "%s: IO error: %d\n", __FUNCTION__, error);
        }
 
        return (error);
@@ -1090,6 +1068,20 @@ vtblk_drain(struct vtblk_softc *sc)
        vtblk_free_requests(sc);
 }
 
+#ifdef INVARIANTS
+static void
+vtblk_request_invariants(struct vtblk_request *req)
+{
+       int hdr_nsegs, ack_nsegs;
+
+       hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr));
+       ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack));
+
+       KASSERT(hdr_nsegs == 1, ("request header crossed page boundary"));
+       KASSERT(ack_nsegs == 1, ("request ack crossed page boundary"));
+}
+#endif
+
 static int
 vtblk_alloc_requests(struct vtblk_softc *sc)
 {
@@ -1107,10 +1099,14 @@ vtblk_alloc_requests(struct vtblk_softc 
                nreqs /= VTBLK_MIN_SEGMENTS;
 
        for (i = 0; i < nreqs; i++) {
-               req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
+               req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
                if (req == NULL)
                        return (ENOMEM);
 
+#ifdef INVARIANTS
+               vtblk_request_invariants(req);
+#endif
+
                sc->vtblk_request_count++;
                vtblk_enqueue_request(sc, req);
        }
@@ -1128,10 +1124,11 @@ vtblk_free_requests(struct vtblk_softc *
 
        while ((req = vtblk_dequeue_request(sc)) != NULL) {
                sc->vtblk_request_count--;
-               uma_zfree(vtblk_req_zone, req);
+               free(req, M_DEVBUF);
        }
 
-       KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
+       KASSERT(sc->vtblk_request_count == 0,
+           ("leaked requests: %d", sc->vtblk_request_count));
 }
 
 static struct vtblk_request *

Modified: head/sys/dev/virtio/network/if_vtnet.c
==============================================================================
--- head/sys/dev/virtio/network/if_vtnet.c      Wed Jul 11 01:04:59 2012        
(r238359)
+++ head/sys/dev/virtio/network/if_vtnet.c      Wed Jul 11 02:57:19 2012        
(r238360)
@@ -748,11 +748,9 @@ vtnet_is_link_up(struct vtnet_softc *sc)
 static void
 vtnet_update_link_status(struct vtnet_softc *sc)
 {
-       device_t dev;
        struct ifnet *ifp;
        int link;
 
-       dev = sc->vtnet_dev;
        ifp = sc->vtnet_ifp;
 
        link = vtnet_is_link_up(sc);

Modified: head/sys/dev/virtio/pci/virtio_pci.c
==============================================================================
--- head/sys/dev/virtio/pci/virtio_pci.c        Wed Jul 11 01:04:59 2012        
(r238359)
+++ head/sys/dev/virtio/pci/virtio_pci.c        Wed Jul 11 02:57:19 2012        
(r238360)
@@ -57,12 +57,15 @@ struct vtpci_softc {
        struct resource                 *vtpci_msix_res;
        uint64_t                         vtpci_features;
        uint32_t                         vtpci_flags;
-#define VIRTIO_PCI_FLAG_NO_MSI          0x0001
-#define VIRTIO_PCI_FLAG_MSI             0x0002
-#define VIRTIO_PCI_FLAG_NO_MSIX                 0x0010
-#define VIRTIO_PCI_FLAG_MSIX            0x0020
-#define VIRTIO_PCI_FLAG_SHARED_MSIX     0x0040
+#define VTPCI_FLAG_NO_MSI              0x0001
+#define VTPCI_FLAG_NO_MSIX             0x0002
+#define VTPCI_FLAG_LEGACY              0x1000
+#define VTPCI_FLAG_MSI                 0x2000
+#define VTPCI_FLAG_MSIX                        0x4000
+#define VTPCI_FLAG_SHARED_MSIX         0x8000
+#define VTPCI_FLAG_ITYPE_MASK          0xF000
 
+       /* This "bus" will only ever have one child. */
        device_t                         vtpci_child_dev;
        struct virtio_feature_desc      *vtpci_child_feat_desc;
 
@@ -80,7 +83,8 @@ struct vtpci_softc {
        int                              vtpci_nvqs;
        struct vtpci_virtqueue {
                struct virtqueue *vq;
-
+               /* Device did not provide a callback for this virtqueue. */
+               int               no_intr;
                /* Index into vtpci_intr_res[] below. Unused, then -1. */
                int               ires_idx;
        } vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];
@@ -130,24 +134,39 @@ static void       vtpci_describe_features(stru
                    uint64_t);
 static void    vtpci_probe_and_attach_child(struct vtpci_softc *);
 
-static int     vtpci_alloc_interrupts(struct vtpci_softc *, int, int,
-                   struct vq_alloc_info *);
-static int     vtpci_alloc_intr_resources(struct vtpci_softc *, int,
-                   struct vq_alloc_info *);
-static int     vtpci_alloc_msi(struct vtpci_softc *);
-static int     vtpci_alloc_msix(struct vtpci_softc *, int);
+static int     vtpci_alloc_msix(struct vtpci_softc *, int);
+static int     vtpci_alloc_msi(struct vtpci_softc *);
+static int     vtpci_alloc_intr_msix_pervq(struct vtpci_softc *);
+static int     vtpci_alloc_intr_msix_shared(struct vtpci_softc *);
+static int     vtpci_alloc_intr_msi(struct vtpci_softc *);
+static int     vtpci_alloc_intr_legacy(struct vtpci_softc *);
+static int     vtpci_alloc_intr_resources(struct vtpci_softc *);
+
+static int     vtpci_setup_legacy_interrupt(struct vtpci_softc *,
+                   enum intr_type);
+static int     vtpci_setup_msix_interrupts(struct vtpci_softc *,
+                   enum intr_type);
+static int     vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type);
+
 static int     vtpci_register_msix_vector(struct vtpci_softc *, int, int);
+static int     vtpci_set_host_msix_vectors(struct vtpci_softc *);
+static int     vtpci_reinit_virtqueue(struct vtpci_softc *, int);
 
 static void    vtpci_free_interrupts(struct vtpci_softc *);
 static void    vtpci_free_virtqueues(struct vtpci_softc *);
+static void    vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *);
 static void    vtpci_release_child_resources(struct vtpci_softc *);
 static void    vtpci_reset(struct vtpci_softc *);
 
+static void    vtpci_select_virtqueue(struct vtpci_softc *, int);
+
 static int     vtpci_legacy_intr(void *);
 static int     vtpci_vq_shared_intr(void *);
 static int     vtpci_vq_intr(void *);
 static int     vtpci_config_intr(void *);
 
+#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt
+
 /*
  * I/O port read/write wrappers.
  */
@@ -252,7 +271,7 @@ vtpci_attach(device_t dev)
        }
 
        if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
-               sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI;
+               sc->vtpci_flags |= VTPCI_FLAG_NO_MSI;
 
        if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
                rid = PCIR_BAR(1);
@@ -261,7 +280,7 @@ vtpci_attach(device_t dev)
        }
 
        if (sc->vtpci_msix_res == NULL)
-               sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX;
+               sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
 
        vtpci_reset(sc);
 
@@ -372,6 +391,16 @@ vtpci_read_ivar(device_t dev, device_t c
 
        switch (index) {
        case VIRTIO_IVAR_DEVTYPE:
+       case VIRTIO_IVAR_SUBDEVICE:
+               *result = pci_get_subdevice(dev);
+               break;
+       case VIRTIO_IVAR_VENDOR:
+               *result = pci_get_vendor(dev);
+               break;
+       case VIRTIO_IVAR_DEVICE:
+               *result = pci_get_device(dev);
+               break;
+       case VIRTIO_IVAR_SUBVENDOR:
                *result = pci_get_subdevice(dev);
                break;
        default:
@@ -442,102 +471,97 @@ vtpci_alloc_virtqueues(device_t dev, int
     struct vq_alloc_info *vq_info)
 {
        struct vtpci_softc *sc;
+       struct virtqueue *vq;
        struct vtpci_virtqueue *vqx;
        struct vq_alloc_info *info;
-       int queue, error;
-       uint16_t vq_size;
+       int idx, error;
+       uint16_t size;
 
        sc = device_get_softc(dev);
+       error = 0;
 
-       if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
-           nvqs > VIRTIO_MAX_VIRTQUEUES)
+       if (sc->vtpci_nvqs != 0)
+               return (EALREADY);
+       if (nvqs <= 0 || nvqs > VIRTIO_MAX_VIRTQUEUES)
                return (EINVAL);
 
-       error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info);
-       if (error) {
-               device_printf(dev, "cannot allocate interrupts\n");
-               return (error);
-       }
-
-       if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-               error = vtpci_register_msix_vector(sc,
-                   VIRTIO_MSI_CONFIG_VECTOR, 0);
-               if (error)
-                       return (error);
-       }
+       if (flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX)
+               sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
 
-       for (queue = 0; queue < nvqs; queue++) {
-               vqx = &sc->vtpci_vqx[queue];
-               info = &vq_info[queue];
-
-               vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
-
-               vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
-               error = virtqueue_alloc(dev, queue, vq_size,
-                   VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
-               if (error)
-                       return (error);
-
-               if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-                       error = vtpci_register_msix_vector(sc,
-                           VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
-                       if (error)
-                               return (error);
+       for (idx = 0; idx < nvqs; idx++) {
+               vqx = &sc->vtpci_vqx[idx];
+               info = &vq_info[idx];
+
+               vtpci_select_virtqueue(sc, idx);
+               size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
+
+               error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN,
+                   0xFFFFFFFFUL, info, &vq);
+               if (error) {
+                       device_printf(dev,
+                           "cannot allocate virtqueue %d: %d\n", idx, error);
+                       break;
                }
 
                vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
-                   virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+                   virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+
+               vqx->vq = *info->vqai_vq = vq;
+               vqx->no_intr = info->vqai_intr == NULL;
 
-               *info->vqai_vq = vqx->vq;
                sc->vtpci_nvqs++;
        }
 
-       return (0);
+       return (error);
 }
 
 static int
 vtpci_setup_intr(device_t dev, enum intr_type type)
 {
        struct vtpci_softc *sc;
-       struct vtpci_intr_resource *ires;
-       struct vtpci_virtqueue *vqx;
-       int i, flags, error;
+       int attempt, error;
 
        sc = device_get_softc(dev);
-       flags = type | INTR_MPSAFE;
-       ires = &sc->vtpci_intr_res[0];
-
-       if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
-               error = bus_setup_intr(dev, ires->irq, flags,
-                   vtpci_legacy_intr, NULL, sc, &ires->intrhand);
 
-               return (error);
-       }
-
-       error = bus_setup_intr(dev, ires->irq, flags, vtpci_config_intr,
-           NULL, sc, &ires->intrhand);
-       if (error)
-               return (error);
+       for (attempt = 0; attempt < 5; attempt++) {
+               /*
+                * Start with the most desirable interrupt configuration and
+                * fallback towards less desirable ones.
+                */
+               switch (attempt) {
+               case 0:
+                       error = vtpci_alloc_intr_msix_pervq(sc);
+                       break;
+               case 1:
+                       error = vtpci_alloc_intr_msix_shared(sc);
+                       break;
+               case 2:
+                       error = vtpci_alloc_intr_msi(sc);
+                       break;
+               case 3:
+                       error = vtpci_alloc_intr_legacy(sc);
+                       break;
+               default:
+                       device_printf(dev,
+                           "exhausted all interrupt allocation attempts\n");
+                       return (ENXIO);
+               }
 
-       if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) {
-               ires = &sc->vtpci_intr_res[1];
-               error = bus_setup_intr(dev, ires->irq, flags,
-                   vtpci_vq_shared_intr, NULL, sc, &ires->intrhand);
+               if (error == 0 && vtpci_setup_interrupts(sc, type) == 0)
+                       break;
 
-               return (error);
+               vtpci_cleanup_setup_intr_attempt(sc);
        }
 
-       /* Setup an interrupt handler for each virtqueue. */
-       for (i = 0; i < sc->vtpci_nvqs; i++) {
-               vqx = &sc->vtpci_vqx[i];
-               if (vqx->ires_idx < 1)
-                       continue;
-
-               ires = &sc->vtpci_intr_res[vqx->ires_idx];
-               error = bus_setup_intr(dev, ires->irq, flags,
-                   vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand);
-               if (error)
-                       return (error);
+       if (bootverbose) {
+               if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
+                       device_printf(dev, "using legacy interrupt\n");
+               else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
+                       device_printf(dev, "using MSI interrupt\n");
+               else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
+                       device_printf(dev, "using shared MSIX interrupts\n");
+               else
+                       device_printf(dev, "using per VQ MSIX interrupts\n");
        }
 
        return (0);
@@ -554,20 +578,19 @@ static int
 vtpci_reinit(device_t dev, uint64_t features)
 {
        struct vtpci_softc *sc;
-       struct vtpci_virtqueue *vqx;
-       struct virtqueue *vq;
-       int queue, error;
-       uint16_t vq_size;
+       int idx, error;
 
        sc = device_get_softc(dev);
 
        /*
-        * Redrive the device initialization. This is a bit of an abuse
-        * of the specification, but both VirtualBox and QEMU/KVM seem
-        * to play nice. We do not allow the host device to change from
-        * what was originally negotiated beyond what the guest driver
-        * changed (MSIX state should not change, number of virtqueues
-        * and their size remain the same, etc).
+        * Redrive the device initialization. This is a bit of an abuse of
+        * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to
+        * play nice.
+        *
+        * We do not allow the host device to change from what was originally
+        * negotiated beyond what the guest driver changed. MSIX state should
+        * not change, number of virtqueues and their size remain the same, etc.
+        * This will need to be rethought when we want to support migration.
         */
 
        if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
@@ -582,34 +605,16 @@ vtpci_reinit(device_t dev, uint64_t feat
 
        vtpci_negotiate_features(dev, features);
 
-       if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-               error = vtpci_register_msix_vector(sc,
-                   VIRTIO_MSI_CONFIG_VECTOR, 0);
+       for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
+               error = vtpci_reinit_virtqueue(sc, idx);
                if (error)
                        return (error);
        }
 
-       for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
-               vqx = &sc->vtpci_vqx[queue];
-               vq = vqx->vq;
-
-               KASSERT(vq != NULL, ("vq %d not allocated", queue));
-               vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
-
-               vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
-               error = virtqueue_reinit(vq, vq_size);
+       if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
+               error = vtpci_set_host_msix_vectors(sc);
                if (error)
                        return (error);
-
-               if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-                       error = vtpci_register_msix_vector(sc,
-                           VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
-                       if (error)
-                               return (error);
-               }
-
-               vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
-                   virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        }
 
        return (0);
@@ -744,7 +749,6 @@ vtpci_probe_and_attach_child(struct vtpc
                vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
                vtpci_reset(sc);
                vtpci_release_child_resources(sc);
-
                /* Reset status for future attempt. */
                vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
        } else
@@ -752,42 +756,126 @@ vtpci_probe_and_attach_child(struct vtpc
 }
 
 static int
-vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs,
-    struct vq_alloc_info *vq_info)
+vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
+{
+       device_t dev;
+       int nmsix, cnt, required;
+
+       dev = sc->vtpci_dev;
+
+       /* Allocate an additional vector for the config changes. */
+       required = nvectors + 1;
+
+       nmsix = pci_msix_count(dev);
+       if (nmsix < required)
+               return (1);
+
+       cnt = required;
+       if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
+               sc->vtpci_nintr_res = required;
+               return (0);
+       }
+
+       pci_release_msi(dev);
+
+       return (1);
+}
+
+static int
+vtpci_alloc_msi(struct vtpci_softc *sc)
+{
+       device_t dev;
+       int nmsi, cnt, required;
+
+       dev = sc->vtpci_dev;
+       required = 1;
+
+       nmsi = pci_msi_count(dev);
+       if (nmsi < required)
+               return (1);
+
+       cnt = required;
+       if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
+               sc->vtpci_nintr_res = required;
+               return (0);
+       }
+
+       pci_release_msi(dev);
+
+       return (1);
+}
+
+static int
+vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc)
 {
        int i, nvectors, error;
 
-       /*
-        * Only allocate a vector for virtqueues that are actually
-        * expecting an interrupt.
-        */
-       for (nvectors = 0, i = 0; i < nvqs; i++)
-               if (vq_info[i].vqai_intr != NULL)
+       if (vtpci_disable_msix != 0 ||
+           sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
+               return (ENOTSUP);
+
+       for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) {
+               if (sc->vtpci_vqx[i].no_intr == 0)
                        nvectors++;
+       }
+
+       error = vtpci_alloc_msix(sc, nvectors);
+       if (error)
+               return (error);
+
+       sc->vtpci_flags |= VTPCI_FLAG_MSIX;
+
+       return (0);
+}
+
+static int
+vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc)
+{
+       int error;
 
        if (vtpci_disable_msix != 0 ||
-           sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX ||
-           flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX ||
-           vtpci_alloc_msix(sc, nvectors) != 0) {
-               /*
-                * Use MSI interrupts if available. Otherwise, we fallback
-                * to legacy interrupts.
-                */
-               if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 &&
-                   vtpci_alloc_msi(sc) == 0)
-                       sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
+           sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
+               return (ENOTSUP);
 
-               sc->vtpci_nintr_res = 1;
-       }
+       error = vtpci_alloc_msix(sc, 1);
+       if (error)
+               return (error);
 
-       error = vtpci_alloc_intr_resources(sc, nvqs, vq_info);
+       sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
 
-       return (error);
+       return (0);
 }
 
 static int
-vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
-    struct vq_alloc_info *vq_info)
+vtpci_alloc_intr_msi(struct vtpci_softc *sc)
+{
+       int error;
+
+       /* Only BHyVe supports MSI. */
+       if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI)
+               return (ENOTSUP);
+
+       error = vtpci_alloc_msi(sc);
+       if (error)
+               return (error);
+
+       sc->vtpci_flags |= VTPCI_FLAG_MSI;
+
+       return (0);
+}
+
+static int
+vtpci_alloc_intr_legacy(struct vtpci_softc *sc)
+{
+
+       sc->vtpci_flags |= VTPCI_FLAG_LEGACY;
+       sc->vtpci_nintr_res = 1;
+
+       return (0);
+}
+
+static int
+vtpci_alloc_intr_resources(struct vtpci_softc *sc)
 {
        device_t dev;
        struct resource *irq;
@@ -795,14 +883,14 @@ vtpci_alloc_intr_resources(struct vtpci_
        int i, rid, flags, res_idx;
 
        dev = sc->vtpci_dev;
-       flags = RF_ACTIVE;
 
-       if ((sc->vtpci_flags &
-           (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) {
+       if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) {
                rid = 0;
-               flags |= RF_SHAREABLE;
-       } else
+               flags = RF_ACTIVE | RF_SHAREABLE;
+       } else {
                rid = 1;
+               flags = RF_ACTIVE;
+       }
 
        for (i = 0; i < sc->vtpci_nintr_res; i++) {
                irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags);
@@ -814,16 +902,16 @@ vtpci_alloc_intr_resources(struct vtpci_
        }
 
        /*
-        * Map the virtqueue into the correct index in vq_intr_res[]. Note the
-        * first index is reserved for configuration changes notifications.
+        * Map the virtqueue into the correct index in vq_intr_res[]. The
+        * first index is reserved for configuration changed notifications.
         */
-       for (i = 0, res_idx = 1; i < nvqs; i++) {
+       for (i = 0, res_idx = 1; i < sc->vtpci_nvqs; i++) {
                vqx = &sc->vtpci_vqx[i];
 
-               if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
-                       if (vq_info[i].vqai_intr == NULL)
+               if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
+                       if (vqx->no_intr != 0)
                                vqx->ires_idx = -1;
-                       else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
+                       else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
                                vqx->ires_idx = res_idx;
                        else
                                vqx->ires_idx = res_idx++;
@@ -835,110 +923,180 @@ vtpci_alloc_intr_resources(struct vtpci_
 }
 
 static int
-vtpci_alloc_msi(struct vtpci_softc *sc)
+vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type)
 {
        device_t dev;
-       int nmsi, cnt;
+       struct vtpci_intr_resource *ires;
+       int error;
 
        dev = sc->vtpci_dev;
-       nmsi = pci_msi_count(dev);
-
-       if (nmsi < 1)
-               return (1);
 
-       cnt = 1;
-       if (pci_alloc_msi(dev, &cnt) == 0 && cnt == 1)
-               return (0);
+       ires = &sc->vtpci_intr_res[0];
+       error = bus_setup_intr(dev, ires->irq, type, vtpci_legacy_intr, NULL,
+           sc, &ires->intrhand);
 
-       return (1);
+       return (error);
 }
 
 static int
-vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
+vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
 {
        device_t dev;
-       int nmsix, cnt, required;
+       struct vtpci_intr_resource *ires;
+       struct vtpci_virtqueue *vqx;
+       int i, error;
 
        dev = sc->vtpci_dev;
 
-       nmsix = pci_msix_count(dev);
-       if (nmsix < 1)
-               return (1);
+       /*
+        * The first resource is used for configuration changed interrupts.
+        */
+       ires = &sc->vtpci_intr_res[0];
+       error = bus_setup_intr(dev, ires->irq, type, vtpci_config_intr,
+           NULL, sc, &ires->intrhand);
+       if (error)
+               return (error);
 
-       /* An additional vector is needed for the config changes. */
-       required = nvectors + 1;
-       if (nmsix >= required) {
-               cnt = required;
-               if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required)
-                       goto out;
+       if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
+               ires = &sc->vtpci_intr_res[1];
 
-               pci_release_msi(dev);
+               error = bus_setup_intr(dev, ires->irq, type,
+                   vtpci_vq_shared_intr, NULL, sc, &ires->intrhand);
+               if (error)
+                       return (error);
+       } else {
+               /*
+                * Each remaining resource is assigned to a specific virtqueue.
+                */
+               for (i = 0; i < sc->vtpci_nvqs; i++) {
+                       vqx = &sc->vtpci_vqx[i];
+                       if (vqx->ires_idx < 1)
+                               continue;
+
+                       ires = &sc->vtpci_intr_res[vqx->ires_idx];
+                       error = bus_setup_intr(dev, ires->irq, type,
+                           vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand);
+                       if (error)
+                               return (error);
+               }
        }
 
-       /* Attempt shared MSIX configuration. */
-       required = 2;
-       if (nmsix >= required) {
-               cnt = required;
-               if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
-                       sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX;
-                       goto out;
-               }
+       error = vtpci_set_host_msix_vectors(sc);
+       if (error)
+               return (error);
 
-               pci_release_msi(dev);
-       }
+       return (0);
+}
 
-       return (1);
+static int
+vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type)
+{
+       int error;
 
-out:
-       sc->vtpci_nintr_res = required;
-       sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;
+       type |= INTR_MPSAFE;
+       KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
+           ("no interrupt type selected: %#x", sc->vtpci_flags));
 
-       if (bootverbose) {
-               if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
-                       device_printf(dev, "using shared virtqueue MSIX\n");
-               else
-                       device_printf(dev, "using per virtqueue MSIX\n");
-       }
+       error = vtpci_alloc_intr_resources(sc);
+       if (error)
+               return (error);
 
-       return (0);
+       if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
+               error = vtpci_setup_legacy_interrupt(sc, type);
+       else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
+               error = vtpci_setup_msi_interrupt(sc, type);
+       else
+               error = vtpci_setup_msix_interrupts(sc, type);
+
+       return (error);
 }
 
 static int
 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
 {
        device_t dev;
-       uint16_t vector;
+       uint16_t vector, rdvector;
 
        dev = sc->vtpci_dev;
 
-       if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
-           offset != VIRTIO_MSI_QUEUE_VECTOR)
-               return (EINVAL);
-
        if (res_idx != -1) {
-               /* Map from rid to host vector. */
+               /* Map from guest rid to host vector. */
                vector = sc->vtpci_intr_res[res_idx].rid - 1;
        } else
                vector = VIRTIO_MSI_NO_VECTOR;
 
-       /* The first resource is special; make sure it is used correctly. */
+       /*
+        * Assert the first resource is always used for the configuration
+        * changed interrupts.
+        */
        if (res_idx == 0) {
-               KASSERT(vector == 0, ("unexpected config vector"));
-               KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR,
-                   ("unexpected config offset"));
-       }
+               KASSERT(vector == 0 && offset == VIRTIO_MSI_CONFIG_VECTOR,
+                   ("bad first res use vector:%d offset:%d", vector, offset));
+       } else
+               KASSERT(offset == VIRTIO_MSI_QUEUE_VECTOR, ("bad offset"));
 
        vtpci_write_config_2(sc, offset, vector);
 
-       if (vtpci_read_config_2(sc, offset) != vector) {
-               device_printf(dev, "insufficient host resources for "
-                   "MSIX interrupts\n");
+       /* Read vector to determine if the host had sufficient resources. */
+       rdvector = vtpci_read_config_2(sc, offset);
+       if (rdvector != vector) {
+               device_printf(dev,
+                   "insufficient host resources for MSIX interrupts\n");
                return (ENODEV);
        }
 
        return (0);
 }
 
+static int
+vtpci_set_host_msix_vectors(struct vtpci_softc *sc)
+{
+       struct vtpci_virtqueue *vqx;
+       int idx, error;
+
+       error = vtpci_register_msix_vector(sc, VIRTIO_MSI_CONFIG_VECTOR, 0);
+       if (error)
+               return (error);
+
+       for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
+               vqx = &sc->vtpci_vqx[idx];
+
+               vtpci_select_virtqueue(sc, idx);
+               error = vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR,
+                   vqx->ires_idx);
+               if (error)
+                       return (error);
+       }
+
+       return (0);
+}
+
+static int

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to