Author: grehan
Date: Wed Jul 17 23:37:33 2013
New Revision: 253440
URL: http://svnweb.freebsd.org/changeset/base/253440

Log:
  Major rework of the virtio code. Split out common parts, and modify
  the net/block devices accordingly.
  
  Submitted by: Chris Torek   torek at torek dot net
  Reviewed by:  grehan

Added:
  head/usr.sbin/bhyve/virtio.c   (contents, props changed)
Modified:
  head/usr.sbin/bhyve/Makefile
  head/usr.sbin/bhyve/pci_virtio_block.c
  head/usr.sbin/bhyve/pci_virtio_net.c
  head/usr.sbin/bhyve/virtio.h

Modified: head/usr.sbin/bhyve/Makefile
==============================================================================
--- head/usr.sbin/bhyve/Makefile        Wed Jul 17 23:29:56 2013        (r253439)
+++ head/usr.sbin/bhyve/Makefile        Wed Jul 17 23:37:33 2013        (r253440)
@@ -10,7 +10,7 @@ SRCS= acpi.c atpic.c bhyverun.c consport
 SRCS+=  ioapic.c mem.c mevent.c mptbl.c
 SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
 SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c
-SRCS+= xmsr.c spinup_ap.c
+SRCS+= virtio.c xmsr.c spinup_ap.c
 
 .PATH: ${.CURDIR}/../../sys/amd64/vmm
 SRCS+= vmm_instruction_emul.c

Modified: head/usr.sbin/bhyve/pci_virtio_block.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_block.c      Wed Jul 17 23:29:56 2013        (r253439)
+++ head/usr.sbin/bhyve/pci_virtio_block.c      Wed Jul 17 23:37:33 2013        (r253440)
@@ -53,14 +53,6 @@ __FBSDID("$FreeBSD$");
 
 #define VTBLK_RINGSZ   64
 
-#define VTBLK_CFGSZ    28
-
-#define VTBLK_R_CFG            VTCFG_R_CFG1 
-#define VTBLK_R_CFG_END                VTBLK_R_CFG + VTBLK_CFGSZ -1
-#define VTBLK_R_MAX            VTBLK_R_CFG_END
-
-#define VTBLK_REGSZ            VTBLK_R_MAX+1
-
 #define VTBLK_MAXSEGS  32
 
 #define VTBLK_S_OK     0
@@ -71,28 +63,10 @@ __FBSDID("$FreeBSD$");
  */
 #define VTBLK_S_HOSTCAPS      \
   ( 0x00000004 |       /* host maximum request segments */ \
-    0x10000000 )       /* supports indirect descriptors */
-
-static int use_msix = 1;
-
-struct vring_hqueue {
-       /* Internal state */
-       uint16_t        hq_size;
-       uint16_t        hq_cur_aidx;            /* trails behind 'avail_idx' */
-
-        /* Host-context pointers to the queue */
-       struct virtio_desc *hq_dtable;
-       uint16_t        *hq_avail_flags;
-       uint16_t        *hq_avail_idx;          /* monotonically increasing */
-       uint16_t        *hq_avail_ring;
-
-       uint16_t        *hq_used_flags;
-       uint16_t        *hq_used_idx;           /* monotonically increasing */
-       struct virtio_used *hq_used_ring;
-};
+    VIRTIO_RING_F_INDIRECT_DESC )      /* indirect descriptors */
 
 /*
- * Config space
+ * Config space "registers"
  */
 struct vtblk_config {
        uint64_t        vbc_capacity;
@@ -104,7 +78,6 @@ struct vtblk_config {
        uint32_t        vbc_blk_size;
        uint32_t        vbc_sectors_max;
 } __packed;
-CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
 
 /*
  * Fixed-size block header
@@ -129,113 +102,69 @@ static int pci_vtblk_debug;
  * Per-device softc
  */
 struct pci_vtblk_softc {
-       struct pci_devinst *vbsc_pi;
+       struct virtio_softc vbsc_vs;
+       struct vqueue_info vbsc_vq;
        int             vbsc_fd;
-       int             vbsc_status;
-       int             vbsc_isr;
-       int             vbsc_lastq;
-       uint32_t        vbsc_features;
-       uint64_t        vbsc_pfn;
-       struct vring_hqueue vbsc_q;
        struct vtblk_config vbsc_cfg;   
-       uint16_t        msix_table_idx_req;
-       uint16_t        msix_table_idx_cfg;
 };
-#define        vtblk_ctx(sc)   ((sc)->vbsc_pi->pi_vmctx)
-
-/* 
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or 
- * not
- */ 
-static uint64_t
-pci_vtblk_iosize(struct pci_devinst *pi)
-{
-
-       if (pci_msix_enabled(pi)) 
-               return (VTBLK_REGSZ);
-       else
-               return (VTBLK_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
 
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
-       uint16_t ndesc;
-
-       /*
-        * We're just computing (a-b) in GF(216).
-        *
-        * The only glitch here is that in standard C,
-        * uint16_t promotes to (signed) int when int has
-        * more than 16 bits (pretty much always now), so
-        * we have to force it back to unsigned.
-        */
-       ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
-       assert(ndesc <= hq->hq_size);
-
-       return (ndesc);
-}
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = {
+       "vtblk",                /* our name */
+       1,                      /* we support 1 virtqueue */
+       sizeof(struct vtblk_config), /* config reg size */
+       pci_vtblk_reset,        /* reset */
+       pci_vtblk_notify,       /* device-wide qnotify */
+       pci_vtblk_cfgread,      /* read PCI config */
+       pci_vtblk_cfgwrite,     /* write PCI config */
+       VTBLK_S_HOSTCAPS,       /* our capabilities */
+};
 
 static void
-pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
+pci_vtblk_reset(void *vsc)
 {
-       if (value == 0) {
-               DPRINTF(("vtblk: device reset requested !\n"));
-               sc->vbsc_isr = 0;
-               sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
-               sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
-               sc->vbsc_features = 0;
-               sc->vbsc_pfn = 0;
-               sc->vbsc_lastq = 0;
-               memset(&sc->vbsc_q, 0, sizeof(struct vring_hqueue));
-       }
+       struct pci_vtblk_softc *sc = vsc;
 
-       sc->vbsc_status = value;
+       DPRINTF(("vtblk: device reset requested !\n"));
+       vi_reset_dev(&sc->vbsc_vs);
 }
 
 static void
-pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
 {
-       struct iovec iov[VTBLK_MAXSEGS];
        struct virtio_blk_hdr *vbh;
-       struct virtio_desc *vd, *vid;
-       struct virtio_used *vu;
        uint8_t *status;
-       int i;
+       int i, n;
        int err;
        int iolen;
-       int uidx, aidx, didx;
-       int indirect, writeop, type;
+       int writeop, type;
        off_t offset;
+       struct iovec iov[VTBLK_MAXSEGS + 2];
+       uint16_t flags[VTBLK_MAXSEGS + 2];
 
-       uidx = *hq->hq_used_idx;
-       aidx = hq->hq_cur_aidx;
-       didx = hq->hq_avail_ring[aidx % hq->hq_size];
-       assert(didx >= 0 && didx < hq->hq_size);
-
-       vd = &hq->hq_dtable[didx];
-
-       indirect = ((vd->vd_flags & VRING_DESC_F_INDIRECT) != 0);
-
-       if (indirect) {
-               vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len);
-               vd = &vid[0];
-       }
+       n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags);
 
        /*
-        * The first descriptor will be the read-only fixed header
+        * The first descriptor will be the read-only fixed header,
+        * and the last is for status (hence +2 above and below).
+        * The remaining iov's are the actual data I/O vectors.
+        *
+        * XXX - note - this fails on crash dump, which does a
+        * VIRTIO_BLK_T_FLUSH with a zero transfer length
         */
-       vbh = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr,
-                           sizeof(struct virtio_blk_hdr));
-       assert(vd->vd_len == sizeof(struct virtio_blk_hdr));
-       assert(vd->vd_flags & VRING_DESC_F_NEXT);
-       assert((vd->vd_flags & VRING_DESC_F_WRITE) == 0);
+       assert (n >= 3 && n < VTBLK_MAXSEGS + 2);
+
+       assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+       assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+       vbh = iov[0].iov_base;
+
+       status = iov[--n].iov_base;
+       assert(iov[n].iov_len == 1);
+       assert(flags[n] & VRING_DESC_F_WRITE);
 
        /*
         * XXX
@@ -247,120 +176,44 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 
        offset = vbh->vbh_sector * DEV_BSIZE;
 
-       /*
-        * Build up the iovec based on the guest's data descriptors
-        */
-       i = iolen = 0;
-       while (1) {
-               if (indirect)
-                       vd = &vid[i + 1];       /* skip first indirect desc */
-               else
-                       vd = &hq->hq_dtable[vd->vd_next];
-
-               if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
-                       break;
-
-               if (i == VTBLK_MAXSEGS)
-                       break;
-
+       iolen = 0;
+       for (i = 1; i < n; i++) {
                /*
                 * - write op implies read-only descriptor,
                 * - read op implies write-only descriptor,
                 * therefore test the inverse of the descriptor bit
                 * to the op.
                 */
-               assert(((vd->vd_flags & VRING_DESC_F_WRITE) == 0) ==
-                      writeop);
-
-               iov[i].iov_base = paddr_guest2host(vtblk_ctx(sc),
-                                                  vd->vd_addr,
-                                                  vd->vd_len);
-               iov[i].iov_len = vd->vd_len;
-               iolen += vd->vd_len;
-               i++;
+               assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+               iolen += iov[i].iov_len;
        }
 
-       /* Lastly, get the address of the status byte */
-       status = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, 1);
-       assert(vd->vd_len == 1);
-       assert((vd->vd_flags & VRING_DESC_F_NEXT) == 0);
-       assert(vd->vd_flags & VRING_DESC_F_WRITE);
-
        DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 
-                writeop ? "write" : "read", iolen, i, offset));
+                writeop ? "write" : "read", iolen, i - 1, offset));
 
        if (writeop)
-               err = pwritev(sc->vbsc_fd, iov, i, offset);
+               err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset);
        else
-               err = preadv(sc->vbsc_fd, iov, i, offset);
+               err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset);
 
        *status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
 
        /*
-        * Return the single descriptor back to the host
+        * Return the descriptor back to the host.
+        * We wrote 1 byte (our status) to host.
         */
-       vu = &hq->hq_used_ring[uidx % hq->hq_size];
-       vu->vu_idx = didx;
-       vu->vu_tlen = 1;
-       hq->hq_cur_aidx++;
-       *hq->hq_used_idx += 1;
-
-       /*
-        * Generate an interrupt if able
-        */
-       if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 
-               if (use_msix) {
-                       pci_generate_msix(sc->vbsc_pi, sc->msix_table_idx_req); 
-               } else if (sc->vbsc_isr == 0) {
-                       sc->vbsc_isr = 1;
-                       pci_generate_msi(sc->vbsc_pi, 0);
-               }
-       }
+       vq_relchain(vq, 1);
 }
 
 static void
-pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
 {
-       struct vring_hqueue *hq = &sc->vbsc_q;
-       int ndescs;
+       struct pci_vtblk_softc *sc = vsc;
 
-       while ((ndescs = hq_num_avail(hq)) != 0) {
-               /*
-                * Run through all the entries, placing them into iovecs and
-                * sending when an end-of-packet is found
-                */
-               pci_vtblk_proc(sc, hq);
-       }
-}
-
-static void
-pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
-{
-       struct vring_hqueue *hq;
-
-       sc->vbsc_pfn = pfn << VRING_PFN;
-       
-       /*
-        * Set up host pointers to the various parts of the
-        * queue
-        */
-       hq = &sc->vbsc_q;
-       hq->hq_size = VTBLK_RINGSZ;
-
-       hq->hq_dtable = paddr_guest2host(vtblk_ctx(sc), pfn << VRING_PFN,
-                                        vring_size(VTBLK_RINGSZ));
-       hq->hq_avail_flags =  (uint16_t *)(hq->hq_dtable + hq->hq_size);
-       hq->hq_avail_idx = hq->hq_avail_flags + 1;
-       hq->hq_avail_ring = hq->hq_avail_flags + 2;
-       hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
-                                                VRING_ALIGN);
-       hq->hq_used_idx = hq->hq_used_flags + 1;
-       hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
-
-       /*
-        * Initialize queue indexes
-        */
-       hq->hq_cur_aidx = 0;
+       vq_startchains(vq);
+       while (vq_has_descs(vq))
+               pci_vtblk_proc(sc, vq);
+       vq_endchains(vq, 1);    /* Generate interrupt if appropriate. */
 }
 
 static int
@@ -371,6 +224,7 @@ pci_vtblk_init(struct vmctx *ctx, struct
        off_t size;     
        int fd;
        int sectsz;
+       int use_msix;
        const char *env_msi;
 
        if (opts == NULL) {
@@ -412,10 +266,14 @@ pci_vtblk_init(struct vmctx *ctx, struct
        sc = malloc(sizeof(struct pci_vtblk_softc));
        memset(sc, 0, sizeof(struct pci_vtblk_softc));
 
-       pi->pi_arg = sc;
-       sc->vbsc_pi = pi;
+       /* record fd of storage device/file */
        sc->vbsc_fd = fd;
 
+       /* init virtio softc and virtqueues */
+       vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+       sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+       /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
        /* setup virtio block config space */
        sc->vbsc_cfg.vbc_capacity = size / sectsz;
        sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
@@ -426,206 +284,51 @@ pci_vtblk_init(struct vmctx *ctx, struct
        sc->vbsc_cfg.vbc_geom_s = 0;
        sc->vbsc_cfg.vbc_sectors_max = 0;
 
-       /* initialize config space */
+       /*
+        * Should we move some of this into virtio.c?  Could
+        * have the device, class, and subdev_0 as fields in
+        * the virtio constants structure.
+        */
        pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
        pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
        pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
        pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
 
+       use_msix = 1;
        if ((env_msi = getenv("BHYVE_USE_MSI"))) {
                if (strcasecmp(env_msi, "yes") == 0)
                        use_msix = 0;
        } 
-
-       if (use_msix) {
-               /* MSI-X Support */
-               sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;  
-               sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;  
-               
-               if (pci_emul_add_msixcap(pi, 2, 1))
-                       return (1);
-       } else {
-               /* MSI Support */       
-               pci_emul_add_msicap(pi, 1);
-       }       
-       
-       pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
-
+       if (vi_intr_init(&sc->vbsc_vs, 1, use_msix))
+               return (1);
+       vi_set_io_bar(&sc->vbsc_vs, 0);
        return (0);
 }
 
-static uint64_t
-vtblk_adjust_offset(struct pci_devinst *pi, uint64_t offset)
-{
-       /*
-        * Device specific offsets used by guest would change 
-        * based on whether MSI-X capability is enabled or not
-        */ 
-       if (!pci_msix_enabled(pi)) {
-               if (offset >= VTCFG_R_MSIX) 
-                       return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-       }
-
-       return (offset);
-}
-
-static void
-pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
-               int baridx, uint64_t offset, int size, uint64_t value)
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
 {
-       struct pci_vtblk_softc *sc = pi->pi_arg;
-
-       if (use_msix) {
-               if (baridx == pci_msix_table_bar(pi) ||
-                   baridx == pci_msix_pba_bar(pi)) {
-                       pci_emul_msix_twrite(pi, offset, size, value);
-                       return;
-               }
-       }
-       
-       assert(baridx == 0);
 
-       if (offset + size > pci_vtblk_iosize(pi)) {
-               DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
-                        offset, size));
-               return;
-       }
-
-       offset = vtblk_adjust_offset(pi, offset);
-       
-       switch (offset) {
-       case VTCFG_R_GUESTCAP:
-               assert(size == 4);
-               sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
-               break;
-       case VTCFG_R_PFN:
-               assert(size == 4);
-               pci_vtblk_ring_init(sc, value);
-               break;
-       case VTCFG_R_QSEL:
-               assert(size == 2);
-               sc->vbsc_lastq = value;
-               break;
-       case VTCFG_R_QNOTIFY:
-               assert(size == 2);
-               assert(value == 0);
-               pci_vtblk_qnotify(sc);
-               break;
-       case VTCFG_R_STATUS:
-               assert(size == 1);
-               pci_vtblk_update_status(sc, value);
-               break;
-       case VTCFG_R_CFGVEC:
-               assert(size == 2);
-               sc->msix_table_idx_cfg = value; 
-               break;  
-       case VTCFG_R_QVEC:
-               assert(size == 2);
-               sc->msix_table_idx_req = value;
-               break;  
-       case VTCFG_R_HOSTCAP:
-       case VTCFG_R_QNUM:
-       case VTCFG_R_ISR:
-       case VTBLK_R_CFG ... VTBLK_R_CFG_END:
-               DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
-               break;
-       default:
-               DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
-               value = 0;
-               break;
-       }
+       DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
+       return (1);
 }
 
-uint64_t
-pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
-              int baridx, uint64_t offset, int size)
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
 {
-       struct pci_vtblk_softc *sc = pi->pi_arg;
+       struct pci_vtblk_softc *sc = vsc;
        void *ptr;
-       uint32_t value;
 
-       if (use_msix) {
-               if (baridx == pci_msix_table_bar(pi) ||
-                   baridx == pci_msix_pba_bar(pi)) {
-                       return (pci_emul_msix_tread(pi, offset, size));
-               }
-       }
-
-       assert(baridx == 0);
-
-       if (offset + size > pci_vtblk_iosize(pi)) {
-               DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
-                        offset, size));
-               return (0);
-       }
-
-       offset = vtblk_adjust_offset(pi, offset);
-
-       switch (offset) {
-       case VTCFG_R_HOSTCAP:
-               assert(size == 4);
-               value = VTBLK_S_HOSTCAPS;
-               break;
-       case VTCFG_R_GUESTCAP:
-               assert(size == 4);
-               value = sc->vbsc_features; /* XXX never read ? */
-               break;
-       case VTCFG_R_PFN:
-               assert(size == 4);
-               value = sc->vbsc_pfn >> VRING_PFN;
-               break;
-       case VTCFG_R_QNUM:
-               value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
-               break;
-       case VTCFG_R_QSEL:
-               assert(size == 2);
-               value = sc->vbsc_lastq; /* XXX never read ? */
-               break;
-       case VTCFG_R_QNOTIFY:
-               assert(size == 2);
-               value = 0; /* XXX never read ? */
-               break;
-       case VTCFG_R_STATUS:
-               assert(size == 1);
-               value = sc->vbsc_status;
-               break;
-       case VTCFG_R_ISR:
-               assert(size == 1);
-               value = sc->vbsc_isr;
-               sc->vbsc_isr = 0;     /* a read clears this flag */
-               break;
-       case VTCFG_R_CFGVEC:
-               assert(size == 2);
-               value = sc->msix_table_idx_cfg;
-               break;
-       case VTCFG_R_QVEC:
-               assert(size == 2);
-               value = sc->msix_table_idx_req;
-               break;  
-       case VTBLK_R_CFG ... VTBLK_R_CFG_END:
-               assert(size + offset <= (VTBLK_R_CFG_END + 1));
-               ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
-               if (size == 1) {
-                       value = *(uint8_t *) ptr;
-               } else if (size == 2) {
-                       value = *(uint16_t *) ptr;
-               } else {
-                       value = *(uint32_t *) ptr;
-               }
-               break;
-       default:
-               DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
-               value = 0;
-               break;
-       }
-
-       return (value);
+       /* our caller has already verified offset and size */
+       ptr = (uint8_t *)&sc->vbsc_cfg + offset;
+       memcpy(retval, ptr, size);
+       return (0);
 }
 
 struct pci_devemu pci_de_vblk = {
        .pe_emu =       "virtio-blk",
        .pe_init =      pci_vtblk_init,
-       .pe_barwrite =  pci_vtblk_write,
-       .pe_barread =   pci_vtblk_read
+       .pe_barwrite =  vi_pci_write,
+       .pe_barread =   vi_pci_read
 };
 PCI_EMUL_SET(pci_de_vblk);

Modified: head/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_net.c        Wed Jul 17 23:29:56 2013        (r253439)
+++ head/usr.sbin/bhyve/pci_virtio_net.c        Wed Jul 17 23:37:33 2013        (r253440)
@@ -59,56 +59,49 @@ __FBSDID("$FreeBSD$");
 #define VTNET_MAXSEGS  32
 
 /*
- * PCI config-space register offsets
+ * Host capabilities.  Note that we only offer a few of these.
  */
-#define VTNET_R_CFG0   24
-#define VTNET_R_CFG1   25
-#define VTNET_R_CFG2   26
-#define VTNET_R_CFG3   27
-#define VTNET_R_CFG4   28
-#define VTNET_R_CFG5   29
-#define VTNET_R_CFG6   30
-#define VTNET_R_CFG7   31
-#define VTNET_R_MAX    31
+#define        VIRTIO_NET_F_CSUM       (1 <<  0) /* host handles partial cksum */
+#define        VIRTIO_NET_F_GUEST_CSUM (1 <<  1) /* guest handles partial cksum */
+#define        VIRTIO_NET_F_MAC        (1 <<  5) /* host supplies MAC */
+#define        VIRTIO_NET_F_GSO_DEPREC (1 <<  6) /* deprecated: host handles GSO */
+#define        VIRTIO_NET_F_GUEST_TSO4 (1 <<  7) /* guest can rcv TSOv4 */
+#define        VIRTIO_NET_F_GUEST_TSO6 (1 <<  8) /* guest can rcv TSOv6 */
+#define        VIRTIO_NET_F_GUEST_ECN  (1 <<  9) /* guest can rcv TSO with ECN */
+#define        VIRTIO_NET_F_GUEST_UFO  (1 << 10) /* guest can rcv UFO */
+#define        VIRTIO_NET_F_HOST_TSO4  (1 << 11) /* host can rcv TSOv4 */
+#define        VIRTIO_NET_F_HOST_TSO6  (1 << 12) /* host can rcv TSOv6 */
+#define        VIRTIO_NET_F_HOST_ECN   (1 << 13) /* host can rcv TSO with ECN */
+#define        VIRTIO_NET_F_HOST_UFO   (1 << 14) /* host can rcv UFO */
+#define        VIRTIO_NET_F_MRG_RXBUF  (1 << 15) /* host can merge RX buffers */
+#define        VIRTIO_NET_F_STATUS     (1 << 16) /* config status field available */
+#define        VIRTIO_NET_F_CTRL_VQ    (1 << 17) /* control channel available */
+#define        VIRTIO_NET_F_CTRL_RX    (1 << 18) /* control channel RX mode support */
+#define        VIRTIO_NET_F_CTRL_VLAN  (1 << 19) /* control channel VLAN filtering */
+#define        VIRTIO_NET_F_GUEST_ANNOUNCE \
+                               (1 << 21) /* guest can send gratuitous pkts */
 
-#define VTNET_REGSZ    VTNET_R_MAX+1
+#define VTNET_S_HOSTCAPS      \
+  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
+    VIRTIO_F_NOTIFY_ON_EMPTY)
 
 /*
- * Host capabilities
+ * PCI config-space "registers"
  */
-#define VTNET_S_HOSTCAPS      \
-  ( 0x00000020 |       /* host supplies MAC */ \
-    0x00008000 |       /* host can merge Rx buffers */ \
-    0x00010000 |       /* config status available */ \
-    VIRTIO_F_NOTIFY_ON_EMPTY)
+struct virtio_net_config {
+       uint8_t  mac[6];
+       uint16_t status;
+} __packed;
 
 /*
  * Queue definitions.
  */
 #define VTNET_RXQ      0
 #define VTNET_TXQ      1
-#define VTNET_CTLQ     2
+#define VTNET_CTLQ     2       /* NB: not yet supported */
 
 #define VTNET_MAXQ     3
 
-static int use_msix = 1;
-
-struct vring_hqueue {
-       /* Internal state */
-       uint16_t        hq_size;
-       uint16_t        hq_cur_aidx;            /* trails behind 'avail_idx' */
-
-        /* Host-context pointers to the queue */
-       struct virtio_desc *hq_dtable;
-       uint16_t        *hq_avail_flags;
-       uint16_t        *hq_avail_idx;          /* monotonically increasing */
-       uint16_t        *hq_avail_ring;
-
-       uint16_t        *hq_used_flags;
-       uint16_t        *hq_used_idx;           /* monotonically increasing */
-       struct virtio_used *hq_used_ring;
-};
-
 /*
  * Fixed network header size
  */
@@ -133,23 +126,17 @@ static int pci_vtnet_debug;
  * Per-device softc
  */
 struct pci_vtnet_softc {
-       struct pci_devinst *vsc_pi;
+       struct virtio_softc vsc_vs;
+       struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
        pthread_mutex_t vsc_mtx;
        struct mevent   *vsc_mevp;
 
-       int             vsc_curq;
-       int             vsc_status;
-       int             vsc_isr;
        int             vsc_tapfd;
        int             vsc_rx_ready;
-       int             resetting;
+       volatile int    resetting;      /* set and checked outside lock */
 
        uint32_t        vsc_features;
-       uint8_t         vsc_macaddr[6];
-
-       uint64_t        vsc_pfn[VTNET_MAXQ];
-       struct  vring_hqueue vsc_hq[VTNET_MAXQ];
-       uint16_t        vsc_msix_table_idx[VTNET_MAXQ];
+       struct virtio_net_config vsc_config;
 
        pthread_mutex_t rx_mtx;
        int             rx_in_progress;
@@ -159,73 +146,22 @@ struct pci_vtnet_softc {
        pthread_cond_t  tx_cond;
        int             tx_in_progress;
 };
-#define        vtnet_ctx(sc)           ((sc)->vsc_pi->pi_vmctx)
-#define        notify_on_empty(sc)     ((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY)
-
-/*
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or
- * not.
- */
-static uint64_t
-pci_vtnet_iosize(struct pci_devinst *pi)
-{
-       if (pci_msix_enabled(pi))
-               return (VTNET_REGSZ);
-       else
-               return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
-
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
-       uint16_t ndesc;
 
-       /*
-        * We're just computing (a-b) mod 2^16
-        *
-        * The only glitch here is that in standard C,
-        * uint16_t promotes to (signed) int when int has
-        * more than 16 bits (pretty much always now), so
-        * we have to force it back to unsigned.
-        */
-       ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
-       assert(ndesc <= hq->hq_size);
-
-       return (ndesc);
-}
-
-static uint16_t
-pci_vtnet_qsize(int qnum)
-{
-       /* XXX no ctl queue currently */
-       if (qnum == VTNET_CTLQ) {
-               return (0);
-       }
-
-       /* XXX fixed currently. Maybe different for tx/rx/ctl */
-       return (VTNET_RINGSZ);
-}
-
-static void
-pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring)
-{
-       struct vring_hqueue *hq;
-
-       assert(ring < VTNET_MAXQ);
-
-       hq = &sc->vsc_hq[ring];
-
-       /*
-        * Reset all soft state
-        */
-       hq->hq_cur_aidx = 0;
-}
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+       "vtnet",                /* our name */
+       VTNET_MAXQ - 1,         /* we currently support 2 virtqueues */
+       sizeof(struct virtio_net_config), /* config reg size */
+       pci_vtnet_reset,        /* reset */
+       NULL,                   /* device-wide qnotify -- not used */
+       pci_vtnet_cfgread,      /* read PCI config */
+       pci_vtnet_cfgwrite,     /* write PCI config */
+       VTNET_S_HOSTCAPS,       /* our capabilities */
+};
 
 /*
  * If the transmit thread is active then stall until it is done.
@@ -260,48 +196,27 @@ pci_vtnet_rxwait(struct pci_vtnet_softc 
 }
 
 static void
-pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
+pci_vtnet_reset(void *vsc)
 {
-       int i;
-
-       if (value == 0) {
-               DPRINTF(("vtnet: device reset requested !\n"));
-               
-               sc->resetting = 1;
-
-               /*
-                * Wait for the transmit and receive threads to finish their
-                * processing.
-                */
-               pci_vtnet_txwait(sc);
-               pci_vtnet_rxwait(sc);
+       struct pci_vtnet_softc *sc = vsc;
 
-               sc->vsc_rx_ready = 0;
-               pci_vtnet_ring_reset(sc, VTNET_RXQ);
-               pci_vtnet_ring_reset(sc, VTNET_TXQ);
+       DPRINTF(("vtnet: device reset requested !\n"));
 
-               for (i = 0; i < VTNET_MAXQ; i++)
-                       sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR;
+       sc->resetting = 1;
 
-               sc->vsc_isr = 0;
-               sc->vsc_features = 0;
+       /*
+        * Wait for the transmit and receive threads to finish their
+        * processing.
+        */
+       pci_vtnet_txwait(sc);
+       pci_vtnet_rxwait(sc);
 
-               sc->resetting = 0;
-       }
+       sc->vsc_rx_ready = 0;
 
-       sc->vsc_status = value;
-}
+       /* now reset rings, MSI-X vectors, and negotiated capabilities */
+       vi_reset_dev(&sc->vsc_vs);
 
-static void
-vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx)
-{
-
-       if (use_msix) {
-               pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]);
-       } else {
-               sc->vsc_isr |= 1;
-               pci_generate_msi(sc->vsc_pi, 0);
-       }
+       sc->resetting = 0;
 }
 
 /*
@@ -311,7 +226,7 @@ static void
 pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
                 int len)
 {
-       char pad[60];
+       static char pad[60]; /* all zero bytes */
 
        if (sc->vsc_tapfd == -1)
                return;
@@ -322,7 +237,6 @@ pci_vtnet_tap_tx(struct pci_vtnet_softc 
         * there is always an extra iov available by the caller.
         */
        if (len < 60) {
-               memset(pad, 0, 60 - len);
                iov[iovcnt].iov_base = pad;
                iov[iovcnt].iov_len = 60 - len;
                iovcnt++;
@@ -342,15 +256,11 @@ static uint8_t dummybuf[2048];
 static void
 pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
 {
-       struct virtio_desc *vd;
-       struct virtio_used *vu;
-       struct vring_hqueue *hq;
+       struct vqueue_info *vq;
        struct virtio_net_rxhdr *vrx;
        uint8_t *buf;
-       int i;
        int len;
-       int ndescs;
-       int didx, uidx, aidx;   /* descriptor, avail and used index */
+       struct iovec iov;
 
        /*
         * Should never be called without a valid tap fd
@@ -370,47 +280,45 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc 
        }
 
        /*
-        * Calculate the number of available rx buffers
+        * Check for available rx buffers
         */
-       hq = &sc->vsc_hq[VTNET_RXQ];
-
-       ndescs = hq_num_avail(hq);
-
-       if (ndescs == 0) {
+       vq = &sc->vsc_queues[VTNET_RXQ];
+       vq_startchains(vq);
+       if (!vq_has_descs(vq)) {
                /*
-                * Drop the packet and try later
+                * Drop the packet and try later.  Interrupt on
+                * empty, if that's negotiated.
                 */
                (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
-
-               if (notify_on_empty(sc))
-                       vtnet_generate_interrupt(sc, VTNET_RXQ);
-
+               vq_endchains(vq, 1);
                return;
        }
 
-       aidx = hq->hq_cur_aidx;
-       uidx = *hq->hq_used_idx;
-       for (i = 0; i < ndescs; i++) {
+       do {
                /*
-                * 'aidx' indexes into the an array of descriptor indexes
+                * Get descriptor chain, which should have just
+                * one descriptor in it.
+                * ??? allow guests to use multiple descs?
                 */
-               didx = hq->hq_avail_ring[aidx % hq->hq_size];
-               assert(didx >= 0 && didx < hq->hq_size);
-
-               vd = &hq->hq_dtable[didx];
+               assert(vq_getchain(vq, &iov, 1, NULL) == 1);
 
                /*
                 * Get a pointer to the rx header, and use the
                 * data immediately following it for the packet buffer.
                 */
-               vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len);
+               vrx = iov.iov_base;
                buf = (uint8_t *)(vrx + 1);
 
                len = read(sc->vsc_tapfd, buf,
-                          vd->vd_len - sizeof(struct virtio_net_rxhdr));
+                          iov.iov_len - sizeof(struct virtio_net_rxhdr));
 
                if (len < 0 && errno == EWOULDBLOCK) {
-                       break;
+                       /*
+                        * No more packets, but still some avail ring
+                        * entries.  Interrupt if needed/appropriate.
+                        */
+                       vq_endchains(vq, 0);
+                       return;
                }
 
                /*
@@ -422,23 +330,13 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc 
                vrx->vrh_bufs = 1;
 
                /*
-                * Write this descriptor into the used ring
+                * Release this chain and handle more chains.
                 */
-               vu = &hq->hq_used_ring[uidx % hq->hq_size];
-               vu->vu_idx = didx;
-               vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
-               uidx++;
-               aidx++;
-       }
+               vq_relchain(vq, len + sizeof(struct virtio_net_rxhdr));
+       } while (vq_has_descs(vq));

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to