virtio is currently pretty sloppy about accessing guest memory directly.  This
is problematic both because phys_ram_base + PA is not always a valid mapping
and because it requires that the host and guest be of the same endianness.

This patch converts virtio to use the QEMU st/ld functions to manipulate the
ring queues.  I've tested both virtio-blk and virtio-net.  I've also confirmed
that performance doesn't regress with virtio-net.

We still use phys_ram_base to create the struct iovec to handle IO operations.
I'm still trying to think of a way to eliminate that usage that doesn't involve
an unnecessary copy or a full-blown DMA API.

This patch should make it easier to use virtio with QEMU (in the absence of
KVM).

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 3d54c4e..e1b0aad 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -114,7 +114,7 @@ static void virtio_net_receive(void *opaque, const uint8_t 
*buf, int size)
     int offset, i;
 
     /* FIXME: the drivers really need to set their status better */
-    if (n->rx_vq->vring.avail == NULL) {
+    if (!virtio_ring_inited(n->rx_vq)) {
        n->can_receive = 0;
        return;
     }
@@ -174,7 +174,7 @@ void virtio_net_poll(void)
                 continue;
 
             /* FIXME: the drivers really need to set their status better */
-            if (vnet->rx_vq->vring.avail == NULL) {
+            if (!virtio_ring_inited(vnet->rx_vq)) {
                 vnet->can_receive = 0;
                 continue;
             }
@@ -257,8 +257,8 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, 
VirtQueue *vq)
     VirtIONet *n = to_virtio_net(vdev);
 
     if (n->tx_timer_active &&
-       (vq->vring.avail->idx - vq->last_avail_idx) == 64) {
-       vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+       virtio_ring_avail_size(vq) == 64) {
+       virtio_ring_set_used_notify(vq, 0);
        qemu_del_timer(n->tx_timer);
        n->tx_timer_active = 0;
        virtio_net_flush_tx(n, vq);
@@ -266,7 +266,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, 
VirtQueue *vq)
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
        n->tx_timer_active = 1;
-       vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+       virtio_ring_set_used_notify(vq, 1);
     }
 }
 
@@ -280,7 +280,7 @@ static void virtio_net_tx_timer(void *opaque)
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
         return;
 
-    n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+    virtio_ring_set_used_notify(n->tx_vq, 0);
     virtio_net_flush_tx(n, n->tx_vq);
 }
 
diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 9100bb1..5a905b1 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -17,6 +17,37 @@
 #include "virtio.h"
 #include "sysemu.h"
 
+/* from Linux's linux/virtio_ring.h */
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT      1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE     2
+
+/* This means don't notify other side when buffer added. */
+#define VRING_USED_F_NO_NOTIFY 1
+/* This means don't interrupt guest when buffer consumed. */
+#define VRING_AVAIL_F_NO_INTERRUPT     1
+
+#define VIRTIO_PCI_QUEUE_MAX   16
+
+typedef struct VRing
+{
+    unsigned int num;
+    target_phys_addr_t desc;
+    target_phys_addr_t avail;
+    target_phys_addr_t used;
+} VRing;
+
+struct VirtQueue
+{
+    VRing vring;
+    uint32_t pfn;
+    uint16_t last_avail_idx;
+    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+    int index;
+};
+
 /* from Linux's linux/virtio_pci.h */
 
 /* A 32-bit r/o bitmask of the features supported by the host */
@@ -58,11 +89,74 @@
 
 /* virt queue functions */
 
-static void virtqueue_init(VirtQueue *vq, void *p)
+static void virtqueue_init(VirtQueue *vq, target_phys_addr_t p)
 {
     vq->vring.desc = p;
-    vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
-    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned 
long)&vq->vring.avail->ring[vq->vring.num]);
+    vq->vring.avail = p + vq->vring.num * 16;
+    vq->vring.used = vq->vring.avail + 2 * (2 + vq->vring.num);
+    vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.used);
+}
+
+static uint64_t vring_desc_addr(VirtQueue *vq, unsigned int i)
+{
+    return ldq_phys(vq->vring.desc + i * 16 + 0);
+}
+
+static uint32_t vring_desc_len(VirtQueue *vq, unsigned int i)
+{
+    return ldl_phys(vq->vring.desc + i * 16 + 8);
+}
+
+static uint16_t vring_desc_flags(VirtQueue *vq, unsigned int i)
+{
+    return lduw_phys(vq->vring.desc + i * 16 + 12);
+}
+
+static uint16_t vring_desc_next(VirtQueue *vq, unsigned int i)
+{
+    return lduw_phys(vq->vring.desc + i * 16 + 14);
+}
+
+static uint16_t vring_avail_flags(VirtQueue *vq)
+{
+    return lduw_phys(vq->vring.avail + 0);
+}
+
+static uint16_t vring_avail_idx(VirtQueue *vq)
+{
+    return lduw_phys(vq->vring.avail + 2);
+}
+
+static uint16_t vring_avail_ring(VirtQueue *vq, unsigned int i)
+{
+    return lduw_phys(vq->vring.avail + 4 + i * 2);
+}
+
+static void vring_used_set_flag(VirtQueue *vq, uint16_t flag)
+{
+    stw_phys(vq->vring.used + 0, lduw_phys(vq->vring.used + 0) | flag);
+}
+
+static void vring_used_unset_flag(VirtQueue *vq, uint16_t flag)
+{
+    stw_phys(vq->vring.used + 0, lduw_phys(vq->vring.used + 0) & ~flag);
+}
+
+static uint16_t vring_used_get_idx(VirtQueue *vq)
+{
+    return lduw_phys(vq->vring.used + 2);
+}
+
+static void vring_used_set_idx(VirtQueue *vq, uint16_t value)
+{
+    stw_phys(vq->vring.used + 2, value);
+}
+
+static void vring_used_set_ring(VirtQueue *vq, unsigned int i,
+                               uint32_t id, uint32_t len)
+{
+    stl_phys(vq->vring.used + 4 + i * 8 + 0, id);
+    stl_phys(vq->vring.used + 4 + i * 8 + 4, len);
 }
 
 static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
@@ -70,11 +164,11 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, 
unsigned int i)
     unsigned int next;
 
     /* If this descriptor says it doesn't chain, we're done. */
-    if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
+    if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
        return vq->vring.num;
 
     /* Check they're not leading us off end of descriptors. */
-    next = vq->vring.desc[i].next;
+    next = vring_desc_next(vq, i);
     /* Make sure compiler knows to grab that: we don't want it changing! */
     wmb();
 
@@ -87,15 +181,12 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, 
unsigned int i)
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
 {
-    VRingUsedElem *used;
+    uint16_t idx;
 
-    /* Get a pointer to the next entry in the used ring. */
-    used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
-    used->id = elem->index;
-    used->len = len;
-    /* Make sure buffer is written before we update index. */
+    idx = vring_used_get_idx(vq);
+    vring_used_set_ring(vq, idx % vq->vring.num, elem->index, len);
     wmb();
-    vq->vring.used->idx++;
+    vring_used_set_idx(vq, idx + 1);
 }
 
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
@@ -104,17 +195,17 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
     unsigned int position;
 
     /* Check it isn't doing very strange things with descriptor numbers. */
-    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
+    if ((uint16_t)(vring_avail_idx(vq) - vq->last_avail_idx) > vq->vring.num)
        errx(1, "Guest moved used index from %u to %u",
-            vq->last_avail_idx, vq->vring.avail->idx);
+            vq->last_avail_idx, vring_avail_idx(vq));
 
     /* If there's nothing new since last we looked, return invalid. */
-    if (vq->vring.avail->idx == vq->last_avail_idx)
+    if (vring_avail_idx(vq) == vq->last_avail_idx)
        return 0;
 
     /* Grab the next descriptor number they're advertising, and increment
      * the index we've seen. */
-    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
+    head = vring_avail_ring(vq, vq->last_avail_idx++ % vq->vring.num);
 
     /* If their number is silly, that's a fatal mistake. */
     if (head >= vq->vring.num)
@@ -127,17 +218,17 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
     do {
        struct iovec *sg;
 
-       if ((vq->vring.desc[i].addr + vq->vring.desc[i].len) > ram_size)
+       if ((vring_desc_addr(vq, i) + vring_desc_len(vq, i)) > ram_size)
            errx(1, "Guest sent invalid pointer");
 
-       if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+       if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)
            sg = &elem->in_sg[elem->in_num++];
        else
            sg = &elem->out_sg[elem->out_num++];
 
        /* Grab the first descriptor, and check it's OK. */
-       sg->iov_len = vq->vring.desc[i].len;
-       sg->iov_base = phys_ram_base + vq->vring.desc[i].addr;
+       sg->iov_len = vring_desc_len(vq, i);
+       sg->iov_base = phys_ram_base + vring_desc_addr(vq, i);
 
        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num)
@@ -172,9 +263,9 @@ void virtio_reset(void *opaque)
     vdev->isr = 0;
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
-        vdev->vq[i].vring.desc = NULL;
-        vdev->vq[i].vring.avail = NULL;
-        vdev->vq[i].vring.used = NULL;
+        vdev->vq[i].vring.desc = 0;
+        vdev->vq[i].vring.avail = 0;
+        vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pfn = 0;
     }
@@ -199,7 +290,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
        if (pa == 0) {
             virtio_reset(vdev);
        } else if (pa < (ram_size - TARGET_PAGE_SIZE)) {
-           virtqueue_init(&vdev->vq[vdev->queue_sel], phys_ram_base + pa);
+           virtqueue_init(&vdev->vq[vdev->queue_sel], pa);
            /* FIXME if pa == 0, deal with device tear down */
        }
        break;
@@ -386,14 +477,32 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
queue_size,
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
     /* Always notify when queue is empty */
-    if (vq->vring.avail->idx != vq->last_avail_idx &&
-       (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+    if (vring_avail_idx(vq) != vq->last_avail_idx &&
+       (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT))
        return;
 
     vdev->isr = 1;
     virtio_update_irq(vdev);
 }
 
+void virtio_ring_set_used_notify(VirtQueue *vq, int enable)
+{
+    if (enable)
+       vring_used_set_flag(vq, VRING_USED_F_NO_NOTIFY);
+    else
+       vring_used_unset_flag(vq, VRING_USED_F_NO_NOTIFY);
+}
+
+size_t virtio_ring_avail_size(VirtQueue *vq)
+{
+    return vring_avail_idx(vq) - vq->last_avail_idx;
+}
+
+int virtio_ring_inited(VirtQueue *vq)
+{
+    return (vq->vring.avail != 0);
+}
+
 VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
@@ -413,7 +522,7 @@ VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
-    memset(vdev->vq, 0, sizeof(vdev->vq));
+    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     config = pci_dev->config;
     config[0x00] = vendor & 0xFF;
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index dee97ba..8291bbd 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -30,66 +30,9 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED         0x80
 
-/* from Linux's linux/virtio_ring.h */
-
-/* This marks a buffer as continuing via the next field. */
-#define VRING_DESC_F_NEXT      1
-/* This marks a buffer as write-only (otherwise read-only). */
-#define VRING_DESC_F_WRITE     2
-
-/* This means don't notify other side when buffer added. */
-#define VRING_USED_F_NO_NOTIFY 1
-/* This means don't interrupt guest when buffer consumed. */
-#define VRING_AVAIL_F_NO_INTERRUPT     1
-
 typedef struct VirtQueue VirtQueue;
 typedef struct VirtIODevice VirtIODevice;
 
-typedef struct VRingDesc
-{
-    uint64_t addr;
-    uint32_t len;
-    uint16_t flags;
-    uint16_t next;
-} VRingDesc;
-
-typedef struct VRingAvail
-{
-    uint16_t flags;
-    uint16_t idx;
-    uint16_t ring[0];
-} VRingAvail;
-
-typedef struct VRingUsedElem
-{
-    uint32_t id;
-    uint32_t len;
-} VRingUsedElem;
-
-typedef struct VRingUsed
-{
-    uint16_t flags;
-    uint16_t idx;
-    VRingUsedElem ring[0];
-} VRingUsed;
-
-typedef struct VRing
-{
-    unsigned int num;
-    VRingDesc *desc;
-    VRingAvail *avail;
-    VRingUsed *used;
-} VRing;
-
-struct VirtQueue
-{
-    VRing vring;
-    uint32_t pfn;
-    uint16_t last_avail_idx;
-    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
-    int index;
-};
-
 #define VIRTQUEUE_MAX_SIZE 1024
 
 typedef struct VirtQueueElement
@@ -101,8 +44,6 @@ typedef struct VirtQueueElement
     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 } VirtQueueElement;
 
-#define VIRTIO_PCI_QUEUE_MAX   16
-
 struct VirtIODevice
 {
     PCIDevice pci_dev;
@@ -119,7 +60,7 @@ struct VirtIODevice
     uint32_t (*get_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
     void (*update_config)(VirtIODevice *vdev, uint8_t *config);
-    VirtQueue vq[VIRTIO_PCI_QUEUE_MAX];
+    VirtQueue *vq;
 };
 
 VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
@@ -140,4 +81,10 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
 
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
+void virtio_ring_set_used_notify(VirtQueue *vq, int enable);
+
+size_t virtio_ring_avail_size(VirtQueue *vq);
+
+int virtio_ring_inited(VirtQueue *vq);
+
 #endif

-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to