virtio is currently pretty sloppy about accessing guest memory directly. This
is problematic both because phys_ram_base + PA is not always a valid host
pointer and because it requires that the host and guest be of the same
endianness.
This patch converts virtio to use the QEMU ld/st physical memory access
functions to manipulate the ring queues. I've tested both virtio-blk and
virtio-net, and I've also confirmed that performance doesn't regress with
virtio-net.
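For illustration only (not part of the patch): the access pattern changes
roughly as in the sketch below. example_read_avail_idx() and avail_pa are
made-up names; the old style dereferenced guest memory through phys_ram_base
in host byte order, while the new style goes through lduw_phys(), which
honours the guest's byte order.

    /* Illustrative sketch, not part of the patch.  "avail_pa" is a made-up
     * guest physical address of an avail ring; the idx field sits 2 bytes in. */
    static uint16_t example_read_avail_idx(target_phys_addr_t avail_pa)
    {
        /* old style: host pointer arithmetic, host byte order */
        /* return ((VRingAvail *)(phys_ram_base + avail_pa))->idx; */

        /* new style: load through the physical memory helpers, guest byte order */
        return lduw_phys(avail_pa + 2);
    }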
We still use phys_ram_base to build the struct iovec used for I/O operations.
I'm still trying to think of a way to eliminate that which doesn't involve an
unnecessary copy or a full-blown DMA API.
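For reference, the remaining phys_ram_base dependency is the scatter/gather
setup in virtqueue_pop(), quoted from the patch below; this is the mapping an
eventual copy or DMA API would have to replace:

    sg->iov_len  = vring_desc_len(vq, i);
    sg->iov_base = phys_ram_base + vring_desc_addr(vq, i);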
This patch should make it easier to use virtio with QEMU (in the absence of
KVM).
Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 3d54c4e..e1b0aad 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -114,7 +114,7 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
int offset, i;
/* FIXME: the drivers really need to set their status better */
- if (n->rx_vq->vring.avail == NULL) {
+ if (!virtio_ring_inited(n->rx_vq)) {
n->can_receive = 0;
return;
}
@@ -174,7 +174,7 @@ void virtio_net_poll(void)
continue;
/* FIXME: the drivers really need to set their status better */
- if (vnet->rx_vq->vring.avail == NULL) {
+ if (!virtio_ring_inited(vnet->rx_vq)) {
vnet->can_receive = 0;
continue;
}
@@ -257,8 +257,8 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
VirtIONet *n = to_virtio_net(vdev);
if (n->tx_timer_active &&
- (vq->vring.avail->idx - vq->last_avail_idx) == 64) {
- vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ virtio_ring_avail_size(vq) == 64) {
+ virtio_ring_set_used_notify(vq, 0);
qemu_del_timer(n->tx_timer);
n->tx_timer_active = 0;
virtio_net_flush_tx(n, vq);
@@ -266,7 +266,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
qemu_mod_timer(n->tx_timer,
qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
n->tx_timer_active = 1;
- vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+ virtio_ring_set_used_notify(vq, 1);
}
}
@@ -280,7 +280,7 @@ static void virtio_net_tx_timer(void *opaque)
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
- n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ virtio_ring_set_used_notify(n->tx_vq, 0);
virtio_net_flush_tx(n, n->tx_vq);
}
diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 9100bb1..5a905b1 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -17,6 +17,37 @@
#include "virtio.h"
#include "sysemu.h"
+/* from Linux's linux/virtio_ring.h */
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT 1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE 2
+
+/* This means don't notify other side when buffer added. */
+#define VRING_USED_F_NO_NOTIFY 1
+/* This means don't interrupt guest when buffer consumed. */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+#define VIRTIO_PCI_QUEUE_MAX 16
+
+typedef struct VRing
+{
+ unsigned int num;
+ target_phys_addr_t desc;
+ target_phys_addr_t avail;
+ target_phys_addr_t used;
+} VRing;
+
+struct VirtQueue
+{
+ VRing vring;
+ uint32_t pfn;
+ uint16_t last_avail_idx;
+ void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+ int index;
+};
+
/* from Linux's linux/virtio_pci.h */
/* A 32-bit r/o bitmask of the features supported by the host */
@@ -58,11 +89,74 @@
/* virt queue functions */
-static void virtqueue_init(VirtQueue *vq, void *p)
+static void virtqueue_init(VirtQueue *vq, target_phys_addr_t p)
{
vq->vring.desc = p;
- vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
- vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
+ vq->vring.avail = p + vq->vring.num * 16;
+ vq->vring.used = vq->vring.avail + 2 * (2 + vq->vring.num);
+ vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.used);
+}
+
+static uint64_t vring_desc_addr(VirtQueue *vq, unsigned int i)
+{
+ return ldq_phys(vq->vring.desc + i * 16 + 0);
+}
+
+static uint32_t vring_desc_len(VirtQueue *vq, unsigned int i)
+{
+ return ldl_phys(vq->vring.desc + i * 16 + 8);
+}
+
+static uint16_t vring_desc_flags(VirtQueue *vq, unsigned int i)
+{
+ return lduw_phys(vq->vring.desc + i * 16 + 12);
+}
+
+static uint16_t vring_desc_next(VirtQueue *vq, unsigned int i)
+{
+ return lduw_phys(vq->vring.desc + i * 16 + 14);
+}
+
+static uint16_t vring_avail_flags(VirtQueue *vq)
+{
+ return lduw_phys(vq->vring.avail + 0);
+}
+
+static uint16_t vring_avail_idx(VirtQueue *vq)
+{
+ return lduw_phys(vq->vring.avail + 2);
+}
+
+static uint16_t vring_avail_ring(VirtQueue *vq, unsigned int i)
+{
+ return lduw_phys(vq->vring.avail + 4 + i * 2);
+}
+
+static void vring_used_set_flag(VirtQueue *vq, uint16_t flag)
+{
+ stw_phys(vq->vring.used + 0, lduw_phys(vq->vring.used + 0) | flag);
+}
+
+static void vring_used_unset_flag(VirtQueue *vq, uint16_t flag)
+{
+ stw_phys(vq->vring.used + 0, lduw_phys(vq->vring.used + 0) & ~flag);
+}
+
+static uint16_t vring_used_get_idx(VirtQueue *vq)
+{
+ return lduw_phys(vq->vring.used + 2);
+}
+
+static void vring_used_set_idx(VirtQueue *vq, uint16_t value)
+{
+ stw_phys(vq->vring.used + 2, value);
+}
+
+static void vring_used_set_ring(VirtQueue *vq, unsigned int i,
+ uint32_t id, uint32_t len)
+{
+ stl_phys(vq->vring.used + 4 + i * 8 + 0, id);
+ stl_phys(vq->vring.used + 4 + i * 8 + 4, len);
}
static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
@@ -70,11 +164,11 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
unsigned int next;
/* If this descriptor says it doesn't chain, we're done. */
- if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
+ if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
return vq->vring.num;
/* Check they're not leading us off end of descriptors. */
- next = vq->vring.desc[i].next;
+ next = vring_desc_next(vq, i);
/* Make sure compiler knows to grab that: we don't want it changing! */
wmb();
@@ -87,15 +181,12 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
- VRingUsedElem *used;
+ uint16_t idx;
- /* Get a pointer to the next entry in the used ring. */
- used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
- used->id = elem->index;
- used->len = len;
- /* Make sure buffer is written before we update index. */
+ idx = vring_used_get_idx(vq);
+ vring_used_set_ring(vq, idx % vq->vring.num, elem->index, len);
wmb();
- vq->vring.used->idx++;
+ vring_used_set_idx(vq, idx + 1);
}
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
@@ -104,17 +195,17 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
unsigned int position;
/* Check it isn't doing very strange things with descriptor numbers. */
- if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
+ if ((uint16_t)(vring_avail_idx(vq) - vq->last_avail_idx) > vq->vring.num)
errx(1, "Guest moved used index from %u to %u",
- vq->last_avail_idx, vq->vring.avail->idx);
+ vq->last_avail_idx, vring_avail_idx(vq));
/* If there's nothing new since last we looked, return invalid. */
- if (vq->vring.avail->idx == vq->last_avail_idx)
+ if (vring_avail_idx(vq) == vq->last_avail_idx)
return 0;
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
- head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
+ head = vring_avail_ring(vq, vq->last_avail_idx++ % vq->vring.num);
/* If their number is silly, that's a fatal mistake. */
if (head >= vq->vring.num)
@@ -127,17 +218,17 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
do {
struct iovec *sg;
- if ((vq->vring.desc[i].addr + vq->vring.desc[i].len) > ram_size)
+ if ((vring_desc_addr(vq, i) + vring_desc_len(vq, i)) > ram_size)
errx(1, "Guest sent invalid pointer");
- if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+ if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)
sg = &elem->in_sg[elem->in_num++];
else
sg = &elem->out_sg[elem->out_num++];
/* Grab the first descriptor, and check it's OK. */
- sg->iov_len = vq->vring.desc[i].len;
- sg->iov_base = phys_ram_base + vq->vring.desc[i].addr;
+ sg->iov_len = vring_desc_len(vq, i);
+ sg->iov_base = phys_ram_base + vring_desc_addr(vq, i);
/* If we've got too many, that implies a descriptor loop. */
if ((elem->in_num + elem->out_num) > vq->vring.num)
@@ -172,9 +263,9 @@ void virtio_reset(void *opaque)
vdev->isr = 0;
for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
- vdev->vq[i].vring.desc = NULL;
- vdev->vq[i].vring.avail = NULL;
- vdev->vq[i].vring.used = NULL;
+ vdev->vq[i].vring.desc = 0;
+ vdev->vq[i].vring.avail = 0;
+ vdev->vq[i].vring.used = 0;
vdev->vq[i].last_avail_idx = 0;
vdev->vq[i].pfn = 0;
}
@@ -199,7 +290,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
if (pa == 0) {
virtio_reset(vdev);
} else if (pa < (ram_size - TARGET_PAGE_SIZE)) {
- virtqueue_init(&vdev->vq[vdev->queue_sel], phys_ram_base + pa);
+ virtqueue_init(&vdev->vq[vdev->queue_sel], pa);
/* FIXME if pa == 0, deal with device tear down */
}
break;
@@ -386,14 +477,32 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
/* Always notify when queue is empty */
- if (vq->vring.avail->idx != vq->last_avail_idx &&
- (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ if (vring_avail_idx(vq) != vq->last_avail_idx &&
+ (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT))
return;
vdev->isr = 1;
virtio_update_irq(vdev);
}
+void virtio_ring_set_used_notify(VirtQueue *vq, int enable)
+{
+ if (enable)
+ vring_used_set_flag(vq, VRING_USED_F_NO_NOTIFY);
+ else
+ vring_used_unset_flag(vq, VRING_USED_F_NO_NOTIFY);
+}
+
+size_t virtio_ring_avail_size(VirtQueue *vq)
+{
+ return vring_avail_idx(vq) - vq->last_avail_idx;
+}
+
+int virtio_ring_inited(VirtQueue *vq)
+{
+ return (vq->vring.avail != 0);
+}
+
VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
uint16_t vendor, uint16_t device,
uint16_t subvendor, uint16_t subdevice,
@@ -413,7 +522,7 @@ VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
vdev->status = 0;
vdev->isr = 0;
vdev->queue_sel = 0;
- memset(vdev->vq, 0, sizeof(vdev->vq));
+ vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
config = pci_dev->config;
config[0x00] = vendor & 0xFF;
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index dee97ba..8291bbd 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -30,66 +30,9 @@
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED 0x80
-/* from Linux's linux/virtio_ring.h */
-
-/* This marks a buffer as continuing via the next field. */
-#define VRING_DESC_F_NEXT 1
-/* This marks a buffer as write-only (otherwise read-only). */
-#define VRING_DESC_F_WRITE 2
-
-/* This means don't notify other side when buffer added. */
-#define VRING_USED_F_NO_NOTIFY 1
-/* This means don't interrupt guest when buffer consumed. */
-#define VRING_AVAIL_F_NO_INTERRUPT 1
-
typedef struct VirtQueue VirtQueue;
typedef struct VirtIODevice VirtIODevice;
-typedef struct VRingDesc
-{
- uint64_t addr;
- uint32_t len;
- uint16_t flags;
- uint16_t next;
-} VRingDesc;
-
-typedef struct VRingAvail
-{
- uint16_t flags;
- uint16_t idx;
- uint16_t ring[0];
-} VRingAvail;
-
-typedef struct VRingUsedElem
-{
- uint32_t id;
- uint32_t len;
-} VRingUsedElem;
-
-typedef struct VRingUsed
-{
- uint16_t flags;
- uint16_t idx;
- VRingUsedElem ring[0];
-} VRingUsed;
-
-typedef struct VRing
-{
- unsigned int num;
- VRingDesc *desc;
- VRingAvail *avail;
- VRingUsed *used;
-} VRing;
-
-struct VirtQueue
-{
- VRing vring;
- uint32_t pfn;
- uint16_t last_avail_idx;
- void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
- int index;
-};
-
#define VIRTQUEUE_MAX_SIZE 1024
typedef struct VirtQueueElement
@@ -101,8 +44,6 @@ typedef struct VirtQueueElement
struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElement;
-#define VIRTIO_PCI_QUEUE_MAX 16
-
struct VirtIODevice
{
PCIDevice pci_dev;
@@ -119,7 +60,7 @@ struct VirtIODevice
uint32_t (*get_features)(VirtIODevice *vdev);
void (*set_features)(VirtIODevice *vdev, uint32_t val);
void (*update_config)(VirtIODevice *vdev, uint8_t *config);
- VirtQueue vq[VIRTIO_PCI_QUEUE_MAX];
+ VirtQueue *vq;
};
VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
@@ -140,4 +81,10 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
+void virtio_ring_set_used_notify(VirtQueue *vq, int enable);
+
+size_t virtio_ring_avail_size(VirtQueue *vq);
+
+int virtio_ring_inited(VirtQueue *vq);
+
#endif