This is the host-userspace (qemu) backend for kvm's virtio.
It is based on the lguest code. It implements the callback for the
registration hypercall, which registers the shared-memory descriptor
pages, as well as the input and notify handlers for the DMA calls.
Currently qemu has no readv/writev handlers, so the tap fd is used
directly. This will be generalized in the future.
Signed-off-by: Dor Laor <[EMAIL PROTECTED]>
---
qemu/virtio.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
qemu/virtio.h | 88 +++++++++++++++
2 files changed, 420 insertions(+), 0 deletions(-)
create mode 100644 qemu/virtio.c
create mode 100644 qemu/virtio.h
diff --git a/qemu/virtio.c b/qemu/virtio.c
new file mode 100644
index 0000000..b786385
--- /dev/null
+++ b/qemu/virtio.c
@@ -0,0 +1,332 @@
+/*
+ * More efficient lguest implementation of virtio, using descriptors.
+ *
+ * This allows zero-copy from guest <-> host. It uses a page of
+ * descriptors, a page to say what descriptors to use, and a page to say
+ * what's been used: one each set for inbufs and one for outbufs.
+ *
+ * Copyright 2007 Dor Laor <[EMAIL PROTECTED]> Qumranet
+ * Copyright 2007 Rusty Russell <[EMAIL PROTECTED]> IBM Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+
+#include "vl.h"
+#include "virtio.h"
+
+/* Number of struct virtio_desc entries that fit in one page. The code below
+ * uses it both as the ring size (modulus for the available/used indices) and
+ * as the "nothing available" return value of get_available(). */
+#define descs_per_page() (getpagesize() / sizeof(struct virtio_desc))
+
+/* Debug tracing switch: verbose() prints only when this is non-zero. */
+static int verbose = 0;
+/* printf-style trace helper. The bare `verbose` inside the body is the flag
+ * above: a function-like macro is only expanded when followed by '('. */
+#define verbose(args...) \
+ do { if (verbose) printf(args); } while(0)
+
+/* Global list of virtio devices; reset by virtio_init(). */
+struct vio_device_list devices;
+
+/* Point a one-element iovec at a static scratch buffer, so that input the
+ * Guest doesn't want or can't handle can still be read and thrown away.
+ *
+ * iov: its first element is overwritten to describe the scratch buffer.
+ * num: receives the number of valid iovec entries, which is always 1. */
+static void discard_iovec(struct iovec *iov, unsigned int *num)
+{
+	static char scratch[1024];
+
+	iov[0].iov_base = scratch;
+	iov[0].iov_len = sizeof scratch;
+	*num = 1;
+}
+
+/* Generic routine called when the Guest sends some DMA out.
+ *
+ * key:    selects the device; compared against virtio_device.id.
+ * iotype: mask of VIRTIO_DEVICE_INPUT / VIRTIO_DEVICE_OUTPUT. Only the OUTPUT
+ *         bit is acted on; input is not dispatched from this path.
+ *
+ * If no device has the given key, a trace message is emitted (when tracing is
+ * enabled) and nothing else happens. */
+void handle_notify(unsigned int key, unsigned int iotype)
+{
+	struct virtio_device *dev = devices.dev;
+
+	/* Linear search of the global device list for a matching id. */
+	while (dev && dev->id != key)
+		dev = dev->next;
+
+	if (!dev) {
+		verbose("Pending dma key %x", key);
+		return;
+	}
+
+	if ((iotype & VIRTIO_DEVICE_OUTPUT) && dev->handle_output)
+		dev->handle_output(dev);
+}
+
+
+/* Host-side view of one virtqueue. The pointers are filled in by
+ * setup_virtqueue_info() and refer to three consecutive pages. */
+struct virtqueue_info
+{
+ /* Their page of descriptors (first page). */
+ struct virtio_desc *desc;
+ /* How they tell us what buffers are available: an index word at the start
+  * of the second page, followed by the array of available descriptor
+  * numbers. */
+ unsigned int *avail_idx;
+ unsigned int *available;
+ /* How we tell them what we've used: an index word at the start of the
+  * third page, followed by the array of used entries. */
+ unsigned int *used_idx;
+ struct virtio_used *used;
+
+ /* Last available index we saw; advanced by advance_available(). */
+ unsigned int last_avail_idx;
+};
+
+/* Lay a virtqueue_info over three consecutive pages starting at mem:
+ * page 0 holds the descriptors, page 1 the available index followed by the
+ * available array, page 2 the used index followed by the used array.
+ * Our private "last seen" available index restarts at 0. */
+static void setup_virtqueue_info(struct virtqueue_info *vqi, void *mem)
+{
+	char *base = mem;
+	const int page = getpagesize();
+
+	vqi->last_avail_idx = 0;
+
+	vqi->desc = (struct virtio_desc *)base;
+
+	vqi->avail_idx = (unsigned int *)(base + page);
+	vqi->available = vqi->avail_idx + 1;
+
+	vqi->used_idx = (unsigned int *)(base + 2 * page);
+	vqi->used = (struct virtio_used *)(vqi->used_idx + 1);
+}
+
+/* Private state of the virtio network device: `in` is the queue the host
+ * fills from the tap fd (handle_virtnet_input), `out` is the queue whose
+ * buffers are written to the tap fd (handle_virtnet_output). */
+struct virtnet_info
+{
+ struct virtqueue_info in, out;
+};
+
+/* Turn the descriptor chain starting at head index i into an iovec array.
+ *
+ * Descriptors consist of output then input descs, so iov[0 .. *out_num - 1]
+ * are the buffers without VIRTIO_DESC_F_WRITE and the following *in_num
+ * entries are the ones flagged VIRTIO_DESC_F_WRITE.
+ *
+ * vdev:    device; vdev->memstart is the base added to each descriptor's
+ *          pfn/offset to form the host address.
+ * desc:    the queue's descriptor page.
+ * i:       index of the chain head.
+ * iov:     output array; must have room for descs_per_page() entries.
+ * out_num: receives the number of output entries.
+ * in_num:  receives the number of input entries.
+ *
+ * The chain lives in Guest memory and is not trusted. On any malformed chain
+ * (index out of range, loop, out-after-in, head flag in the middle) the error
+ * is reported on stderr and both counts are set to 0, so callers never see a
+ * partially gathered chain and nothing beyond desc[] or iov[] is touched.
+ * NOTE(review): the resulting host addresses are not range-checked against the
+ * Guest's memory size, which is not available here. */
+static void gather_desc(struct virtio_device *vdev,
+			struct virtio_desc *desc,
+			unsigned int i,
+			struct iovec iov[],
+			unsigned int *out_num, unsigned int *in_num)
+{
+	const unsigned int ndescs = descs_per_page();
+	unsigned int count = 0;
+
+	*out_num = *in_num = 0;
+
+	if (i >= ndescs) {
+		fprintf(stderr, "Desc head is %u", i);
+		return;
+	}
+
+	for (;;) {
+		iov[count].iov_len = desc[i].len;
+		/* Widen before multiplying so a large pfn cannot wrap. */
+		iov[count].iov_base = (void *)(vdev->memstart +
+			(unsigned long)desc[i].pfn * getpagesize() +
+			desc[i].offset);
+		count++;
+
+		if (desc[i].flags & VIRTIO_DESC_F_WRITE) {
+			(*in_num)++;
+		} else {
+			if (*in_num) {
+				fprintf(stderr, "Descriptor has out after in");
+				goto bad;
+			}
+			(*out_num)++;
+		}
+
+		if (!(desc[i].flags & VIRTIO_DESC_F_NEXT))
+			return;
+
+		/* A chain cannot be longer than the table: it must be a loop. */
+		if (count == ndescs) {
+			fprintf(stderr, "Looped descriptor");
+			goto bad;
+		}
+
+		i = desc[i].next;
+		if (i >= ndescs) {
+			fprintf(stderr, "Desc next is %u", i);
+			goto bad;
+		}
+		if (desc[i].flags & VIRTIO_DESC_F_HEAD) {
+			fprintf(stderr, "Descriptor has middle head at %i", i);
+			goto bad;
+		}
+	}
+
+bad:
+	*out_num = *in_num = 0;
+}
+
+/* We've used a buffer, tell them about it.
+ *
+ * vqi: queue whose used ring is updated.
+ * id:  head index of the descriptor chain being returned.
+ * len: number of bytes written into the chain (0 for output buffers).
+ *
+ * The used entry is filled in before the used index is advanced, so the
+ * Guest cannot observe an index that covers a slot we have not written yet. */
+static void add_used(struct virtqueue_info *vqi, unsigned int id, int len)
+{
+	struct virtio_used *used;
+
+	used = &vqi->used[*vqi->used_idx % descs_per_page()];
+	used->id = id;
+	used->len = len;
+
+	/* Compiler barrier: keep the entry stores ahead of the index store.
+	 * NOTE(review): this relies on the CPU not reordering stores (true on
+	 * x86); a real write barrier is needed on weaker memory models. */
+	__asm__ __volatile__("" : : : "memory");
+	(*vqi->used_idx)++;
+
+	verbose("%s:used_idx = %d\n", __FUNCTION__, *vqi->used_idx);
+}
+
+/* See if they have a buffer for us.
+ *
+ * Returns the head descriptor index of the next available chain, or
+ * descs_per_page() when there is nothing usable. The entry is not consumed;
+ * call advance_available() once the buffer has been taken.
+ *
+ * Both the available index and the ring entry are written by the Guest and
+ * are not trusted: an index that jumped by more than the ring size, or an
+ * entry outside the descriptor table, is reported on stderr and treated as
+ * "nothing available" so that callers never index past the table. */
+static unsigned int get_available(struct virtqueue_info *vqi)
+{
+	const unsigned int ndescs = descs_per_page();
+	/* Read the Guest-owned index once so all checks see the same value. */
+	const unsigned int avail = *vqi->avail_idx;
+	unsigned int num;
+
+	if (avail - vqi->last_avail_idx > ndescs) {
+		fprintf(stderr, "Guest moved available index from %u to %u",
+			vqi->last_avail_idx, avail);
+		return ndescs;
+	}
+
+	if (avail == vqi->last_avail_idx)
+		return ndescs;
+
+	num = vqi->available[vqi->last_avail_idx % ndescs];
+	if (num >= ndescs) {
+		fprintf(stderr, "Guest says index %u is available", num);
+		return ndescs;
+	}
+	return num;
+}
+
+/* Consume the entry last returned by get_available() by stepping our private
+ * copy of the available index forward by one. */
+static void advance_available(struct virtqueue_info *vqi)
+{
+	vqi->last_avail_idx += 1;
+}
+
+/* Pull pending packets from the tap fd into the Guest's receive buffers.
+ *
+ * At most descs_per_page() packets are delivered per call. For each one the
+ * next available receive chain is gathered, the tap fd is polled without
+ * blocking, and the packet is read straight into the gathered buffers and
+ * reported through the used ring. When the Guest has posted no buffer, one
+ * packet is read into a scratch buffer (i.e. dropped) and the loop stops.
+ * The Guest is interrupted if at least one packet was delivered.
+ *
+ * Does nothing until virtio_register_mem() has marked the device registered. */
+static void handle_virtnet_input(struct virtio_device *dev)
+{
+	struct virtnet_info *vni = dev->virtio_priv;
+	const unsigned int ring_size = descs_per_page();
+	struct iovec iov[descs_per_page()];
+	unsigned int head, n_out, n_in;
+	unsigned int delivered;
+	struct timeval no_wait;
+	fd_set readable;
+	int nread;
+
+	/* Check if not yet registered: the queue pointers are not set up. */
+	if (!dev->register_done)
+		return;
+
+	FD_ZERO(&readable);
+	no_wait.tv_sec = 0;
+	no_wait.tv_usec = 0;
+
+	for (delivered = 0; delivered < ring_size; delivered++) {
+		int have_buffer;
+
+		FD_SET(dev->tap_fd, &readable);
+
+		/* Find any input descriptor head. */
+		head = get_available(&vni->in);
+		have_buffer = (head != ring_size);
+		if (have_buffer) {
+			gather_desc(dev, vni->in.desc, head, iov, &n_out, &n_in);
+			if (n_out != 0)
+				fprintf(stderr, "network: output in receive queue?");
+			verbose("%s: gathered %d desc\n", __FUNCTION__, n_in);
+		} else {
+			if (dev->desc->status & VIRTIO_DEVICE_S_DRIVER_OK)
+				printf("network: no dma buffer!");
+			/* Maybe don't discard but leave it until we have something? */
+			discard_iovec(iov, &n_in);
+		}
+
+		/* Zero timeout: only poll whether the tap fd has data. */
+		if (select(dev->tap_fd + 1, &readable, NULL, NULL, &no_wait) <= 0)
+			break;
+		if (!FD_ISSET(dev->tap_fd, &readable)) {
+			verbose("virt input, select didnt set tap fd\n");
+			break;
+		}
+
+		nread = readv(dev->tap_fd, iov, n_in);
+		if (nread <= 0) {
+			fprintf(stderr, "reading network error %d", nread);
+			break;
+		}
+
+		verbose("virt input packet len %i addr %p [%02x %02x] (%s)\n", nread, iov[0].iov_base,
+			((uint32_t *)iov[0].iov_base)[0], ((uint32_t *)iov[0].iov_base)[1],
+			have_buffer ? "sent" : "discarded");
+
+		/* A discarded packet ends the batch and is not counted. */
+		if (!have_buffer)
+			break;
+
+		advance_available(&vni->in);
+		add_used(&vni->in, head, nread);
+	}
+
+	if (delivered != 0) {
+		verbose("%s:triggering irq\n", __FUNCTION__);
+		dev->trigger_irq(dev->opaque);
+	}
+}
+
+/* Send every packet the Guest has queued on the output queue to the tap fd.
+ *
+ * Each available chain is gathered into an iovec, written with a single
+ * writev(), and returned to the Guest through the used ring with length 0.
+ * A failed write is reported on stderr; the buffer is still returned so the
+ * Guest can reuse it. The Guest is interrupted once at the end, whether or
+ * not anything was sent.
+ *
+ * Does nothing until virtio_register_mem() has marked the device registered. */
+static void handle_virtnet_output(struct virtio_device *dev)
+{
+	struct virtnet_info *vni = dev->virtio_priv;
+	struct iovec iov[descs_per_page()];
+	unsigned int head, out_num, in_num;
+
+	if (!dev->register_done)
+		return;
+
+	/* Send all output descriptors. */
+	while ((head = get_available(&vni->out)) < descs_per_page()) {
+		advance_available(&vni->out);
+		gather_desc(dev, vni->out.desc, head, iov, &out_num, &in_num);
+		if (in_num != 0)
+			fprintf(stderr, "network: recv descs in output queue?");
+		verbose("%s:gathered %d out dec\n", __FUNCTION__, out_num);
+
+		/* Nothing to write for an empty chain; otherwise report a
+		 * failed transmit instead of silently dropping the packet. */
+		if (out_num != 0 && writev(dev->tap_fd, iov, out_num) < 0)
+			perror("virtio: writev to tap fd failed");
+
+		add_used(&vni->out, head, 0);
+	}
+	dev->trigger_irq(dev->opaque);
+}
+
+/* Allocate a device plus its descriptor and append it to a device list.
+ *
+ * devices:   list to append to (this parameter shadows the global `devices`).
+ * type:      stored in desc->type.
+ * num_pages: stored in desc->num_pages.
+ * features:  stored in desc->features.
+ *
+ * Both allocations are zero-filled, so every field the caller does not set
+ * (for example desc->status, which handle_virtnet_input() reads) starts as 0
+ * rather than heap garbage.
+ *
+ * Returns the new device, or NULL if memory could not be allocated; in that
+ * case the list is left untouched. */
+static struct virtio_device *new_device(struct vio_device_list *devices, uint16_t type,
+					uint16_t num_pages, uint16_t features)
+{
+	struct virtio_device *dev = calloc(1, sizeof(*dev));
+
+	if (!dev)
+		return NULL;
+
+	dev->desc = calloc(1, sizeof(*dev->desc));
+	if (!dev->desc) {
+		free(dev);
+		return NULL;
+	}
+	dev->desc->type = type;
+	dev->desc->features = features;
+	dev->desc->num_pages = num_pages;
+
+	/* Link the device in only once it is fully allocated. */
+	dev->next = NULL;
+	*(devices->lastdev) = dev;
+	devices->lastdev = &dev->next;
+
+	return dev;
+}
+
+/* Record where the queues of the device with id `key` live.
+ *
+ * memstart: base later added to descriptor pfn/offset values by gather_desc().
+ * key:      selects the device; compared against virtio_device.id.
+ * out_addr: used as a host pointer to the first page of the output queue.
+ * in_addr:  used as a host pointer to the first page of the input queue.
+ *
+ * On success the device is marked registered, which enables its input and
+ * output handlers. If no device matches, an error is printed on stderr. */
+void virtio_register_mem(unsigned long memstart, unsigned int key, unsigned long out_addr, unsigned long in_addr)
+{
+	struct virtio_device *dev;
+	struct virtnet_info *vni;
+
+	for (dev = devices.dev; dev != NULL; dev = dev->next) {
+		if (dev->id != key)
+			continue;
+
+		vni = dev->virtio_priv;
+		setup_virtqueue_info(&vni->in, (void *)in_addr);
+		setup_virtqueue_info(&vni->out, (void *)out_addr);
+		dev->memstart = memstart;
+		dev->register_done = 1;
+		return;
+	}
+
+	fprintf(stderr, "%s: no device found for key %x\n", __FUNCTION__, key);
+}
+
+/* Create the virtio network device and add it to the global device list.
+ *
+ * opaque:      caller's private pointer, passed back to trigger_irq().
+ * key:         id under which handle_notify() and virtio_register_mem()
+ *              look the device up.
+ * tap_fd:      file descriptor packets are read from and written to.
+ * trigger_irq: callback used to interrupt the Guest.
+ *
+ * The device starts unregistered; its handlers do nothing until
+ * virtio_register_mem() has been called for `key`.
+ *
+ * Returns the new device, or NULL if memory could not be allocated. */
+struct virtio_device* setup_virtnet(void *opaque,
+				    unsigned int key,
+				    int tap_fd,
+				    void (*trigger_irq)(void *opaque))
+{
+	struct virtio_device *dev;
+	struct virtnet_info *vni;
+
+	/* Allocate the private state first so that a failure here cannot leave
+	 * a half-initialised device on the list. */
+	vni = calloc(1, sizeof(*vni));
+	if (!vni)
+		return NULL;
+
+	/* new_device() takes (list, type, num_pages, features): the device type
+	 * goes first and the page count second. Presumably 6 pages = three per
+	 * queue (descriptors, available, used) for the in and out queues. */
+	dev = new_device(&devices, VIRTIO_DEVICE_T_NET, 6,
+			 VIRTIO_DEVICE_F_RANDOMNESS);
+	if (!dev) {
+		free(vni);
+		return NULL;
+	}
+
+	dev->handle_output = handle_virtnet_output;
+	dev->handle_input = handle_virtnet_input;
+	dev->trigger_irq = trigger_irq;
+	dev->tap_fd = tap_fd;
+	dev->id = key;
+
+	dev->virtio_priv = vni;
+	dev->opaque = opaque;
+	dev->register_done = 0;
+
+	return dev;
+}
+
+/* Reset the global device list to empty: the tail pointer refers to the head
+ * slot, and the head slot holds no device. new_device() appends through the
+ * tail pointer, so this must run before any device is created. */
+void virtio_init(void)
+{
+	devices.lastdev = &devices.dev;
+	*devices.lastdev = NULL;
+}
+
diff --git a/qemu/virtio.h b/qemu/virtio.h
new file mode 100644
index 0000000..64306fd
--- /dev/null
+++ b/qemu/virtio.h
@@ -0,0 +1,88 @@
+/*
+ * More efficient lguest implementation of virtio, using descriptors.
+ *
+ * This allows zero-copy from guest <-> host. It uses a page of
+ * descriptors, a page to say what descriptors to use, and a page to say
+ * what's been used: one each set for inbufs and one for outbufs.
+ *
+ * Copyright 2007 Dor Laor <[EMAIL PROTECTED]> Qumranet
+ * Copyright 2007 Rusty Russell <[EMAIL PROTECTED]> IBM Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+*/
+
+#ifndef __VIRTIO_H_
+#define __VIRTIO_H_
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <stdint.h>
+#include <asm/types.h>
+#include <asm/page.h>
+#include <linux/virtio_be.h>
+
+/* The device structure describes a single device. Instances are kept on the
+ * singly linked list held by struct vio_device_list. */
+struct virtio_device
+{
+ /* Key the device is looked up by. */
+ unsigned int id;
+ /* The linked-list pointer. */
+ struct virtio_device *next;
+ /* The descriptor for this device (type, features, num_pages, status).
+  * NOTE(review): described as mapped into the Guest; confirm against the
+  * code that allocates it. */
+ struct virtio_device_desc *desc;
+
+ /* Callback used to interrupt the Guest; called with `opaque` below. */
+ void (*trigger_irq)(void *opaque);
+
+ /* Per-device handlers for the input and output directions. */
+ void (*handle_input)(struct virtio_device *vdev);
+ void (*handle_output)(struct virtio_device *vdev);
+
+ /* Device-specific data. */
+ void *virtio_priv;
+
+ /* Qemu private data */
+ void *opaque;
+
+ /* The outside world fd */
+ int tap_fd;
+
+ /* Bit values for an I/O direction mask. */
+ #define VIRTIO_DEVICE_INPUT 1
+ #define VIRTIO_DEVICE_OUTPUT 2
+ unsigned int io_type;
+
+ /* Non-zero once the device's queue memory has been registered. */
+ unsigned int register_done;
+
+ /* Base address used when translating descriptor addresses. */
+ unsigned long memstart;
+};
+
+/* Head and tail of the list of virtio devices. */
+struct vio_device_list
+{
+ /* A single linked list of devices. */
+ struct virtio_device *dev;
+ /* ... And an end pointer so we can easily append new devices: it points
+  * at the `next` slot of the last device, or at `dev` when the list is
+  * empty. */
+ struct virtio_device **lastdev;
+};
+
+/* Record the queue memory of the device with id `key`: out_addr and in_addr
+ * locate its output and input queues, memstart is the base used when
+ * translating descriptor addresses. */
+void virtio_register_mem(unsigned long memstart, unsigned int key,
unsigned long out_addr, unsigned long in_addr);
+/* Guest notification for the device with id `key`; iotype is a mask of
+ * VIRTIO_DEVICE_INPUT / VIRTIO_DEVICE_OUTPUT. */
+void handle_notify(unsigned int key, unsigned int iotype);
+/* Create a virtio network device with id `key` on top of tap_fd.
+ * trigger_irq is called with `opaque` to interrupt the Guest.
+ * Returns the newly created device. */
+struct virtio_device* setup_virtnet(void *opaque,
+ unsigned int key,
+ int tap_fd,
+ void (*trigger_irq)(void *opaque));
+/* Initialise the global device list to empty. */
+void virtio_init(void);
+
+#endif
-----
In simplicity there is elegance.
Dor Laor ;)
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel