This is the host userspace backend for KVM's virtio.
It is based on the lguest code. It implements the registration
hypercall callback that registers the shared-memory descriptor pages,
and the input and notify handlers for the DMA calls.

Currently qemu doesn't have readv/writev handlers so the tap fd
is used directly. It will be generalized in the future.

Signed-off-by: Dor Laor <[EMAIL PROTECTED]>
---
 qemu/virtio.c |  332
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu/virtio.h |   88 +++++++++++++++
 2 files changed, 420 insertions(+), 0 deletions(-)
 create mode 100644 qemu/virtio.c
 create mode 100644 qemu/virtio.h

diff --git a/qemu/virtio.c b/qemu/virtio.c
new file mode 100644
index 0000000..b786385
--- /dev/null
+++ b/qemu/virtio.c
@@ -0,0 +1,332 @@
+/*
+ * More efficient lguest implementation of virtio, using descriptors.
+ * 
+ * This allows zero-copy from guest <-> host.  It uses a page of
+ * descriptors, a page to say what descriptors to use, and a page to say
+ * what's been used: one such set for inbufs and one for outbufs.
+ *
+ * Copyright 2007 Dor Laor <[EMAIL PROTECTED]> Qumranet
+ * Copyright 2007 Rusty Russell <[EMAIL PROTECTED]> IBM Corporation
+ * 
+ * Permission is hereby granted, free of charge, to any person
obtaining a copy
+ * of this software and associated documentation files (the
"Software"), to deal
+ * in the Software without restriction, including without limitation
the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell
+ * copies of the Software, and to permit persons to whom the Software
is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN
+ * THE SOFTWARE.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+
+#include "vl.h"
+#include "virtio.h"
+
+#define descs_per_page() (getpagesize() / sizeof(struct virtio_desc)) /* number of struct virtio_desc entries that fit in one page */
+
+static int verbose = 0; /* set non-zero to enable the verbose() trace output */
+#define verbose(args...) \
+       do { if (verbose) printf(args); } while(0) /* printf(args), but only when the verbose flag above is set */
+
+struct vio_device_list devices; /* global list of devices; reset by virtio_init() */
+
+/*
+ * Fill in a one-element iovec that points at a static scratch buffer and
+ * set *num to 1.  Input that the Guest has no buffer for is read into this
+ * scratch space and thereby thrown away.
+ */
+static void discard_iovec(struct iovec *iov, unsigned int *num)
+{
+       static char scratch[1024];
+
+       iov[0].iov_base = scratch;
+       iov[0].iov_len = sizeof(scratch);
+       *num = 1;
+}
+
+/*
+ * Generic entry point called when the Guest sends some DMA out.
+ *
+ * key selects the device by id; iotype is a mask of VIRTIO_DEVICE_* bits.
+ * Only VIRTIO_DEVICE_OUTPUT is acted upon: the input handler is not
+ * dispatched from this path.  The search ends at the first device whose id
+ * matches, whether or not a handler was run.
+ */
+void handle_notify(unsigned int key,  unsigned int iotype)
+{
+       struct virtio_device *dev = devices.dev;
+
+       while (dev && dev->id != key)
+               dev = dev->next;
+
+       if (!dev) {
+               verbose("Pending dma key %x", key);
+               return;
+       }
+
+       if ((iotype & VIRTIO_DEVICE_OUTPUT) && dev->handle_output)
+               dev->handle_output(dev);
+}
+
+
+struct virtqueue_info /* our view of the three pages that make up one queue (see setup_virtqueue_info) */
+{
+       /* The Guest's page of descriptors. */
+       struct virtio_desc *desc;
+       /* How the Guest tells us what buffers are available: an index at the
+        * start of the second page, followed by the array of head numbers. */
+       unsigned int *avail_idx;
+       unsigned int *available;
+       /* How we tell the Guest what we've used: an index at the start of the
+        * third page, followed by the array of used entries. */
+       unsigned int *used_idx;
+       struct virtio_used *used;
+
+       /* Value of the available index up to which we have consumed entries. */
+       unsigned int last_avail_idx;
+};
+
+/*
+ * Point vqi at the three consecutive pages of a queue that start at mem:
+ *   page 0: descriptor table,
+ *   page 1: available index, followed by the available array,
+ *   page 2: used index, followed by the used array.
+ * Also resets our record of the last available index seen to 0.
+ */
+static void setup_virtqueue_info(struct virtqueue_info *vqi, void *mem)
+{
+       /* Arithmetic on void * is a GNU extension; step in bytes via char *. */
+       char *base = mem;
+       const size_t page = getpagesize();
+
+       vqi->desc = (void *)base;
+       vqi->avail_idx = (void *)(base + page);
+       vqi->available = vqi->avail_idx + 1;
+       vqi->used_idx = (void *)(base + 2 * page);
+       vqi->used = (void *)(vqi->used_idx + 1);
+       vqi->last_avail_idx = 0;
+}
+
+struct virtnet_info /* private data of the network device (stored in virtio_priv) */
+{
+       struct virtqueue_info in, out; /* in: filled from the tap fd; out: written to the tap fd */
+};
+
+/*
+ * Walk the descriptor chain that starts at index i of the descriptor page
+ * desc and translate it into iov[].  iov must have room for
+ * descs_per_page() entries.
+ *
+ * A chain is expected to consist of output descriptors followed by input
+ * (VIRTIO_DESC_F_WRITE) descriptors; *out_num and *in_num receive the
+ * number of each.  Every buffer address is computed as
+ * vdev->memstart + pfn * page size + offset.
+ *
+ * The chain is written by the Guest and is not trusted: the walk stops,
+ * keeping the entries collected so far, as soon as an index falls outside
+ * the descriptor page or the chain is longer than one page of descriptors,
+ * so that neither desc[] nor iov[] is accessed out of bounds.
+ * NOTE(review): pfn/offset/len are not checked against the size of Guest
+ * memory - that size is not available in this function.
+ */
+static void gather_desc(struct virtio_device *vdev,
+                       struct virtio_desc *desc,
+                       unsigned int i,
+                       struct iovec iov[],
+                       unsigned int *out_num, unsigned int *in_num)
+{
+       const unsigned int max = descs_per_page();
+       const unsigned long page_size = getpagesize();
+       unsigned int n = 0;
+
+       *out_num = *in_num = 0;
+
+       if (i >= max) {
+               fprintf(stderr, "Desc head is %u\n", i);
+               return;
+       }
+
+       for (;;) {
+               iov[n].iov_len = desc[i].len;
+               /* Widen before multiplying so that a large pfn cannot wrap. */
+               iov[n].iov_base = (void *)(vdev->memstart
+                               + (unsigned long)desc[i].pfn * page_size
+                               + desc[i].offset);
+               n++;
+
+               if (desc[i].flags & VIRTIO_DESC_F_WRITE) {
+                       (*in_num)++;
+               } else {
+                       if (*in_num)
+                               fprintf(stderr, "Descriptor has out after in\n");
+                       (*out_num)++;
+               }
+
+               if (!(desc[i].flags & VIRTIO_DESC_F_NEXT))
+                       break;
+
+               /* iov[] is full: a longer chain must contain a loop. */
+               if (n == max) {
+                       fprintf(stderr, "Looped descriptor\n");
+                       break;
+               }
+
+               i = desc[i].next;
+               if (i >= max) {
+                       fprintf(stderr, "Desc next is %u\n", i);
+                       break;
+               }
+               if (desc[i].flags & VIRTIO_DESC_F_HEAD)
+                       fprintf(stderr, "Descriptor has middle head at %u\n", i);
+       }
+}
+
+/*
+ * We've used a buffer, tell the Guest about it: store (id, len) in the
+ * next slot of the used array, then advance the used index.
+ *
+ * The slot is filled in before the index is bumped, so that a Guest
+ * looking at the shared page never finds the index covering an entry that
+ * has not been written yet.
+ */
+static void add_used(struct virtqueue_info *vqi, unsigned int id, int len)
+{
+       struct virtio_used *used;
+
+       used = &vqi->used[*vqi->used_idx % descs_per_page()];
+       used->id = id;
+       used->len = len;
+
+       /* Compiler barrier: keep the stores above ahead of the index update.
+        * NOTE(review): a host with weaker store ordering than x86 would
+        * need a real write barrier here. */
+       __asm__ __volatile__("" : : : "memory");
+
+       (*vqi->used_idx)++;
+       verbose("%s:used_idx = %d\n", __FUNCTION__, *vqi->used_idx);
+}
+
+/*
+ * See if the Guest has a buffer for us on this queue.
+ *
+ * Returns the descriptor head number found at our current read position,
+ * or descs_per_page() when there is nothing usable: either the Guest has
+ * published nothing new, or the entry it published is not a valid
+ * descriptor index.  The read position itself is only moved by
+ * advance_available().
+ */
+static unsigned int get_available(struct virtqueue_info *vqi)
+{
+       const unsigned int max = descs_per_page();
+       /* Read the Guest-written index once so every check sees one value. */
+       const unsigned int avail_idx = *vqi->avail_idx;
+       unsigned int num;
+
+       if (avail_idx - vqi->last_avail_idx > max)
+               fprintf(stderr, "Guest moved available index from %u to %u\n",
+                       vqi->last_avail_idx, avail_idx);
+
+       if (avail_idx == vqi->last_avail_idx)
+               return max;
+
+       num = vqi->available[vqi->last_avail_idx % max];
+       if (num >= max) {
+               /* Never hand an out-of-range head to the callers: they use
+                * it to index the descriptor page. */
+               fprintf(stderr, "Guest says index %u is available\n", num);
+               return max;
+       }
+       return num;
+}
+
+/* Move our read position in the available array on by one entry. */
+static void advance_available(struct virtqueue_info *vqi)
+{
+       vqi->last_avail_idx += 1;
+}
+
+/*
+ * Move pending packets from the tap fd into the Guest's input queue.
+ *
+ * Does nothing until the queues have been registered.  Each round polls
+ * the tap fd without blocking and, if a packet is waiting, reads it
+ * straight into the next available input buffer and reports that buffer
+ * as used.  When the Guest has no buffer available the packet is read
+ * into a scratch buffer (i.e. dropped) and the loop ends.  At most
+ * descs_per_page() packets are handled per call.  The irq callback is
+ * invoked once at the end if at least one packet was delivered.
+ */
+static void handle_virtnet_input(struct virtio_device *dev)
+{
+       const unsigned int max = descs_per_page();
+       struct virtnet_info *vni = dev->virtio_priv;
+       struct iovec iov[max];
+       struct iovec *in_iov;
+       unsigned int out_num, in_num, desc;
+       unsigned int delivered = 0;
+       unsigned int credit;
+       struct timeval tv;
+       fd_set rfds;
+       int len;
+
+       /* Check if not yet registered. */
+       if (!dev->register_done)
+               return;
+
+       for (credit = max; credit; credit--) {
+               /* Zero timeout: poll, never block. */
+               tv.tv_sec = 0;
+               tv.tv_usec = 0;
+               FD_ZERO(&rfds);
+               FD_SET(dev->tap_fd, &rfds);
+
+               /* Look for a packet first, so that descriptors are only
+                * examined - and "no dma buffer" only reported - when there
+                * really is something to receive. */
+               if (select(dev->tap_fd + 1, &rfds, NULL, NULL, &tv) <= 0)
+                       break;
+               if (!FD_ISSET(dev->tap_fd, &rfds)) {
+                       verbose("virt input, select didnt set tap fd\n");
+                       break;
+               }
+
+               /* Find any input descriptor head. */
+               desc = get_available(&vni->in);
+               if (desc == max) {
+                       if (dev->desc->status & VIRTIO_DEVICE_S_DRIVER_OK)
+                               printf("network: no dma buffer!\n");
+                       /* Maybe don't discard but leave it until we have
+                        * something? */
+                       out_num = 0;
+                       discard_iovec(iov, &in_num);
+               } else {
+                       gather_desc(dev, vni->in.desc, desc, iov, &out_num, &in_num);
+                       if (out_num != 0)
+                               fprintf(stderr, "network: output in receive queue?\n");
+                       verbose("%s: gathered %d desc\n", __FUNCTION__, in_num);
+               }
+
+               /* gather_desc() stores output descriptors ahead of the input
+                * ones, so the writable buffers start at iov[out_num]. */
+               in_iov = iov + out_num;
+
+               len = readv(dev->tap_fd, in_iov, in_num);
+               if (len <= 0) {
+                       fprintf(stderr, "reading network error %d\n", len);
+                       break;
+               }
+
+               /* Dump the first two bytes, as the %02x format intends. */
+               verbose("virt input packet len %i addr %p [%02x %02x] (%s)\n",
+                       len, in_iov[0].iov_base,
+                       ((unsigned char *)in_iov[0].iov_base)[0],
+                       ((unsigned char *)in_iov[0].iov_base)[1],
+                       desc == max ? "discarded" : "sent");
+
+               /* A discarded packet consumed no Guest buffer: stop here. */
+               if (desc == max)
+                       break;
+
+               advance_available(&vni->in);
+               add_used(&vni->in, desc, len);
+               delivered++;
+       }
+
+       if (delivered) {
+               verbose("%s:triggering irq\n", __FUNCTION__);
+               dev->trigger_irq(dev->opaque);
+       }
+}
+
+/*
+ * Transmit every buffer the Guest has made available on the output queue.
+ *
+ * Does nothing until the queues have been registered.  Each available
+ * chain is written to the tap fd with a single writev() and then handed
+ * back through the used array with a length of 0.  The irq callback is
+ * invoked once after the loop.
+ */
+static void handle_virtnet_output(struct virtio_device *dev)
+{
+       const unsigned int max = descs_per_page();
+       struct virtnet_info *vni = dev->virtio_priv;
+       struct iovec iov[max];
+       unsigned int desc, out_num, in_num;
+
+       if (!dev->register_done)
+               return;
+
+       /* Send all output descriptors. */
+       while ((desc = get_available(&vni->out)) < max) {
+               advance_available(&vni->out);
+               gather_desc(dev, vni->out.desc, desc, iov, &out_num, &in_num);
+               if (in_num != 0)
+                       fprintf(stderr, "network: recv descs in output queue?\n");
+               verbose("%s:gathered %d out dec\n", __FUNCTION__, out_num);
+
+               /* A failed write loses the packet, but must not go
+                * unnoticed.  The buffer is returned to the Guest either
+                * way. */
+               if (writev(dev->tap_fd, iov, out_num) < 0)
+                       perror("network: writev");
+               add_used(&vni->out, desc, 0);
+       }
+       dev->trigger_irq(dev->opaque);
+}
+
+/*
+ * Allocate a device together with its descriptor, fill in the descriptor
+ * from the arguments and append the device to the tail of the list.
+ *
+ * Both structures are zero-initialised, so fields that are not set here
+ * (for example desc->status, which the input path reads) start out as 0
+ * instead of heap garbage.
+ *
+ * Returns the new device, or NULL if memory could not be allocated; the
+ * list is left untouched in that case.
+ */
+static struct virtio_device *new_device(struct vio_device_list *devices,
+               uint16_t type, uint16_t num_pages, uint16_t features)
+{
+       struct virtio_device *dev = calloc(1, sizeof(*dev));
+
+       if (!dev)
+               return NULL;
+
+       dev->desc = calloc(1, sizeof(*dev->desc));
+       if (!dev->desc) {
+               free(dev);
+               return NULL;
+       }
+       dev->desc->type = type;
+       dev->desc->features = features;
+       dev->desc->num_pages = num_pages;
+
+       /* Link in only once the device is fully constructed. */
+       dev->next = NULL;
+       *(devices->lastdev) = dev;
+       devices->lastdev = &dev->next;
+
+       return dev;
+}
+
+/*
+ * Record where the queues of the device identified by key live.
+ *
+ * in_addr and out_addr are used as the address of the first page of the
+ * input and the output queue respectively; memstart is stored as the base
+ * that descriptor addresses are later added to.  Marks the device as
+ * registered.  An unknown key is reported on stderr.
+ */
+void virtio_register_mem(unsigned long memstart, unsigned int key,
+                         unsigned long out_addr, unsigned long in_addr)
+{
+       struct virtio_device *dev;
+       struct virtnet_info *vni;
+
+       for (dev = devices.dev; dev != NULL; dev = dev->next)
+               if (dev->id == key)
+                       break;
+
+       if (dev == NULL) {
+               fprintf(stderr, "%s: no device found for key %x\n", __FUNCTION__, key);
+               return;
+       }
+
+       vni = dev->virtio_priv;
+       setup_virtqueue_info(&vni->in, (void*)in_addr);
+       setup_virtqueue_info(&vni->out, (void*)out_addr);
+       dev->register_done = 1;
+       dev->memstart = memstart;
+}
+
+/*
+ * Create the virtio network device and append it to the global list.
+ *
+ * opaque       passed back to trigger_irq when the irq is raised
+ * key          id under which the device is later looked up
+ * tap_fd       file descriptor packets are read from and written to
+ * trigger_irq  callback invoked after buffers have been handed back
+ *
+ * Returns the new device, or NULL if it could not be allocated.  The
+ * handlers stay inactive until virtio_register_mem() is called for key.
+ */
+struct virtio_device* setup_virtnet(void *opaque,
+       unsigned int key,
+       int tap_fd,
+       void (*trigger_irq)(void *opaque))
+{
+       struct virtio_device *dev;
+       struct virtnet_info *vni;
+
+       vni = calloc(1, sizeof(*vni));
+       if (!vni)
+               return NULL;
+
+       /* new_device() takes (list, type, num_pages, features): the device
+        * type comes before the page count.
+        * NOTE(review): 6 is presumably three pages (descriptors, available,
+        * used) for each of the in and out queues - confirm. */
+       dev = new_device(&devices, VIRTIO_DEVICE_T_NET, 6,
+                        VIRTIO_DEVICE_F_RANDOMNESS);
+       if (!dev) {
+               free(vni);
+               return NULL;
+       }
+
+       dev->handle_output = handle_virtnet_output;
+       dev->handle_input = handle_virtnet_input;
+       dev->trigger_irq = trigger_irq;
+       dev->tap_fd = tap_fd;
+       dev->id = key;
+
+       dev->virtio_priv = vni;
+       dev->opaque = opaque;
+       dev->register_done = 0;
+
+       return dev;
+}
+
+/* Make the global device list empty: the tail pointer refers to the head slot. */
+void virtio_init(void)
+{
+       devices.lastdev = &devices.dev;
+       *devices.lastdev = NULL;
+}
+
diff --git a/qemu/virtio.h b/qemu/virtio.h
new file mode 100644
index 0000000..64306fd
--- /dev/null
+++ b/qemu/virtio.h
@@ -0,0 +1,88 @@
+/*
+ * More efficient lguest implementation of virtio, using descriptors.
+ * 
+ * This allows zero-copy from guest <-> host.  It uses a page of
+ * descriptors, a page to say what descriptors to use, and a page to say
+ * what's been used: one such set for inbufs and one for outbufs.
+ *
+ * Copyright 2007 Dor Laor <[EMAIL PROTECTED]> Qumranet
+ * Copyright 2007 Rusty Russell <[EMAIL PROTECTED]> IBM Corporation
+ * 
+ * Permission is hereby granted, free of charge, to any person
obtaining a copy
+ * of this software and associated documentation files (the
"Software"), to deal
+ * in the Software without restriction, including without limitation
the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell
+ * copies of the Software, and to permit persons to whom the Software
is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN
+ * THE SOFTWARE.
+*/
+
+#ifndef __VIRTIO_H_
+#define __VIRTIO_H_
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <stdint.h>
+#include <asm/types.h>
+#include <asm/page.h>
+#include <linux/virtio_be.h>
+
+/* The device structure describes a single device; devices are chained
+ * into a struct vio_device_list through the next pointer. */
+struct virtio_device
+{
+       unsigned int id; /* key that notifications and registrations are matched against */
+       /* The linked-list pointer. */
+       struct virtio_device *next;
+       /* The descriptor for this device (type, features, num_pages, status). */
+       struct virtio_device_desc *desc;
+
+       void (*trigger_irq)(void *opaque); /* called with the opaque pointer below to raise the irq */
+
+       void (*handle_input)(struct virtio_device *vdev); /* moves data from the outside world to the Guest */
+       void (*handle_output)(struct virtio_device *vdev); /* moves data queued by the Guest to the outside world */
+
+       /* Device-specific data. */
+       void *virtio_priv;
+
+       /* Qemu private data, handed to trigger_irq. */
+       void *opaque;
+       
+       /* The fd connecting the device to the outside world. */
+       int tap_fd;
+
+       #define VIRTIO_DEVICE_INPUT 1 /* iotype bit for input */
+       #define VIRTIO_DEVICE_OUTPUT 2 /* iotype bit for output; tested by handle_notify() */
+       unsigned int io_type; /* NOTE(review): presumably a mask of the bits above - confirm against users */
+
+       unsigned int register_done; /* set to 1 by virtio_register_mem(); the handlers return early while it is 0 */
+
+       unsigned long memstart; /* base that descriptor pfn/offset values are added to */
+};
+
+struct vio_device_list /* set up empty by virtio_init() */
+{
+       /* A singly linked list of devices; NULL when the list is empty. */
+       struct virtio_device *dev;
+       /* ... And an end pointer so we can easily append new devices: it
+        * refers to the pointer slot that the next new device is stored in. */
+       struct virtio_device **lastdev;
+};
+
+void virtio_register_mem(unsigned long memstart, unsigned int key,
unsigned long out_addr, unsigned long in_addr); /* set up the in/out queues of the device with id key and mark it registered */
+void handle_notify(unsigned int key,  unsigned int iotype); /* run the output handler of the device with id key if iotype has VIRTIO_DEVICE_OUTPUT set */
+struct virtio_device* setup_virtnet(void *opaque,
+       unsigned int key,
+       int tap_fd,
+       void (*trigger_irq)(void *opaque)); /* create the network device on tap_fd and append it to the global list */
+void virtio_init(void); /* reset the global device list to empty */
+
+#endif

-----
In simplicity there is elegance.
Dor Laor ;)


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to