The following patch introduces a KVM guest balloon driver. Communication
to/from the host is performed via virtio.

The next patch implements the QEMU driver and handling.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: linux-2.6-nv/drivers/virtio/Kconfig
===================================================================
--- linux-2.6-nv.orig/drivers/virtio/Kconfig
+++ linux-2.6-nv/drivers/virtio/Kconfig
@@ -23,3 +23,12 @@ config VIRTIO_PCI
 
          If unsure, say M.
 
+config KVM_BALLOON
+       tristate "KVM balloon driver (EXPERIMENTAL)"
+       depends on VIRTIO_PCI
+       ---help---
+        This driver provides support for ballooning memory in/out of a
+        KVM paravirt guest.
+
+        If unsure, say M.
+
Index: linux-2.6-nv/drivers/virtio/Makefile
===================================================================
--- linux-2.6-nv.orig/drivers/virtio/Makefile
+++ linux-2.6-nv/drivers/virtio/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
+obj-$(CONFIG_KVM_BALLOON) += kvm_balloon.o
Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c
===================================================================
--- /dev/null
+++ linux-2.6-nv/drivers/virtio/kvm_balloon.c
@@ -0,0 +1,577 @@
+/*
+ * KVM guest balloon driver
+ *
+ * Copyright (C) 2007, Qumranet, Inc., Dor Laor <[EMAIL PROTECTED]>
+ * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti <[EMAIL PROTECTED]>
+ * 
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_pci.h>
+#include <linux/preempt.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
+MODULE_AUTHOR ("Dor Laor");
+MODULE_DESCRIPTION ("Implements guest ballooning support");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
+
+/* Commands carried in virtio_balloon_hdr.cmd (guest -> host). */
+#define CMD_BALLOON_INFLATE 0x1
+#define CMD_BALLOON_DEFLATE 0x2
+
+/* Module parameter: non-zero enables dprintk() debug output. */
+static int kvm_balloon_debug;
+
+#define DEBUG
+#ifdef DEBUG
+#define dprintk(str...) if (kvm_balloon_debug) printk(str)
+#else
+#define dprintk(str...)
+#endif
+
+/* Pages currently held by the balloon; protected by balloon_plist_lock. */
+static LIST_HEAD(balloon_plist);
+/* Completed buffers awaiting processing; protected by balloon_queue_lock. */
+static LIST_HEAD(balloon_work);
+/* Number of entries on balloon_plist; protected by balloon_plist_lock. */
+static int balloon_size = 0;
+static DEFINE_SPINLOCK(balloon_plist_lock);
+static DEFINE_SPINLOCK(balloon_queue_lock);
+
+/* Header prepended to every balloon request; status is filled in on completion. */
+struct virtio_balloon_hdr {
+       uint8_t cmd;
+       uint8_t status;
+};
+
+#define BALLOON_DATA_SIZE 200
+
+/* One request: header plus a payload of one 32-bit page count followed by pfns. */
+struct balloon_buf {
+       struct virtio_balloon_hdr hdr;
+       u8 data[BALLOON_DATA_SIZE];
+};
+
+/* Links a completed balloon_buf into the balloon_work queue. */
+struct balloon_work {
+       struct balloon_buf *buf;
+       struct list_head list;
+};
+
+/* One sg entry for the header, one for the data payload. */
+#define VIRTIO_MAX_SG 2
+
+/* Driver state; only a single balloon device instance is supported. */
+struct virtballoon {
+       struct virtio_device *dev;
+       struct virtqueue *vq;
+       struct task_struct *balloon_thread;
+       wait_queue_head_t balloon_wait;
+       wait_queue_head_t rmmod_wait;
+       uint32_t target_nrpages;
+       atomic_t inflight_bufs;
+};
+
+/* Tracks a single guest page handed to the host while ballooned. */
+struct balloon_page {
+       struct page *bpage;
+       struct list_head bp_list;
+};
+
+struct virtballoon virtballoon;
+
+/*
+ * Allocate a zeroed request buffer (header + pfn payload area).
+ * Returns NULL on failure; the caller owns the buffer until it is
+ * successfully handed to the virtqueue by send_balloon_buf().
+ *
+ * Fix: file-scope helper was missing `static`, polluting the global
+ * namespace.
+ */
+static struct balloon_buf *alloc_balloon_buf(void)
+{
+       struct balloon_buf *buf;
+
+       buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+       if (!buf)
+               printk(KERN_ERR "%s: allocation failed\n", __func__);
+
+       return buf;
+}
+
+/*
+ * Queue a request buffer on the balloon virtqueue and kick the host.
+ * On success the virtqueue owns @buf until its completion is handled;
+ * on failure the caller keeps ownership and must free it.
+ * Returns 0 or a negative errno from add_buf().
+ *
+ * Fix: printk was missing a log level.
+ */
+static int send_balloon_buf(uint8_t cmd, struct balloon_buf *buf)
+{
+       struct scatterlist sg[VIRTIO_MAX_SG];
+       struct virtqueue *vq = virtballoon.vq;
+       int err = 0;
+
+       buf->hdr.cmd = cmd;
+
+       sg_init_table(sg, VIRTIO_MAX_SG);
+       sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr));
+       sg_set_buf(&sg[1], &buf->data, sizeof(buf->data));
+
+       spin_lock_irq(&balloon_queue_lock);
+       /* NOTE(review): 0 out / 2 in sgs -- host writes back hdr.status;
+        * confirm the direction convention against the host side. */
+       err = vq->vq_ops->add_buf(vq, sg, 0, 2, buf);
+       if (err) {
+               printk(KERN_ERR "%s: add_buf err\n", __func__);
+               goto out;
+       }
+       atomic_inc(&virtballoon.inflight_bufs);
+
+       /* TODO: kick several balloon buffers at once */
+       vq->vq_ops->kick(vq);
+out:
+       spin_unlock_irq(&balloon_queue_lock);
+       return err;
+}
+
+/*
+ * Inflate the balloon by @npages: allocate pages from the guest, hand
+ * their pfns to the host and account them on balloon_plist.
+ * Returns the number of pages queued for inflation, or a negative errno.
+ *
+ * Fix: @buf was leaked on every error path; it is only owned by the
+ * virtqueue after a successful send_balloon_buf().
+ */
+static int kvm_balloon_inflate(int32_t npages)
+{
+       LIST_HEAD(tmp_list);
+       struct balloon_page *node, *tmp;
+       struct balloon_buf *buf;
+       u32 *pfn;
+       int allocated = 0;
+       int i, r = -ENOMEM;
+
+       buf = alloc_balloon_buf();
+       if (!buf)
+               return r;
+
+       /* payload layout: one u32 page count, then the pfns */
+       pfn = (u32 *)&buf->data;
+       *pfn++ = (u32)npages;
+
+       for (i = 0; i < npages; i++) {
+               node = kzalloc(sizeof(struct balloon_page), GFP_KERNEL);
+               if (!node)
+                       goto out_free;
+
+               node->bpage = alloc_page(GFP_HIGHUSER | __GFP_NORETRY);
+               if (!node->bpage) {
+                       kfree(node);
+                       goto out_free;
+               }
+
+               list_add(&node->bp_list, &tmp_list);
+               allocated++;
+               *pfn = page_to_pfn(node->bpage);
+               pfn++;
+       }
+
+       r = send_balloon_buf(CMD_BALLOON_INFLATE, buf);
+       if (r)
+               goto out_free;
+
+       spin_lock(&balloon_plist_lock);
+       list_splice(&tmp_list, &balloon_plist);
+       balloon_size += allocated;
+       totalram_pages -= allocated;
+       dprintk("%s: current balloon size=%d\n", __FUNCTION__,
+              balloon_size);
+       spin_unlock(&balloon_plist_lock);
+       return allocated;
+
+out_free:
+       list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) {
+               __free_page(node->bpage);
+               list_del(&node->bp_list);
+               kfree(node);
+       }
+       kfree(buf);             /* never queued; fix leak */
+       return r;
+}
+
+/*
+ * Deflate the balloon by @npages: pick ballooned pages off
+ * balloon_plist and ask the host to return them.  The pages
+ * themselves are freed in deflate_done() once the host acks.
+ * Returns the number of pages queued for deflation, or a negative
+ * errno.
+ *
+ * Fixes: returned 0 (i.e. "0 pages moved") when the request buffer
+ * could not be allocated, and leaked @buf on the error path; printk
+ * was missing a log level.
+ */
+static int kvm_balloon_deflate(int32_t npages)
+{
+       LIST_HEAD(tmp_list);
+       struct balloon_page *node, *tmp;
+       struct balloon_buf *buf;
+       u32 *pfn;
+       int deallocated = 0;
+       int r = 0;
+
+       buf = alloc_balloon_buf();
+       if (!buf)
+               return -ENOMEM;
+
+       spin_lock(&balloon_plist_lock);
+
+       if (balloon_size < npages) {
+               printk(KERN_ERR "%s: balloon=%d while deflate rq=%d\n",
+                      __FUNCTION__, balloon_size, npages);
+               npages = balloon_size;
+               if (!npages)
+                       goto out;
+       }
+
+       /* payload layout: negated u32 page count, then the pfns */
+       pfn = (u32 *)&buf->data;
+       *pfn++ = (u32)-npages;
+
+       /*
+        * Move the balloon pages to tmp list before issuing
+        * the virtio buffer
+        */
+       list_for_each_entry_safe(node, tmp, &balloon_plist, bp_list) {
+               *pfn++ = page_to_pfn(node->bpage);
+               list_move(&node->bp_list, &tmp_list);
+               if (++deallocated == npages)
+                       break;
+       }
+
+       r = send_balloon_buf(CMD_BALLOON_DEFLATE, buf);
+       if (r)
+               goto out;
+
+       list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) {
+               list_del(&node->bp_list);
+               kfree(node);
+       }
+       balloon_size -= npages;
+       totalram_pages += npages;
+       dprintk("%s: current balloon size=%d\n", __FUNCTION__,
+              balloon_size);
+
+       spin_unlock(&balloon_plist_lock);
+       return deallocated;
+
+out:
+       /* undo: put any moved pages back on the balloon list */
+       list_splice(&tmp_list, &balloon_plist);
+       spin_unlock(&balloon_plist_lock);
+       kfree(buf);             /* never queued; fix leak */
+       return r;
+}
+
+/*
+ * The payload carries one u32 page count followed by pfns, so a buffer
+ * holds (BALLOON_DATA_SIZE / sizeof(u32)) - 1 pfns.
+ *
+ * Fix: the original expression subtracted sizeof(int32_t) array
+ * elements (4) instead of the single u32 slot used for the count, and
+ * was not fully parenthesized, so `x * MAX_BALLOON_PAGES_PER_OP` would
+ * mis-associate.
+ */
+#define MAX_BALLOON_PAGES_PER_OP ((BALLOON_DATA_SIZE / sizeof(u32)) - 1)
+#define MAX_BALLOON_XFLATE_OP 1000000
+
+/*
+ * Split an inflate (npages > 0) or deflate (npages < 0) request into
+ * buffer-sized chunks.  Returns the number of pages actually moved,
+ * or a negative errno.
+ */
+static int kvm_balloon_xflate(int32_t npages)
+{
+       int r = -EINVAL, i;
+       int iterations;
+       int abspages;
+       int curr_pages = 0;
+       int gfns_per_buf;
+
+       abspages = abs(npages);
+
+       if (abspages > MAX_BALLOON_XFLATE_OP) {
+               printk(KERN_ERR "%s: bad npages=%d\n", __func__,
+                       npages);
+               return -EINVAL;
+       }
+
+       dprintk("%s: got %s, npages=%d\n", __FUNCTION__,
+              (npages > 0)? "inflate":"deflate", npages);
+
+       gfns_per_buf = MAX_BALLOON_PAGES_PER_OP;
+
+       /*
+        * Call the balloon in PAGE_SIZE*pfns-per-buf
+        * iterations
+        */
+       iterations = DIV_ROUND_UP(abspages, gfns_per_buf);
+       dprintk("%s: iterations=%d\n", __FUNCTION__, iterations);
+
+       for (i = 0; i < iterations; i++) {
+               int32_t pages_in_iteration =
+                       min(abspages - curr_pages, gfns_per_buf);
+
+               if (npages > 0)
+                       r = kvm_balloon_inflate(pages_in_iteration);
+               else
+                       r = kvm_balloon_deflate(pages_in_iteration);
+
+               if (r < 0)
+                       return r;
+               curr_pages += r;
+               /* partial progress: stop early rather than over-request */
+               if (r != pages_in_iteration)
+                       break;
+               cond_resched();
+       }
+
+       return curr_pages;
+}
+
+/*
+ * Completion handler for an inflate request (runs in the worker
+ * thread).  On success nothing is needed: the pages were already
+ * accounted in kvm_balloon_inflate().  On error, locate each pfn from
+ * the failed buffer on balloon_plist and give the pages back to the
+ * system, then publish the corrected target to the host.
+ */
+static void inflate_done(struct balloon_buf *buf)
+{
+       uint8_t status = buf->hdr.status;
+
+       /* error inflating, return pages to the system */
+       if (status) {
+               struct balloon_page *node, *tmp;
+
+               spin_lock(&balloon_plist_lock);
+               list_for_each_entry_safe(node, tmp, &balloon_plist, bp_list) {
+                       u32 *pfn = (u32 *)&buf->data;
+                       int npages = (int)*pfn++;
+                       int i;
+
+                       /* O(n*m) scan: match this node against the buffer's pfns */
+                       for (i=0;i<npages;i++) {
+                               if (page_to_pfn(node->bpage) == *pfn) {
+                                       list_del(&node->bp_list);
+                                       kfree(node);
+                                       __free_page(pfn_to_page(*pfn));
+                                       balloon_size--;
+                                       totalram_pages++;
+                                       /* undo the target adjustment for this page */
+                                       virtballoon.target_nrpages++;
+                                       break;
+                               }
+                               pfn++;
+                       }
+               }
+               spin_unlock(&balloon_plist_lock);
+               /* tell the host the target it will actually get */
+               virtballoon.dev->config->set(virtballoon.dev, 0,
+                                          &virtballoon.target_nrpages,
+                                          sizeof(virtballoon.target_nrpages));
+       }
+}
+
+/*
+ * Completion handler for a deflate request (runs in the worker
+ * thread).  On success the host released the pages, so free them back
+ * to the allocator (accounting was already adjusted when the request
+ * was submitted).  On error the pages are still ballooned: rebuild
+ * balloon_plist entries for them, re-adjust the accounting and
+ * publish the corrected target to the host.
+ *
+ * Fix: the node allocation ran with GFP_KERNEL while holding a
+ * spinlock, which may sleep; use GFP_ATOMIC.
+ */
+static void deflate_done(struct balloon_buf *buf)
+{
+       uint8_t status = buf->hdr.status;
+
+       /* deflate OK, return pages to the system */
+       if (!status) {
+               u32 *pfn = (u32 *)&buf->data;
+               int npages, i;
+
+               /* count was stored negated; recover the magnitude */
+               npages = (int)*pfn++;
+               npages = abs(npages);
+
+               for (i = 0; i<npages; i++) {
+                       __free_page(pfn_to_page(*pfn));
+                       pfn++;
+               }
+       /* deflate error, add pages back to ballooned list */
+       } else {
+               u32 *pfn = (u32 *)&buf->data;
+               int npages, i;
+               struct balloon_page *node;
+
+               npages = (int)*pfn++;
+               npages = abs(npages);
+
+               spin_lock(&balloon_plist_lock);
+               for (i = 0; i < npages; i++) {
+                       node = kzalloc(sizeof(struct balloon_page), GFP_ATOMIC);
+                       if (!node) {
+                               spin_unlock(&balloon_plist_lock);
+                               printk(KERN_ERR "%s: allocation failure\n",
+                                       __func__);
+                               /*
+                                * NOTE(review): the remaining pfns in this
+                                * buffer are neither freed nor re-tracked
+                                * here, and the host is not told about the
+                                * partial adjustment.  TODO: fix.
+                                */
+                               goto out;
+                       }
+
+                       node->bpage = pfn_to_page(*pfn++);
+                       list_add(&node->bp_list, &balloon_plist);
+                       balloon_size++;
+                       totalram_pages--;
+                       virtballoon.target_nrpages--;
+               }
+               spin_unlock(&balloon_plist_lock);
+               /* tell the host the target it will actually get */
+               virtballoon.dev->config->set(virtballoon.dev, 0,
+                                          &virtballoon.target_nrpages,
+                                          sizeof(virtballoon.target_nrpages));
+       }
+out:
+       return;
+}
+
+/*
+ * Main worker thread: sleeps until woken by the config interrupt
+ * (target change), the tx-done callback (completed buffers) or module
+ * unload, then (1) inflates/deflates towards target_nrpages and
+ * (2) runs completion handlers for queued balloon_work items.
+ */
+static int balloon_thread(void *p)
+{
+       struct virtballoon *v = p;
+       DEFINE_WAIT(wait);
+
+       set_freezable();
+       for (;;) {
+               prepare_to_wait(&v->balloon_wait, &wait, TASK_UNINTERRUPTIBLE);
+               schedule();
+               finish_wait(&v->balloon_wait, &wait);
+
+               try_to_freeze();
+
+               if (kthread_should_stop())
+                       break;
+
+               if (v->target_nrpages != totalram_pages) {
+                       /* positive delta inflates, negative deflates */
+                       int delta = totalram_pages - v->target_nrpages;
+                       kvm_balloon_xflate(delta);
+               }
+
+               spin_lock_irq(&balloon_queue_lock);
+               while (!list_empty(&balloon_work)) {
+                       struct balloon_work *work;
+                       struct balloon_buf *buf;
+
+                       work = list_entry(balloon_work.next,
+                                        struct balloon_work, list);
+                       list_del(&work->list);
+                       /* drop the lock: the completion handlers may sleep */
+                       spin_unlock_irq(&balloon_queue_lock);
+                       buf = work->buf;
+                       kfree(work);
+
+                       switch(buf->hdr.cmd) {
+                       case CMD_BALLOON_DEFLATE:
+                               deflate_done(buf);
+                               break;
+                       case CMD_BALLOON_INFLATE:
+                               inflate_done(buf);
+                               break;
+                       default:
+                               printk("%s: unknown cmd 0x%x\n", __func__,
+                                       buf->hdr.cmd);
+                       }
+                       kfree(buf);
+                       /* last in-flight buffer done: let rmmod proceed */
+                       if (atomic_dec_and_test(&v->inflight_bufs)) {
+                               if (waitqueue_active(&v->rmmod_wait)) {
+                                       wake_up(&v->rmmod_wait);
+                                       /* module unload path: thread exits */
+                                       return 0;
+                               }
+                       }
+                       cond_resched();
+                       spin_lock_irq(&balloon_queue_lock);
+               }
+               spin_unlock_irq(&balloon_queue_lock);
+       }
+       return 0;
+}
+
+/*
+ * Virtqueue callback (interrupt context): collect completed buffers,
+ * queue them as balloon_work items and wake the worker thread to run
+ * the completion handlers in process context.
+ */
+static bool balloon_tx_done(struct virtqueue *vq)
+{
+       struct balloon_buf *buf;
+       unsigned int len;
+
+       spin_lock(&balloon_queue_lock);
+       while ((buf = vq->vq_ops->get_buf(vq, &len)) != NULL) {
+               struct balloon_work *work;
+
+               work = kzalloc(sizeof(struct balloon_work), GFP_ATOMIC);
+               if (!work)
+                       /*
+                        * NOTE(review): on allocation failure the completed
+                        * buf is dropped -- it leaks and inflight_bufs is
+                        * never decremented, which can hang rmmod.  TODO.
+                        */
+                       continue;
+               INIT_LIST_HEAD(&work->list);
+               work->buf = buf;
+
+               list_add(&work->list, &balloon_work);
+       }
+       spin_unlock(&balloon_queue_lock);
+       wake_up(&virtballoon.balloon_wait);
+
+       return true;
+}
+
+/*
+ * Config-change interrupt handler: read the host's requested target
+ * page count and, if it differs from ours, record it and wake the
+ * worker thread.
+ */
+static irqreturn_t balloon_irq(int irq, void *opaque)
+{
+       struct virtballoon *vb = opaque;
+       uint32_t host_target;
+
+       __virtio_config_val(vb->dev, 0, &host_target);
+
+       dprintk("%s: target_nrpages = %d, vb->target_nrpages = %d\n",
+               __func__, host_target, vb->target_nrpages);
+
+       if (host_target == vb->target_nrpages)
+               return IRQ_NONE;
+
+       vb->target_nrpages = host_target;
+       wake_up(&vb->balloon_wait);
+       return IRQ_HANDLED;
+}
+
+/* Virtio device ID claimed by this driver (must match the host side). */
+#define VIRTIO_ID_BALLOON 3
+
+static struct virtio_device_id id_table[] = {
+       { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID},
+       { 0 },
+};
+
+/*
+ * Bind to the balloon virtio device: find the virtqueue, register the
+ * config-change interrupt and start the worker thread.  Returns 0 on
+ * success or a negative errno (unwinding everything acquired so far).
+ *
+ * Fixes: kthread_run() failure was not checked (the driver would
+ * report success with no worker thread); "kvm_ballon" typo in the
+ * error message; printk log levels.
+ */
+static int balloon_probe(struct virtio_device *vdev)
+{
+       struct virtio_pci_device *pvdev = to_vp_device(vdev);
+       int err;
+
+       if (virtballoon.dev) {
+               printk(KERN_ERR "kvm_balloon: error, already registered\n");
+               return -EBUSY;
+       }
+
+       virtballoon.vq = vdev->config->find_vq(vdev, 0, balloon_tx_done);
+       if (IS_ERR(virtballoon.vq)) {
+               printk(KERN_ERR "%s: error finding vq\n", __func__);
+               return PTR_ERR(virtballoon.vq);
+       }
+
+       virtballoon.dev = vdev;
+       init_waitqueue_head(&virtballoon.balloon_wait);
+       init_waitqueue_head(&virtballoon.rmmod_wait);
+       atomic_set(&virtballoon.inflight_bufs, 0);
+
+       err = request_irq(pvdev->pci_dev->irq, balloon_irq, IRQF_SHARED,
+                         pvdev->vdev.dev.bus_id, &virtballoon);
+       if (err)
+               goto out_free_vq;
+
+       virtballoon.balloon_thread = kthread_run(balloon_thread,
+                                                &virtballoon,
+                                                "kvm_balloond");
+       if (IS_ERR(virtballoon.balloon_thread)) {
+               err = PTR_ERR(virtballoon.balloon_thread);
+               goto out_free_irq;
+       }
+       printk(KERN_INFO "kvm_balloon: registered\n");
+
+       return 0;
+
+out_free_irq:
+       free_irq(pvdev->pci_dev->irq, &virtballoon);
+out_free_vq:
+       vdev->config->del_vq(virtballoon.vq);
+       virtballoon.dev = NULL;
+       return err;
+}
+
+/*
+ * Unbind from the device: stop the worker (kthread_stop wakes it),
+ * release the interrupt and tear down the virtqueue.
+ *
+ * Fix: the IRQ requested in balloon_probe() was never freed, leaking
+ * the handler (and leaving a dangling shared-IRQ action) on unload;
+ * also reset virtballoon.dev so a later probe is not rejected.
+ */
+static void balloon_remove(struct virtio_device *vdev)
+{
+       struct virtio_pci_device *pvdev = to_vp_device(vdev);
+
+       kthread_stop(virtballoon.balloon_thread);
+       free_irq(pvdev->pci_dev->irq, &virtballoon);
+       vdev->config->del_vq(virtballoon.vq);
+       virtballoon.dev = NULL;
+}
+
+/* Driver registration record for the virtio bus. */
+static struct virtio_driver virtio_balloon = {
+       .driver.name =  KBUILD_MODNAME,
+       .driver.owner = THIS_MODULE,
+       .id_table =     id_table,
+       .probe =        balloon_probe,
+       .remove =       __devexit_p(balloon_remove),
+};
+
+/* kvm_balloon_debug: set non-zero at load time to enable dprintk output. */
+module_param(kvm_balloon_debug, int, 0);
+
+/* Module entry point: register with the virtio bus; probe() does the rest. */
+static int __init kvm_balloon_init(void)
+{
+       /* no device bound yet; balloon_probe() fills this in */
+       virtballoon.dev = NULL;
+       return register_virtio_driver(&virtio_balloon);
+}
+
+/*
+ * Module unload: if the balloon still holds pages, raise the target by
+ * the balloon size so the worker deflates everything, then sleep on
+ * rmmod_wait until the last in-flight buffer completes.
+ */
+static void __exit kvm_balloon_exit(void)
+{
+       spin_lock(&balloon_plist_lock);
+       if (balloon_size) {
+               DEFINE_WAIT(wait);
+
+               virtballoon.target_nrpages += balloon_size;
+               spin_unlock(&balloon_plist_lock);
+               virtballoon.dev->config->set(virtballoon.dev, 0,
+                                          &virtballoon.target_nrpages,
+                                          sizeof(virtballoon.target_nrpages));
+               /*
+                * NOTE(review): possible lost wakeup -- the worker is woken
+                * before we prepare_to_wait() on rmmod_wait; if it drains
+                * everything and checks waitqueue_active(&rmmod_wait) before
+                * we are on the queue, this schedule() sleeps forever.
+                * Consider wait_event() on inflight_bufs instead.
+                */
+               wake_up(&virtballoon.balloon_wait);
+               prepare_to_wait(&virtballoon.rmmod_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               schedule();
+               finish_wait(&virtballoon.rmmod_wait, &wait);
+               spin_lock(&balloon_plist_lock);
+       }
+
+       if (balloon_size)
+               printk(KERN_ERR "%s: exit while balloon not empty!\n",
+                       __FUNCTION__);
+
+       spin_unlock(&balloon_plist_lock);
+
+       unregister_virtio_driver(&virtio_balloon);
+}
+
+module_init(kvm_balloon_init);
+module_exit(kvm_balloon_exit);
Index: linux-2.6-nv/drivers/virtio/virtio_pci.c
===================================================================
--- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c
+++ linux-2.6-nv/drivers/virtio/virtio_pci.c
@@ -30,20 +30,6 @@ MODULE_DESCRIPTION("virtio-pci");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1");
 
-/* Our device structure */
-struct virtio_pci_device
-{
-       struct virtio_device vdev;
-       struct pci_dev *pci_dev;
-
-       /* the IO mapping for the PCI config space */
-       void *ioaddr;
-
-       /* a list of queues so we can dispatch IRQs */
-       spinlock_t lock;
-       struct list_head virtqueues;
-};
-
 struct virtio_pci_vq_info
 {
        /* the actual virtqueue */
@@ -67,6 +53,7 @@ static struct pci_device_id virtio_pci_i
        { 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */
        { 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */
        { 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */
+       { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */
        { 0 },
 };
 
@@ -89,12 +76,6 @@ static struct device virtio_pci_root = {
 /* Unique numbering for devices under the kvm root */
 static unsigned int dev_index;
 
-/* Convert a generic virtio device to our structure */
-static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
-{
-       return container_of(vdev, struct virtio_pci_device, vdev);
-}
-
 /* virtio config->feature() implementation */
 static bool vp_feature(struct virtio_device *vdev, unsigned bit)
 {
Index: linux-2.6-nv/include/linux/virtio_pci.h
===================================================================
--- linux-2.6-nv.orig/include/linux/virtio_pci.h
+++ linux-2.6-nv/include/linux/virtio_pci.h
@@ -19,6 +19,26 @@
 
 #include <linux/virtio_config.h>
 
+/* Our device structure */
+struct virtio_pci_device
+{
+       struct virtio_device vdev;
+       struct pci_dev *pci_dev;
+
+       /* the IO mapping for the PCI config space */
+       void *ioaddr;
+
+       /* a list of queues so we can dispatch IRQs */
+       spinlock_t lock;
+       struct list_head virtqueues;
+};
+
+/*
+ * Convert a generic virtio device to our structure.
+ *
+ * Fix: a plain (non-static, non-inline) function definition in a
+ * header produces "multiple definition" link errors once the header
+ * is included by more than one translation unit (virtio_pci.c and
+ * kvm_balloon.c both include it); make it static inline.
+ */
+static inline struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
+{
+       return container_of(vdev, struct virtio_pci_device, vdev);
+}
+
 /* A 32-bit r/o bitmask of the features supported by the host */
 #define VIRTIO_PCI_HOST_FEATURES       0
 

-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to