Anthony Liguori wrote:
> This patch adds support to QEMU for Rusty's recently introduce virtio balloon
> driver.  The user-facing portions of this are the introduction of a "balloon"
> and "info balloon" command in the monitor.
>
> I think using madvise unconditionally is okay but I am not sure.

Looks like it's not.  I just hung my host system after doing a bunch of 
ballooning with a kernel that doesn't have MM notifiers.

I'm inclined to think that we should have a capability check for MM 
notifiers and just not do madvise if they aren't present.  I don't think 
the ioctl approach that Marcelo took is sufficient as a malicious guest 
could possibly hose the host.

Having the guest allocate and not touch memory means that it should 
eventually be removed from the shadow page cache and eventually swapped 
out so ballooning isn't totally useless in the absence of MM notifiers.

Regards,

Anthony Liguori

>   If madvise
> is called on memory that is essentially locked (which is what pre-MM notifiers
> is like) then it should just be a nop right?
>
> Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
>
> diff --git a/qemu/Makefile.target b/qemu/Makefile.target
> index bb7be0f..d6b4f46 100644
> --- a/qemu/Makefile.target
> +++ b/qemu/Makefile.target
> @@ -464,7 +464,7 @@ VL_OBJS += rtl8139.o
>  VL_OBJS+= hypercall.o
>
>  # virtio devices
> -VL_OBJS += virtio.o virtio-net.o virtio-blk.o
> +VL_OBJS += virtio.o virtio-net.o virtio-blk.o virtio-balloon.o
>
>  ifeq ($(TARGET_BASE_ARCH), i386)
>  # Hardware support
> diff --git a/qemu/balloon.h b/qemu/balloon.h
> new file mode 100644
> index 0000000..ffce1fa
> --- /dev/null
> +++ b/qemu/balloon.h
> @@ -0,0 +1,14 @@
> +#ifndef _QEMU_BALLOON_H
> +#define _QEMU_BALLOON_H
> +
> +#include "cpu-defs.h"
> +
> +typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
> +
> +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
> +
> +void qemu_balloon(ram_addr_t target);
> +
> +ram_addr_t qemu_balloon_status(void);
> +
> +#endif
> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
> index 652b263..6c8d907 100644
> --- a/qemu/hw/pc.c
> +++ b/qemu/hw/pc.c
> @@ -1122,6 +1122,9 @@ static void pc_init1(ram_addr_t ram_size, int 
> vga_ram_size,
>       }
>      }
>
> +    if (pci_enabled)
> +     virtio_balloon_init(pci_bus, 0x1AF4, 0x1002);
> +
>      if (extboot_drive != -1) {
>       DriveInfo *info = &drives_table[extboot_drive];
>       int cyls, heads, secs;
> diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
> index f640395..1899c11 100644
> --- a/qemu/hw/pc.h
> +++ b/qemu/hw/pc.h
> @@ -152,6 +152,9 @@ void virtio_net_poll(void);
>  void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
>                     BlockDriverState *bs);
>
> +/* virtio-balloon.h */
> +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device);
> +
>  /* extboot.c */
>
>  void extboot_init(BlockDriverState *bs, int cmd);
> diff --git a/qemu/hw/virtio-balloon.c b/qemu/hw/virtio-balloon.c
> new file mode 100644
> index 0000000..1b5a689
> --- /dev/null
> +++ b/qemu/hw/virtio-balloon.c
> @@ -0,0 +1,142 @@
> +/*
> + * Virtio Block Device
> + *
> + * Copyright IBM, Corp. 2008
> + *
> + * Authors:
> + *  Anthony Liguori   <[EMAIL PROTECTED]>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "virtio.h"
> +#include "block.h"
> +#include "pc.h"
> +#include "balloon.h"
> +#include "sysemu.h"
> +
> +#include <sys/mman.h>
> +
> +/* from Linux's linux/virtio_blk.h */
> +
> +/* The ID for virtio_balloon */
> +#define VIRTIO_ID_BALLOON    5
> +
> +/* The feature bitmap for virtio balloon */
> +#define VIRTIO_BALLOON_F_MUST_TELL_HOST      0 /* Tell before reclaiming 
> pages */
> +
> +struct virtio_balloon_config
> +{
> +    /* Number of pages host wants Guest to give up. */
> +    uint32_t num_pages;
> +    /* Number of pages we've actually got in balloon. */
> +    uint32_t actual;
> +};
> +
> +typedef struct VirtIOBalloon
> +{
> +    VirtIODevice vdev;
> +    VirtQueue *ivq, *dvq;
> +    uint32_t num_pages;
> +    uint32_t actual;
> +} VirtIOBalloon;
> +
> +static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
> +{
> +    return (VirtIOBalloon *)vdev;
> +}
> +
> +static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    VirtIOBalloon *s = to_virtio_balloon(vdev);
> +    VirtQueueElement elem;
> +    unsigned int count;
> +
> +    while ((count = virtqueue_pop(vq, &elem)) != 0) {
> +     int i;
> +     unsigned int wlen = 0;
> +
> +     for (i = 0; i < elem.out_num; i++) {
> +         int flags;
> +         uint32_t *pfns = elem.out_sg[i].iov_base;
> +         unsigned int n_pfns = elem.out_sg[i].iov_len / 4;
> +         int j;
> +
> +         for (j = 0; j < n_pfns; j++) {
> +             if (vq == s->dvq) /* deflate */
> +                 flags = MADV_WILLNEED;
> +             else /* inflate */
> +                 flags = MADV_DONTNEED;
> +
> +             madvise(phys_ram_base + (pfns[j] << TARGET_PAGE_BITS),
> +                     TARGET_PAGE_SIZE, flags);
> +         }
> +
> +         wlen += elem.out_sg[i].iov_len;
> +     }
> +
> +     virtqueue_push(vq, &elem, wlen);
> +     virtio_notify(vdev, vq);
> +    }
> +}
> +
> +static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t 
> *config_data)
> +{
> +    VirtIOBalloon *dev = to_virtio_balloon(vdev);
> +    struct virtio_balloon_config config;
> +
> +    config.num_pages = dev->num_pages;
> +    config.actual = dev->actual;
> +
> +    memcpy(config_data, &config, 8);
> +}
> +
> +static void virtio_balloon_set_config(VirtIODevice *vdev,
> +                                   const uint8_t *config_data)
> +{
> +    VirtIOBalloon *dev = to_virtio_balloon(vdev);
> +    struct virtio_balloon_config config;
> +    memcpy(&config, config_data, 8);
> +    dev->actual = config.actual;
> +}
> +
> +static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
> +{
> +    return 0;
> +}
> +
> +static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
> +{
> +    VirtIOBalloon *dev = opaque;
> +
> +    if (target) {
> +     dev->num_pages = (ram_size - target) >> TARGET_PAGE_BITS;
> +     virtio_notify_config(&dev->vdev);
> +    }
> +
> +    return ram_size - (dev->actual << TARGET_PAGE_BITS);
> +}
> +
> +void *virtio_balloon_init(PCIBus *bus, uint16_t vendor, uint16_t device)
> +{
> +    VirtIOBalloon *s;
> +
> +    s = (VirtIOBalloon *)virtio_init_pci(bus, "virtio-balloon",
> +                                      vendor, device,
> +                                      0, VIRTIO_ID_BALLOON,
> +                                      0x05, 0x00, 0x00,
> +                                      8, sizeof(VirtIOBalloon));
> +
> +    s->vdev.get_config = virtio_balloon_get_config;
> +    s->vdev.set_config = virtio_balloon_set_config;
> +    s->vdev.get_features = virtio_balloon_get_features;
> +
> +    s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
> +    s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
> +
> +    qemu_add_balloon_handler(virtio_balloon_to_target, s);
> +
> +    return &s->vdev;
> +}
> diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c
> index 301b5a1..c8c3233 100644
> --- a/qemu/hw/virtio-blk.c
> +++ b/qemu/hw/virtio-blk.c
> @@ -153,7 +153,7 @@ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, 
> uint16_t device,
>                                      0x01, 0x80, 0x00,
>                                      16, sizeof(VirtIOBlock));
>
> -    s->vdev.update_config = virtio_blk_update_config;
> +    s->vdev.get_config = virtio_blk_update_config;
>      s->vdev.get_features = virtio_blk_get_features;
>      s->bs = bs;
>
> diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
> index 86f9e5a..8c1f953 100644
> --- a/qemu/hw/virtio-net.c
> +++ b/qemu/hw/virtio-net.c
> @@ -288,7 +288,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
>                                    0x02, 0x00, 0x00,
>                                    6, sizeof(VirtIONet));
>
> -    n->vdev.update_config = virtio_net_update_config;
> +    n->vdev.get_config = virtio_net_update_config;
>      n->vdev.get_features = virtio_net_get_features;
>      n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx);
>      n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx);
> diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
> index bbcb44c..1e8fb44 100644
> --- a/qemu/hw/virtio.c
> +++ b/qemu/hw/virtio.c
> @@ -304,6 +304,9 @@ static void virtio_config_writeb(void *opaque, uint32_t 
> addr, uint32_t data)
>       return;
>
>      memcpy(vdev->config + addr, &val, sizeof(val));
> +
> +    if (vdev->set_config)
> +     vdev->set_config(vdev, vdev->config);
>  }
>
>  static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
> @@ -316,6 +319,9 @@ static void virtio_config_writew(void *opaque, uint32_t 
> addr, uint32_t data)
>       return;
>
>      memcpy(vdev->config + addr, &val, sizeof(val));
> +
> +    if (vdev->set_config)
> +     vdev->set_config(vdev, vdev->config);
>  }
>
>  static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
> @@ -328,6 +334,9 @@ static void virtio_config_writel(void *opaque, uint32_t 
> addr, uint32_t data)
>       return;
>
>      memcpy(vdev->config + addr, &val, sizeof(val));
> +
> +    if (vdev->set_config)
> +     vdev->set_config(vdev, vdev->config);
>  }
>
>  static void virtio_map(PCIDevice *pci_dev, int region_num,
> @@ -356,7 +365,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
>       register_ioport_read(addr + 20, vdev->config_len, 4,
>                            virtio_config_readl, vdev);
>
> -     vdev->update_config(vdev, vdev->config);
> +     vdev->get_config(vdev, vdev->config);
>      }
>  }
>
> @@ -380,6 +389,14 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
> queue_size,
>      return &vdev->vq[i];
>  }
>
> +void virtio_notify_config(VirtIODevice *vdev)
> +{
> +    /* make sure we have the latest config */
> +    vdev->get_config(vdev, vdev->config);
> +    vdev->isr = 3;
> +    virtio_update_irq(vdev);
> +}
> +
>  void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      /* Always notify when queue is empty */
> diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
> index dee97ba..ed8bebd 100644
> --- a/qemu/hw/virtio.h
> +++ b/qemu/hw/virtio.h
> @@ -118,7 +118,8 @@ struct VirtIODevice
>      void *config;
>      uint32_t (*get_features)(VirtIODevice *vdev);
>      void (*set_features)(VirtIODevice *vdev, uint32_t val);
> -    void (*update_config)(VirtIODevice *vdev, uint8_t *config);
> +    void (*get_config)(VirtIODevice *vdev, uint8_t *config);
> +    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>      VirtQueue vq[VIRTIO_PCI_QUEUE_MAX];
>  };
>
> @@ -140,4 +141,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
>
>  void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
>
> +void virtio_notify_config(VirtIODevice *vdev);
> +
>  #endif
> diff --git a/qemu/monitor.c b/qemu/monitor.c
> index e03c473..e05f717 100644
> --- a/qemu/monitor.c
> +++ b/qemu/monitor.c
> @@ -35,6 +35,7 @@
>  #include "audio/audio.h"
>  #include "disas.h"
>  #include "migration.h"
> +#include "balloon.h"
>  #include <dirent.h>
>
>  #include "qemu-kvm.h"
> @@ -1262,6 +1263,23 @@ static void do_wav_capture (const char *path,
>  }
>  #endif
>
> +static void do_balloon(int value)
> +{
> +    ram_addr_t target = value;
> +    qemu_balloon(target << 20);
> +}
> +
> +static void do_info_balloon(void)
> +{
> +    ram_addr_t actual;
> +
> +    actual = qemu_balloon_status();
> +    if (actual == 0)
> +     term_printf("Ballooning not activated in VM\n");
> +    else
> +     term_printf("balloon: actual=%d\n", (int)(actual >> 20));
> +}
> +
>  static term_cmd_t term_cmds[] = {
>      { "help|?", "s?", do_help,
>        "[cmd]", "show the help" },
> @@ -1339,6 +1357,8 @@ static term_cmd_t term_cmds[] = {
>        "", "cancel the current VM migration" },
>      { "migrate_set_speed", "s", do_migrate_set_speed,
>        "value", "set maximum speed (in bytes) for migrations" },
> +    { "balloon", "i", do_balloon,
> +      "target", "request VM to change it's memory allocation (in MB)" },
>      { NULL, NULL, },
>  };
>
> @@ -1401,6 +1421,8 @@ static term_cmd_t info_cmds[] = {
>  #endif
>      { "migration", "", do_info_migration,
>        "", "show migration information" },
> +    { "balloon", "", do_info_balloon,
> +      "", "show balloon information" },
>      { NULL, NULL, },
>  };
>
> diff --git a/qemu/vl.c b/qemu/vl.c
> index 756e13d..d339eb2 100644
> --- a/qemu/vl.c
> +++ b/qemu/vl.c
> @@ -38,6 +38,7 @@
>  #include "block.h"
>  #include "audio/audio.h"
>  #include "migration.h"
> +#include "balloon.h"
>  #include "qemu-kvm.h"
>
>  #include <unistd.h>
> @@ -511,6 +512,31 @@ void hw_error(const char *fmt, ...)
>      abort();
>  }
>
> +/***************/
> +/* ballooning */
> +
> +static QEMUBalloonEvent *qemu_balloon_event;
> +void *qemu_balloon_event_opaque;
> +
> +void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque)
> +{
> +    qemu_balloon_event = func;
> +    qemu_balloon_event_opaque = opaque;
> +}
> +
> +void qemu_balloon(ram_addr_t target)
> +{
> +    if (qemu_balloon_event)
> +     qemu_balloon_event(qemu_balloon_event_opaque, target);
> +}
> +
> +ram_addr_t qemu_balloon_status(void)
> +{
> +    if (qemu_balloon_event)
> +     return qemu_balloon_event(qemu_balloon_event_opaque, 0);
> +    return 0;
> +}
> +
>  /***********************************************************/
>  /* keyboard/mouse */
>
>   


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to