Automatic ballooning consists of dynamically adjusting the guest's
balloon according to memory pressure in the host and in the guest.

This commit implements the host side of automatic ballooning, which
basically consists of:

 1. Registering with the memory.pressure_level API (from the
    Linux memory controller cgroup) for the MEDIUM pressure event

    This is a new feature starting on Linux kernel 3.10. For
    more information on this please check
        Documentation/cgroups/memory.txt in Linux kernel sources.

 2. On MEDIUM pressure event reception, QEMU asks the guest kernel
    to inflate the balloon by 16MB

 3. This is only done if the guest negotiates VIRTIO_BALLOON_F_AUTO_BALLOON
    which means the guest's kernel virtio-balloon driver also supports
    automatic ballooning

Automatic deflate is performed by the guest.

Here are some numbers. The test-case is to run 35 VMs (1G of RAM each)
in parallel doing a kernel build. Host has 32GB of RAM and 16GB of swap.
SWAP IN and SWAP OUT correspond to the number of pages swapped in and
swapped out, respectively.

Auto-ballooning disabled:

RUN  TIME(s)  SWAP IN  SWAP OUT

1    634      930980   1588522
2    610      627422   1362174
3    649      1079847  1616367
4    543      953289   1635379
5    642      913237   1514000

Auto-ballooning enabled:

RUN  TIME(s)  SWAP IN  SWAP OUT

1    629      901      12537
2    624      981      18506
3    626      573      9085
4    631      2250     42534
5    627      1610     20808

FIXMEs/TODOs:
 - Should we have a lower limit for guest memory? Otherwise it can
   reach 0 if too many events are received
 - Or maybe we should rate-limit events?
 - It seems that events are being lost when too many of them are
   sent at the same time on a busy host
 - Allow this to be dynamically enabled by management

Signed-off-by: Luiz Capitulino <lcapitul...@redhat.com>
---

o You can find my test script here:

  http://repo.or.cz/w/qemu/qmp-unstable.git/blob/refs/heads/balloon/auto-ballooning/memcg/rfc:/scripts/autob-test

o You can find the guest driver counterpart code at:

   http://repo.or.cz/w/linux-2.6/luiz-linux-2.6.git/shortlog/refs/heads/virtio-balloon/auto-deflate/rfc

o To play with automatic ballooning, do the following:

 1. You'll need 3.9+ for the host kernel
 2. Get the guest kernel bits from:
   git://repo.or.cz/linux-2.6/luiz-linux-2.6.git virtio-balloon/auto-deflate/rfc
 3. Apply this patch to QEMU
 4. Enable the balloon device in qemu with:
    -device virtio-balloon-pci,auto-balloon=true
 5. Generate memory pressure in the host, or put QEMU in a memcg cgroup with
    limited memory. Watch the VM memory going down
 6. Generate pressure in the guest to see it going up again (say, a kernel
    build with -j16)

 hw/virtio/virtio-balloon.c         | 162 +++++++++++++++++++++++++++++++++++++
 hw/virtio/virtio-pci.c             |   5 ++
 hw/virtio/virtio-pci.h             |   1 +
 include/hw/virtio/virtio-balloon.h |  15 ++++
 4 files changed, 183 insertions(+)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d669756..4b23360 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -31,6 +31,12 @@
 
 #include "hw/virtio/virtio-bus.h"
 
+void virtio_balloon_set_conf(DeviceState *dev, VirtIOBalloonConf *bconf)
+{
+    VirtIOBalloon *balloon = VIRTIO_BALLOON(dev);
+    balloon->bconf = *bconf; /* shallow copy: the path pointer is shared */
+}
+
 static void balloon_page(void *addr, int deflate)
 {
 #if defined(__linux__)
@@ -279,9 +285,21 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
     }
 }
 
+static bool auto_balloon_enabled(const VirtIOBalloon *s)
+{
+    return s->bconf.auto_balloon != 0; /* nonzero once the option is set */
+}
+
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
 {
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+
     f |= (1 << VIRTIO_BALLOON_F_STATS_VQ);
+
+    if (auto_balloon_enabled(s)) {
+        f |= (1 << VIRTIO_BALLOON_F_AUTO_BALLOON);
+    }
+
     return f;
 }
 
@@ -336,6 +354,141 @@ static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
+/* Open "<path>/<file>" with open(2) flags; returns fd, or < 0 on error. */
+static int open_sysfile(const char *path, const char *file, int flags)
+{
+    char *p = g_strjoin("/", path, file, NULL);
+    int fd = qemu_open(p, flags);
+
+    if (fd < 0) {
+        /* Report before g_free() so errno still belongs to qemu_open(). */
+        error_report("balloon: can't open '%s': %s", p, strerror(errno));
+    }
+
+    g_free(p);
+    return fd;
+}
+
+/* printf-style write to @fd, retried on EINTR; returns write()'s result. */
+static int write_fd(int fd, const char *fmt, ...)
+{
+    va_list args;
+    char *msg;
+    int nwritten;
+
+    va_start(args, fmt);
+    msg = g_strdup_vprintf(fmt, args);
+    va_end(args);
+
+    while ((nwritten = write(fd, msg, strlen(msg))) < 0 && errno == EINTR) {
+        /* interrupted before anything was written: try again */
+    }
+    if (nwritten < 0) {
+        error_report("balloon: write failed: %s", strerror(errno));
+    }
+
+    g_free(msg);
+    return nwritten;
+}
+
+static bool guest_supports_auto_balloon(const VirtIOBalloon *s)
+{
+    VirtIODevice *dev = VIRTIO_DEVICE(s); /* balloon as its VirtIODevice */
+    return (dev->guest_features & (1 << VIRTIO_BALLOON_F_AUTO_BALLOON)) != 0;
+}
+
+/* Ack by reading a uint64_t from @ev's fd; returns 0 unless read() fails. */
+static int auto_balloon_ack_event(EventNotifier *ev)
+{
+    int fd = event_notifier_get_fd(ev);
+    uint64_t counter;
+    int nread;
+
+    do {
+        nread = read(fd, &counter, sizeof(counter));
+    } while (nread == -1 && errno == EINTR); /* retry interrupted reads */
+
+    return nread < 0 ? nread : 0;
+}
+
+#define AUTO_INFLATE_INCREASE (16 * 1024 * 1024) /* 16 MB */
+
+/* MEDIUM pressure handler: ack the event, then ask the guest to inflate. */
+static void auto_balloon_event_medium(EventNotifier *ev)
+{
+    VirtIOBalloon *s = container_of(ev, VirtIOBalloon, event);
+
+    if (auto_balloon_ack_event(ev) < 0) {
+        fprintf(stderr, "balloon: failed to ack memory pressure event\n");
+        return;
+    }
+    if (!guest_supports_auto_balloon(s)) {
+        fprintf(stderr, "balloon: warning: guest doesn't support "
+                "auto-ballooning, skipping memory pressure event\n");
+        return;
+    }
+
+    /* New target: s->actual plus AUTO_INFLATE_INCREASE converted to pages. */
+    s->num_pages = s->actual +
+                   (AUTO_INFLATE_INCREASE >> VIRTIO_BALLOON_PFN_SHIFT);
+    virtio_notify_config(VIRTIO_DEVICE(s));
+}
+
+#define LINUX_MEMCG_PATH "/sys/fs/cgroup/memory"
+
+/* Register for memcg MEDIUM pressure events; returns 0, or -1 on failure. */
+static int auto_balloon_init(VirtIOBalloon *s)
+{
+    const char *memcg_dir = s->bconf.auto_balloon_memcg_path;
+    int err;
+
+    /* No path configured: fall back to LINUX_MEMCG_PATH. */
+    if (memcg_dir == NULL) {
+        memcg_dir = LINUX_MEMCG_PATH;
+    }
+
+    s->lfd = open_sysfile(memcg_dir, "memory.pressure_level", O_RDONLY);
+    if (s->lfd < 0) {
+        return -1;
+    }
+    s->cfd = open_sysfile(memcg_dir, "cgroup.event_control", O_WRONLY);
+    if (s->cfd < 0) {
+        close(s->lfd);
+        return -1;
+    }
+
+    err = event_notifier_init(&s->event, false);
+    if (err < 0) {
+        error_report("failed to create notifier: %s", strerror(-err));
+        goto out_err;
+    }
+
+    /* event_control line: "<notifier fd> <pressure_level fd> medium" */
+    if (write_fd(s->cfd, "%d %d medium", event_notifier_get_fd(&s->event),
+                 s->lfd) < 0) {
+        goto out_ev;
+    }
+
+    event_notifier_set_handler(&s->event, auto_balloon_event_medium);
+    return 0;
+
+out_ev:
+    event_notifier_cleanup(&s->event);
+out_err:
+    close(s->lfd);
+    close(s->cfd);
+    return -1;
+}
+
+static void auto_balloon_cleanup(VirtIOBalloon *s)
+{
+    if (auto_balloon_enabled(s)) { /* init only ran when auto-balloon is on */
+        event_notifier_cleanup(&s->event); /* NOTE(review): handler still set? */
+        close(s->lfd); /* memory.pressure_level */
+        close(s->cfd); /* cgroup.event_control */
+    }
+}
+
 static int virtio_balloon_device_init(VirtIODevice *vdev)
 {
     DeviceState *qdev = DEVICE(vdev);
@@ -344,6 +497,14 @@ static int virtio_balloon_device_init(VirtIODevice *vdev)
 
     virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, 8);
 
+    if (auto_balloon_enabled(s)) {
+        ret = auto_balloon_init(s);
+        if (ret < 0) {
+            virtio_cleanup(VIRTIO_DEVICE(s));
+            return -1;
+        }
+    }
+
     ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                    virtio_balloon_stat, s);
 
@@ -374,6 +535,7 @@ static int virtio_balloon_device_exit(DeviceState *qdev)
     VirtIOBalloon *s = VIRTIO_BALLOON(qdev);
     VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
 
+    auto_balloon_cleanup(s);
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     unregister_savevm(qdev, "virtio-balloon", s);
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index ec0066b..378fe8d 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1246,6 +1246,10 @@ static void balloon_pci_stats_set_poll_interval(Object *obj, struct Visitor *v,
 static Property virtio_balloon_pci_properties[] = {
     DEFINE_VIRTIO_COMMON_FEATURES(VirtIOPCIProxy, host_features),
     DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
+#ifdef __linux__
+    DEFINE_PROP_BIT("auto-balloon", VirtIOBalloonPCI, bconf.auto_balloon, 0, false),
+    DEFINE_PROP_STRING("auto-balloon-memory-cgroup-path", VirtIOBalloonPCI, bconf.auto_balloon_memcg_path),
+#endif
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1259,6 +1263,7 @@ static int virtio_balloon_pci_init(VirtIOPCIProxy *vpci_dev)
         vpci_dev->class_code = PCI_CLASS_OTHERS;
     }
 
+    virtio_balloon_set_conf(vdev, &(dev->bconf));
     qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
     if (qdev_init(vdev) < 0) {
         return -1;
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 917bcc5..eb46401 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -145,6 +145,7 @@ struct VirtIOBlkPCI {
 struct VirtIOBalloonPCI {
     VirtIOPCIProxy parent_obj;
     VirtIOBalloon vdev;
+    VirtIOBalloonConf bconf;
 };
 
 /*
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index f863bfe..1a0c255 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -30,10 +30,17 @@
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ 1       /* Memory stats virtqueue */
+#define VIRTIO_BALLOON_F_AUTO_BALLOON 2   /* Automatic ballooning */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
+typedef struct VirtIOBalloonConf
+{
+    uint32_t auto_balloon;         /* bit 0 set by the "auto-balloon" property */
+    char *auto_balloon_memcg_path; /* memcg directory; NULL selects the default */
+} VirtIOBalloonConf;
+
 struct virtio_balloon_config
 {
     /* Number of pages host wants Guest to give up. */
@@ -67,6 +74,14 @@ typedef struct VirtIOBalloon {
     QEMUTimer *stats_timer;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
+    VirtIOBalloonConf bconf;
+
+    /* auto-balloon */
+    int cfd;
+    int lfd;
+    EventNotifier event;
 } VirtIOBalloon;
 
+void virtio_balloon_set_conf(DeviceState *dev, VirtIOBalloonConf *bconf);
+
 #endif
-- 
1.8.1.4


Reply via email to