Avi and Anthony, If you agree that I've addressed all outstanding issues, please consider this patch for inclusion. Thanks.
Changes since V3: - Increase stat field size to 64 bits - Report all sizes in kb (not pages) - Drop anon_pages stat Changes since V2: - Use a virtqueue for communication instead of the device config space Changes since V1: - In the monitor, print all stats on one line with less abbreviated names - Coding style changes When using ballooning to manage overcommitted memory on a host, a system for guests to communicate their memory usage to the host can provide information that will minimize the impact of ballooning on the guests. The current method employs a daemon running in each guest that communicates memory statistics to a host daemon at a specified time interval. The host daemon aggregates this information and inflates and/or deflates balloons according to the level of host memory pressure. This approach is effective but overly complex since a daemon must be installed inside each guest and coordinated to communicate with the host. A simpler approach is to collect memory statistics in the virtio balloon driver and communicate them directly to the hypervisor. This patch implements the qemu side of the communication channel. I will post the kernel driver modifications in-reply to this message. Signed-off-by: Adam Litke <a...@us.ibm.com> Cc: Anthony Liguori <aligu...@us.ibm.com> Cc: Avi Kivity <a...@redhat.com> Cc: qemu-devel@nongnu.org diff --git a/balloon.h b/balloon.h index 60b4a5d..def4c56 100644 --- a/balloon.h +++ b/balloon.h @@ -16,12 +16,12 @@ #include "cpu-defs.h" -typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target); +typedef int (QEMUBalloonEvent)(void *opaque, ram_addr_t target); void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque); void qemu_balloon(ram_addr_t target); -ram_addr_t qemu_balloon_status(void); +int qemu_balloon_status(void); #endif diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index cfd3b41..b90e1f2 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -19,6 +19,7 @@ #include "balloon.h" #include "virtio-balloon.h" #include "kvm.h" +#include "monitor.h" #if defined(__linux__) #include <sys/mman.h> @@ -27,9 +28,13 @@ typedef struct VirtIOBalloon { VirtIODevice vdev; - VirtQueue *ivq, *dvq; + VirtQueue *ivq, *dvq, *svq; uint32_t num_pages; uint32_t actual; + uint64_t stats[VIRTIO_BALLOON_S_NR]; + VirtQueueElement stats_vq_elem; + size_t stats_vq_offset; + uint8_t stats_requested; } VirtIOBalloon; static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) @@ -46,6 +51,33 @@ static void balloon_page(void *addr, int deflate) #endif } +static inline void reset_stats(VirtIOBalloon *dev) +{ + int i; + for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1); +} + +static inline void print_stat(uint64_t val, const char *label) +{ + if (val != -1) { + monitor_printf(cur_mon, ",%s=%lld", label, (unsigned long long)val); + } +} + +static void virtio_balloon_print_stats(VirtIOBalloon *dev) +{ + uint32_t actual = ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT); + + monitor_printf(cur_mon, "balloon: actual=%d", (int)(actual >> 20)); + print_stat(dev->stats[VIRTIO_BALLOON_S_SWAP_IN], "mem_swapped_in"); + print_stat(dev->stats[VIRTIO_BALLOON_S_SWAP_OUT], "mem_swapped_out"); + print_stat(dev->stats[VIRTIO_BALLOON_S_MAJFLT], "major_page_faults"); + print_stat(dev->stats[VIRTIO_BALLOON_S_MINFLT], "minor_page_faults"); + print_stat(dev->stats[VIRTIO_BALLOON_S_MEMFREE], "free_mem"); + print_stat(dev->stats[VIRTIO_BALLOON_S_MEMTOT], "total_mem"); + monitor_printf(cur_mon, "\n"); +} + /* FIXME: once we do a virtio refactoring, this will get subsumed into common * code */ static size_t memcpy_from_iovector(void *data, size_t offset, size_t size, @@ -104,6 +136,31 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) } } +static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBalloon *s = to_virtio_balloon(vdev); + VirtQueueElement *elem = &s->stats_vq_elem; + VirtIOBalloonStat stat; + size_t offset = 0; + + if (!virtqueue_pop(vq, elem)) + return; + + while (memcpy_from_iovector(&stat, offset, sizeof(stat), elem->out_sg, + elem->out_num) == sizeof(stat)) { + offset += sizeof(stat); + if (stat.tag < VIRTIO_BALLOON_S_NR) + s->stats[stat.tag] = stat.val; + } + s->stats_vq_offset = offset; + + if (s->stats_requested) { + virtio_balloon_print_stats(s); + monitor_resume(cur_mon); + s->stats_requested = 0; + } +} + static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) { VirtIOBalloon *dev = to_virtio_balloon(vdev); @@ -126,10 +183,19 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, static uint32_t virtio_balloon_get_features(VirtIODevice *vdev) { - return 0; + return 1 << VIRTIO_BALLOON_F_STATS_VQ; } -static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target) +static void request_stats(VirtIOBalloon *vb) +{ + vb->stats_requested = 1; + reset_stats(vb); + monitor_suspend(cur_mon); + virtqueue_push(vb->svq, &vb->stats_vq_elem, vb->stats_vq_offset); + virtio_notify(&vb->vdev, vb->svq); +} + +static int virtio_balloon_to_target(void *opaque, ram_addr_t target) { VirtIOBalloon *dev = opaque; @@ -139,9 +205,14 @@ static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target) if (target) { dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT; virtio_notify_config(&dev->vdev); + } else if (dev->vdev.features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) { + request_stats(dev); + } else { + reset_stats(dev); + virtio_balloon_print_stats(dev); } - return ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT); + return 0; } static void virtio_balloon_save(QEMUFile *f, void *opaque) @@ -152,6 +223,9 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque) qemu_put_be32(f, s->num_pages); qemu_put_be32(f, s->actual); + qemu_put_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement)); + qemu_put_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t)); + qemu_put_byte(f, s->stats_requested); } static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) @@ -165,6 +239,9 @@ static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) s->num_pages = qemu_get_be32(f); s->actual = qemu_get_be32(f); + qemu_get_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement)); + qemu_get_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t)); + s->stats_requested = qemu_get_byte(f); return 0; } @@ -183,6 +260,7 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev) s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); + s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats); qemu_add_balloon_handler(virtio_balloon_to_target, s); diff --git a/hw/virtio-balloon.h b/hw/virtio-balloon.h index 9a0d119..8a08153 100644 --- a/hw/virtio-balloon.h +++ b/hw/virtio-balloon.h @@ -25,6 +25,7 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ +#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory stats virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -37,4 +38,18 @@ struct virtio_balloon_config uint32_t actual; }; +/* Memory Statistics */ +#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ +#define VIRTIO_BALLOON_S_SWAP_OUT 1 /* Amount of memory swapped out */ +#define VIRTIO_BALLOON_S_MAJFLT 2 /* Number of major faults */ +#define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ +#define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ +#define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ +#define VIRTIO_BALLOON_S_NR 6 + +typedef struct VirtIOBalloonStat { + uint16_t tag; + uint64_t val; +} VirtIOBalloonStat; + #endif diff --git a/monitor.c b/monitor.c index ed1ce6e..73484c8 100644 --- a/monitor.c +++ b/monitor.c @@ -1583,16 +1583,14 @@ static void do_balloon(Monitor *mon, int value) static void do_info_balloon(Monitor *mon) { - ram_addr_t actual; + int ret; - actual = qemu_balloon_status(); if (kvm_enabled() && !kvm_has_sync_mmu()) monitor_printf(mon, "Using KVM without synchronous MMU, " "ballooning disabled\n"); - else if (actual == 0) + ret = qemu_balloon_status(); + if (ret == -1) monitor_printf(mon, "Ballooning not activated in VM\n"); - else - monitor_printf(mon, "balloon: actual=%d\n", (int)(actual >> 20)); } static qemu_acl *find_acl(Monitor *mon, const char *name) diff --git a/vl.c b/vl.c index 710d52e..aebceab 100644 --- a/vl.c +++ b/vl.c @@ -337,11 +337,11 @@ void qemu_balloon(ram_addr_t target) qemu_balloon_event(qemu_balloon_event_opaque, target); } -ram_addr_t qemu_balloon_status(void) +int qemu_balloon_status(void) { if (qemu_balloon_event) return qemu_balloon_event(qemu_balloon_event_opaque, 0); - return 0; + return -1; } /***********************************************************/ -- Thanks, Adam