Let's properly notify when (un)plugging blocks, after discarding memory
and before allowing the guest to consume memory. Handle errors from
notifiers gracefully (e.g., no remaining VFIO mappings) when plugging,
rolling back the change and telling the guest that the VM is busy.

One special case to take care of is replaying all notifications after
restoring the vmstate. The device starts out with all memory discarded,
so after loading the vmstate, we have to notify about all plugged
blocks.

Acked-by: Michael S. Tsirkin <m...@redhat.com>
Reviewed-by: Michael S. Tsirkin <m...@redhat.com>
Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: "Michael S. Tsirkin" <m...@redhat.com>
Cc: Alex Williamson <alex.william...@redhat.com>
Cc: Dr. David Alan Gilbert <dgilb...@redhat.com>
Cc: Igor Mammedov <imamm...@redhat.com>
Cc: Pankaj Gupta <pankaj.gupta.li...@gmail.com>
Cc: Peter Xu <pet...@redhat.com>
Cc: Auger Eric <eric.au...@redhat.com>
Cc: Wei Yang <richard.weiy...@linux.alibaba.com>
Cc: teawater <teawat...@linux.alibaba.com>
Cc: Marek Kedzierski <mkedz...@redhat.com>
Signed-off-by: David Hildenbrand <da...@redhat.com>
---
 hw/virtio/virtio-mem.c         | 253 ++++++++++++++++++++++++++++++++-
 include/hw/virtio/virtio-mem.h |   3 +
 2 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index bbe42ad83b..3132e4b2b1 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -172,7 +172,105 @@ static int virtio_mem_for_each_unplugged_range(const 
VirtIOMEM *vmem, void *arg,
     return ret;
 }
 
-static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
+static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
+                                             virtio_mem_range_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    int ret = 0;
+
+    first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
+    while (first_bit < vmem->bitmap_size) {
+        offset = first_bit * vmem->block_size;
+        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+                                      first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * vmem->block_size;
+
+        ret = cb(vmem, arg, offset, size);
+        if (ret) {
+            break;
+        }
+        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+                                  last_bit + 2);
+    }
+    return ret;
+}
+
+static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
+                                     uint64_t size)
+{
+    RamDiscardListener *rdl;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        rdl->notify_discard(rdl, &vmem->memdev->mr, offset, size);
+    }
+}
+
+static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
+                                  uint64_t size)
+{
+    RamDiscardListener *rdl, *rdl2;
+    int ret = 0;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        ret = rdl->notify_populate(rdl, &vmem->memdev->mr, offset, size);
+        if (ret) {
+            break;
+        }
+    }
+
+    if (ret) {
+        /* Could be a mapping attempt resulted in memory getting populated. */
+        ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
+
+        /* Notify all already-notified listeners. */
+        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
+            if (rdl2 == rdl) {
+                break;
+            }
+            rdl2->notify_discard(rdl2, &vmem->memdev->mr, offset, size);
+        }
+    }
+    return ret;
+}
+
+static int virtio_mem_notify_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+                                              uint64_t offset, uint64_t size)
+{
+    RamDiscardListener *rdl;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        if (!rdl->notify_discard_all) {
+            rdl->notify_discard(rdl, &vmem->memdev->mr, offset, size);
+        }
+    }
+    return 0;
+}
+
+static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
+{
+    bool individual_calls = false;
+    RamDiscardListener *rdl;
+
+    if (!vmem->size) {
+        return;
+    }
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        if (rdl->notify_discard_all) {
+            rdl->notify_discard_all(rdl, &vmem->memdev->mr);
+        } else {
+            individual_calls = true;
+        }
+    }
+
+    if (individual_calls) {
+        virtio_mem_for_each_unplugged_range(vmem, NULL,
+                                            
virtio_mem_notify_discard_range_cb);
+    }
+}
+
+static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
                                    uint64_t size, bool plugged)
 {
     const unsigned long first_bit = (start_gpa - vmem->addr) / 
vmem->block_size;
@@ -225,7 +323,8 @@ static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
     virtio_mem_send_response(vmem, elem, &resp);
 }
 
-static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t 
size)
+static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
+                                   uint64_t size)
 {
     if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
         return false;
@@ -256,6 +355,9 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, 
uint64_t start_gpa,
         if (ram_block_discard_range(rb, offset, size)) {
             return -EBUSY;
         }
+        virtio_mem_notify_unplug(vmem, offset, size);
+    } else if (virtio_mem_notify_plug(vmem, offset, size)) {
+        return -EBUSY;
     }
     virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
     return 0;
@@ -350,6 +452,8 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)
     if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
         return -EBUSY;
     }
+    virtio_mem_notify_unplug_all(vmem);
+
     bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
     if (vmem->size) {
         vmem->size = 0;
@@ -598,6 +702,12 @@ static void virtio_mem_device_realize(DeviceState *dev, 
Error **errp)
     vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
     qemu_register_reset(virtio_mem_system_reset, vmem);
     precopy_add_notifier(&vmem->precopy_notifier);
+
+    /*
+     * Set ourselves as RamDiscardMgr before the plug handler maps the memory
+     * region and exposes it via an address space.
+     */
+    memory_region_set_ram_discard_mgr(&vmem->memdev->mr, 
RAM_DISCARD_MGR(vmem));
 }
 
 static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -605,6 +715,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOMEM *vmem = VIRTIO_MEM(dev);
 
+    /*
+     * The unplug handler unmapped the memory region, it cannot be
+     * found via an address space anymore. Unset ourselves.
+     */
+    memory_region_set_ram_discard_mgr(&vmem->memdev->mr, NULL);
     precopy_remove_notifier(&vmem->precopy_notifier);
     qemu_unregister_reset(virtio_mem_system_reset, vmem);
     vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
@@ -630,13 +745,41 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
                                                virtio_mem_discard_range_cb);
 }
 
+static int virtio_mem_post_load_replay_cb(const VirtIOMEM *vmem, void *arg,
+                                          uint64_t offset, uint64_t size)
+{
+    RamDiscardListener *rdl;
+    int ret = 0;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        ret = rdl->notify_populate(rdl, &vmem->memdev->mr, offset, size);
+        if (ret) {
+            break;
+        }
+    }
+    return ret;
+}
+
 static int virtio_mem_post_load(void *opaque, int version_id)
 {
+    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+    int ret;
+
+    /*
+     * We started out with all memory discarded and our memory region is mapped
+     * into an address space. Replay, now that we updated the bitmap.
+     */
+    ret = virtio_mem_for_each_plugged_range(vmem, NULL,
+                                            virtio_mem_post_load_replay_cb);
+    if (ret) {
+        return ret;
+    }
+
     if (migration_in_incoming_postcopy()) {
         return 0;
     }
 
-    return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque));
+    return virtio_mem_restore_unplugged(vmem);
 }
 
 typedef struct VirtIOMEMMigSanityChecks {
@@ -921,6 +1064,7 @@ static void virtio_mem_instance_init(Object *obj)
 
     notifier_list_init(&vmem->size_change_notifiers);
     vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
+    QLIST_INIT(&vmem->rdl_list);
 
     object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
                         NULL, NULL, NULL);
@@ -940,11 +1084,104 @@ static Property virtio_mem_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardMgr *rdm,
+                                                   const MemoryRegion *mr)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+    g_assert(mr == &vmem->memdev->mr);
+    return vmem->block_size;
+}
+
+static bool virtio_mem_rdm_is_populated(const RamDiscardMgr *rdm,
+                                        const MemoryRegion *mr,
+                                        ram_addr_t offset, ram_addr_t size)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    uint64_t start_gpa = QEMU_ALIGN_DOWN(vmem->addr + offset, 
vmem->block_size);
+    uint64_t end_gpa = QEMU_ALIGN_UP(vmem->addr + offset + size,
+                                     vmem->block_size);
+
+    g_assert(mr == &vmem->memdev->mr);
+    if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
+        return false;
+    }
+
+    return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
+}
+
+static int virtio_mem_notify_populate_range_single_cb(const VirtIOMEM *vmem,
+                                                      void *arg,
+                                                      uint64_t offset,
+                                                      uint64_t size)
+{
+    RamDiscardListener *rdl = arg;
+
+    return rdl->notify_populate(rdl, &vmem->memdev->mr, offset, size);
+}
+
+static int virtio_mem_notify_discard_range_single_cb(const VirtIOMEM *vmem,
+                                                     void *arg,
+                                                     uint64_t offset,
+                                                     uint64_t size)
+{
+    RamDiscardListener *rdl = arg;
+
+    rdl->notify_discard(rdl, &vmem->memdev->mr, offset, size);
+    return 0;
+}
+
+static void virtio_mem_rdm_register_listener(RamDiscardMgr *rdm,
+                                             const MemoryRegion *mr,
+                                             RamDiscardListener *rdl)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    int ret;
+
+    g_assert(mr == &vmem->memdev->mr);
+    QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
+    ret = virtio_mem_for_each_plugged_range(vmem, rdl,
+                                    
virtio_mem_notify_populate_range_single_cb);
+    if (ret) {
+        error_report("%s: Replaying plugged ranges failed: %s", __func__,
+                     strerror(-ret));
+    }
+}
+
+static void virtio_mem_rdm_unregister_listener(RamDiscardMgr *rdm,
+                                               const MemoryRegion *mr,
+                                               RamDiscardListener *rdl)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+    g_assert(mr == &vmem->memdev->mr);
+    if (rdl->notify_discard_all) {
+        rdl->notify_discard_all(rdl, &vmem->memdev->mr);
+    } else {
+        virtio_mem_for_each_plugged_range(vmem, rdl,
+                                     
virtio_mem_notify_discard_range_single_cb);
+
+    }
+    QLIST_REMOVE(rdl, next);
+}
+
+static int virtio_mem_rdm_replay_populated(const RamDiscardMgr *rdm,
+                                           const MemoryRegion *mr,
+                                           RamDiscardListener *rdl)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+    g_assert(mr == &vmem->memdev->mr);
+    return virtio_mem_for_each_plugged_range(vmem, rdl,
+                                    
virtio_mem_notify_populate_range_single_cb);
+}
+
 static void virtio_mem_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
     VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
+    RamDiscardMgrClass *rdmc = RAM_DISCARD_MGR_CLASS(klass);
 
     device_class_set_props(dc, virtio_mem_properties);
     dc->vmsd = &vmstate_virtio_mem;
@@ -960,6 +1197,12 @@ static void virtio_mem_class_init(ObjectClass *klass, 
void *data)
     vmc->get_memory_region = virtio_mem_get_memory_region;
     vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
     vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
+
+    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
+    rdmc->is_populated = virtio_mem_rdm_is_populated;
+    rdmc->register_listener = virtio_mem_rdm_register_listener;
+    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
+    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
 }
 
 static const TypeInfo virtio_mem_info = {
@@ -969,6 +1212,10 @@ static const TypeInfo virtio_mem_info = {
     .instance_init = virtio_mem_instance_init,
     .class_init = virtio_mem_class_init,
     .class_size = sizeof(VirtIOMEMClass),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_RAM_DISCARD_MGR },
+        { }
+    },
 };
 
 static void virtio_register_types(void)
diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
index 4eeb82d5dd..9a6e348fa2 100644
--- a/include/hw/virtio/virtio-mem.h
+++ b/include/hw/virtio/virtio-mem.h
@@ -67,6 +67,9 @@ struct VirtIOMEM {
 
     /* don't migrate unplugged memory */
     NotifierWithReturn precopy_notifier;
+
+    /* listeners to notify on plug/unplug activity. */
+    QLIST_HEAD(, RamDiscardListener) rdl_list;
 };
 
 struct VirtIOMEMClass {
-- 
2.29.2


Reply via email to