On Mon, Oct 12, 2020 at 02:53:23PM +0200, David Hildenbrand wrote:
>Let's add a safe mechanism to unplug memory, avoiding long/endless loops
>when trying to offline memory - similar to in SBM.
>
>Fake-offline all memory (via alloc_contig_range()) before trying to
>offline+remove it. Use this mode as default, but allow to enable the other
>mode explicitly (which could give better memory hotunplug guarantees in

I don't get the point how unsafe mode would have a better guarantees?

>some environments).
>
>The "unsafe" mode can be enabled e.g., via virtio_mem.bbm_safe_unplug=0
>on the cmdline.
>
>Cc: "Michael S. Tsirkin" <[email protected]>
>Cc: Jason Wang <[email protected]>
>Cc: Pankaj Gupta <[email protected]>
>Cc: Michal Hocko <[email protected]>
>Cc: Oscar Salvador <[email protected]>
>Cc: Wei Yang <[email protected]>
>Cc: Andrew Morton <[email protected]>
>Signed-off-by: David Hildenbrand <[email protected]>
>---
> drivers/virtio/virtio_mem.c | 97 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 95 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
>index 6bcd0acbff32..09f11489be6f 100644
>--- a/drivers/virtio/virtio_mem.c
>+++ b/drivers/virtio/virtio_mem.c
>@@ -37,6 +37,11 @@ module_param(bbm_block_size, ulong, 0444);
> MODULE_PARM_DESC(bbm_block_size,
>                "Big Block size in bytes. Default is 0 (auto-detection).");
> 
>+static bool bbm_safe_unplug = true;
>+module_param(bbm_safe_unplug, bool, 0444);
>+MODULE_PARM_DESC(bbm_safe_unplug,
>+           "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
>+
> /*
>  * virtio-mem currently supports the following modes of operation:
>  *
>@@ -87,6 +92,8 @@ enum virtio_mem_bbm_bb_state {
>       VIRTIO_MEM_BBM_BB_PLUGGED,
>       /* Plugged and added to Linux. */
>       VIRTIO_MEM_BBM_BB_ADDED,
>+      /* All online parts are fake-offline, ready to remove. */
>+      VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
>       VIRTIO_MEM_BBM_BB_COUNT
> };
> 
>@@ -889,6 +896,32 @@ static void virtio_mem_sbm_notify_cancel_offline(struct 
>virtio_mem *vm,
>       }
> }
> 
>+static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
>+                                              unsigned long bb_id,
>+                                              unsigned long pfn,
>+                                              unsigned long nr_pages)
>+{
>+      /*
>+       * When marked as "fake-offline", all online memory of this device block
>+       * is allocated by us. Otherwise, we don't have any memory allocated.
>+       */
>+      if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
>+          VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
>+              return;
>+      virtio_mem_fake_offline_going_offline(pfn, nr_pages);
>+}
>+
>+static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
>+                                               unsigned long bb_id,
>+                                               unsigned long pfn,
>+                                               unsigned long nr_pages)
>+{
>+      if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
>+          VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
>+              return;
>+      virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
>+}
>+
> /*
>  * This callback will either be called synchronously from add_memory() or
>  * asynchronously (e.g., triggered via user space). We have to be careful
>@@ -949,6 +982,10 @@ static int virtio_mem_memory_notifier_cb(struct 
>notifier_block *nb,
>               vm->hotplug_active = true;
>               if (vm->in_sbm)
>                       virtio_mem_sbm_notify_going_offline(vm, id);
>+              else
>+                      virtio_mem_bbm_notify_going_offline(vm, id,
>+                                                          mhp->start_pfn,
>+                                                          mhp->nr_pages);
>               break;
>       case MEM_GOING_ONLINE:
>               mutex_lock(&vm->hotplug_mutex);
>@@ -999,6 +1036,10 @@ static int virtio_mem_memory_notifier_cb(struct 
>notifier_block *nb,
>                       break;
>               if (vm->in_sbm)
>                       virtio_mem_sbm_notify_cancel_offline(vm, id);
>+              else
>+                      virtio_mem_bbm_notify_cancel_offline(vm, id,
>+                                                           mhp->start_pfn,
>+                                                           mhp->nr_pages);
>               vm->hotplug_active = false;
>               mutex_unlock(&vm->hotplug_mutex);
>               break;
>@@ -1189,7 +1230,13 @@ static void virtio_mem_online_page_cb(struct page 
>*page, unsigned int order)
>                       do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
>                                                                  sb_id, 1);
>               } else {
>-                      do_online = true;
>+                      /*
>+                       * If the whole block is marked fake offline, keep
>+                       * everything that way.
>+                       */
>+                      id = virtio_mem_phys_to_bb_id(vm, addr);
>+                      do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
>+                                  VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
>               }
>               if (do_online)
>                       generic_online_page(page, order);
>@@ -1969,15 +2016,50 @@ static int virtio_mem_sbm_unplug_request(struct 
>virtio_mem *vm, uint64_t diff)
> static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
>                                                      unsigned long bb_id)
> {
>+      const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, 
>bb_id));
>+      const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
>+      unsigned long end_pfn = start_pfn + nr_pages;
>+      unsigned long pfn;
>+      struct page *page;
>       int rc;
> 
>       if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
>                        VIRTIO_MEM_BBM_BB_ADDED))
>               return -EINVAL;
> 
>+      if (bbm_safe_unplug) {
>+              /*
>+               * Start by fake-offlining all memory. Once we marked the device
>+               * block as fake-offline, all newly onlined memory will
>+               * automatically be kept fake-offline. Protect from concurrent
>+               * onlining/offlining until we have a consistent state.
>+               */
>+              mutex_lock(&vm->hotplug_mutex);
>+              virtio_mem_bbm_set_bb_state(vm, bb_id,
>+                                          VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
>+

State is set here.

>+              for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>+                      page = pfn_to_online_page(pfn);
>+                      if (!page)
>+                              continue;
>+
>+                      rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
>+                      if (rc) {
>+                              end_pfn = pfn;
>+                              goto rollback_safe_unplug;
>+                      }
>+              }
>+              mutex_unlock(&vm->hotplug_mutex);
>+      }
>+
>       rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
>-      if (rc)
>+      if (rc) {
>+              if (bbm_safe_unplug) {
>+                      mutex_lock(&vm->hotplug_mutex);
>+                      goto rollback_safe_unplug;
>+              }
>               return rc;
>+      }
> 
>       rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
>       if (rc)

And changed to PLUGGED or UNUSED based on rc.

>@@ -1987,6 +2069,17 @@ static int 
>virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
>               virtio_mem_bbm_set_bb_state(vm, bb_id,
>                                           VIRTIO_MEM_BBM_BB_UNUSED);
>       return rc;
>+
>+rollback_safe_unplug:
>+      for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>+              page = pfn_to_online_page(pfn);
>+              if (!page)
>+                      continue;
>+              virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
>+      }
>+      virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);

And changed to ADDED if failed.

>+      mutex_unlock(&vm->hotplug_mutex);
>+      return rc;
> }

So in which case, the bbm state is FAKE_OFFLINE during
virtio_mem_bbm_notify_going_offline() and
virtio_mem_bbm_notify_cancel_offline() ?

> 
> /*
>-- 
>2.26.2

-- 
Wei Yang
Help you, Help me

Reply via email to