From: Ackerley Tng <[email protected]>

When checking if a guest_memfd folio is safe for conversion, its refcount
is examined. A folio may be present in a per-CPU lru_add fbatch, which
temporarily increases its refcount. This can lead to a false positive,
incorrectly indicating that the folio is in use and preventing the
conversion, even if it is otherwise safe. The conversion process might not
be on the same CPU that holds the folio in its fbatch, making a simple
per-CPU check insufficient.

To address this, drain all CPUs' lru_add fbatches if an unexpectedly high
refcount is encountered during the safety check. This is performed at most
once per conversion request, and only if the folio in question may be lru
cached.

guest_memfd folios are unevictable, so they can only reside in the lru_add
fbatch. If the folio's refcount is still unsafe after draining, then the
conversion is truly deemed unsafe.

Signed-off-by: Ackerley Tng <[email protected]>
---
 mm/swap.c              |  2 ++
 virt/kvm/guest_memfd.c | 18 ++++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index 5cc44f0de9877..3134d9d3d7c30 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -37,6 +37,7 @@
 #include <linux/page_idle.h>
 #include <linux/local_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/kvm_types.h>
 
 #include "internal.h"
 
@@ -904,6 +905,7 @@ void lru_add_drain_all(void)
        lru_add_drain();
 }
 #endif /* CONFIG_SMP */
+EXPORT_SYMBOL_FOR_KVM(lru_add_drain_all);
 
 atomic_t lru_disable_count = ATOMIC_INIT(0);
 
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index d8bdb51c50cf0..18dec87dd4baa 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -8,6 +8,7 @@
 #include <linux/mempolicy.h>
 #include <linux/pseudo_fs.h>
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 
 #include "kvm_mm.h"
 
@@ -608,18 +609,27 @@ static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
        const int filemap_get_folios_refcount = 1;
        pgoff_t last = start + nr_pages - 1;
        struct folio_batch fbatch;
+       bool lru_drained = false;
        bool safe = true;
        int i;
 
        folio_batch_init(&fbatch);
        while (safe && filemap_get_folios(mapping, &start, last, &fbatch)) {
 
-               for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+               for (i = 0; i < folio_batch_count(&fbatch);) {
                        struct folio *folio = fbatch.folios[i];
 
-                       if (folio_ref_count(folio) !=
-                   folio_nr_pages(folio) + filemap_get_folios_refcount) {
-                               safe = false;
+                       safe = (folio_ref_count(folio) ==
+                               folio_nr_pages(folio) +
+                               filemap_get_folios_refcount);
+
+                       if (safe) {
+                               ++i;
+                       } else if (folio_may_be_lru_cached(folio) &&
+                                  !lru_drained) {
+                               lru_add_drain_all();
+                               lru_drained = true;
+                       } else {
                                *err_index = folio->index;
                                break;
                        }

-- 
2.54.0.545.g6539524ca2-goog



Reply via email to