From: Nadav Amit <na...@vmware.com>

Certain use-cases (e.g., prefetch_page()) may want to avoid polling
while a page is being brought in from swap. However,
swap_cluster_readahead() and swap_vma_readahead() do not respect
FAULT_FLAG_RETRY_NOWAIT.

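Make both functions respect FAULT_FLAG_RETRY_NOWAIT: do not poll when
fault_flag_allow_retry_first() holds and FAULT_FLAG_RETRY_NOWAIT is set.
In that case, have do_swap_page() return VM_FAULT_RETRY if the page
could not be brought in. Since the readahead code now reads vmf->flags,
initialize it to 0 in shmem_swapin().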

Cc: Andy Lutomirski <l...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Sean Christopherson <sea...@google.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: x...@kernel.org
Signed-off-by: Nadav Amit <na...@vmware.com>
---
 mm/memory.c     | 15 +++++++++++++--
 mm/shmem.c      |  1 +
 mm/swap_state.c | 12 +++++++++---
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index feff48e1465a..13b9cf36268f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3326,12 +3326,23 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                }
 
                if (!page) {
+                       /*
+                        * Back out if we failed to bring the page while we
+                        * tried to avoid I/O.
+                        */
+                       if (fault_flag_allow_retry_first(vmf->flags) &&
+                           (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                               ret = VM_FAULT_RETRY;
+                               delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+                               goto out;
+                       }
+
                        /*
                         * Back out if somebody else faulted in this pte
                         * while we released the pte lock.
                         */
-                       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-                                       vmf->address, &vmf->ptl);
+                       vmf->pte = pte_offset_map_lock(vma->vm_mm,
+                               vmf->pmd, vmf->address, &vmf->ptl);
                        if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
                                ret = VM_FAULT_OOM;
                        delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..b108e9ba9e89 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1525,6 +1525,7 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
        shmem_pseudo_vma_init(&pvma, info, index);
        vmf.vma = &pvma;
        vmf.address = 0;
+       vmf.flags = 0;
        page = swap_cluster_readahead(swap, gfp, &vmf);
        shmem_pseudo_vma_destroy(&pvma);
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 751c1ef2fe0e..1e930f7ff8b3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -656,10 +656,13 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
        unsigned long mask;
        struct swap_info_struct *si = swp_swap_info(entry);
        struct blk_plug plug;
-       bool do_poll = true, page_allocated;
+       bool page_allocated, do_poll;
        struct vm_area_struct *vma = vmf->vma;
        unsigned long addr = vmf->address;
 
+       do_poll = !fault_flag_allow_retry_first(vmf->flags) ||
+               !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
+
        mask = swapin_nr_pages(offset) - 1;
        if (!mask)
                goto skip;
@@ -838,7 +841,7 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
        pte_t *pte, pentry;
        swp_entry_t entry;
        unsigned int i;
-       bool page_allocated;
+       bool page_allocated, do_poll;
        struct vma_swap_readahead ra_info = {
                .win = 1,
        };
@@ -873,9 +876,12 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
        }
        blk_finish_plug(&plug);
        lru_add_drain();
+
 skip:
+       do_poll = (!fault_flag_allow_retry_first(vmf->flags) ||
+               !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) && ra_info.win == 1;
        return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-                                    ra_info.win == 1);
+                                    do_poll);
 }
 
 /**
-- 
2.25.1

Reply via email to