In the speculative case, call the vm_ops->fault() method from within
an RCU read-locked section, and verify the mmap sequence lock at the
start of the section. A match guarantees that the original vma is still
valid at that time, and that the associated vma->vm_file stays valid
while the vm_ops->fault() method is running.

Note that this implies that speculative faults cannot sleep within
the vm_ops->fault() method. We will only attempt to fetch existing pages
from the page cache during speculative faults; any miss (or prefetch)
will be handled by falling back to non-speculative fault handling.

The speculative handling case also does not preallocate page tables,
as it is always called with a pre-existing page table.

Signed-off-by: Michel Lespinasse <[email protected]>
---
 mm/memory.c | 63 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 6eddd7b4e89c..7139004c624d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3709,29 +3709,50 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        vm_fault_t ret;
 
-       /*
-        * Preallocate pte before we take page_lock because this might lead to
-        * deadlocks for memcg reclaim which waits for pages under writeback:
-        *                              lock_page(A)
-        *                              SetPageWriteback(A)
-        *                              unlock_page(A)
-        * lock_page(B)
-        *                              lock_page(B)
-        * pte_alloc_one
-        *   shrink_page_list
-        *     wait_on_page_writeback(A)
-        *                              SetPageWriteback(B)
-        *                              unlock_page(B)
-        *                              # flush A, B to clear the writeback
-        */
-       if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
-               vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
-               if (!vmf->prealloc_pte)
-                       return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+       if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+               rcu_read_lock();
+               if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq)) {
+                       ret = VM_FAULT_RETRY;
+               } else {
+                       /*
+                        * The mmap sequence count check guarantees that the
+                        * vma we fetched at the start of the fault was still
+                        * current at that point in time. The rcu read lock
+                        * ensures vmf->vma->vm_file stays valid.
+                        */
+                       ret = vma->vm_ops->fault(vmf);
+               }
+               rcu_read_unlock();
+       } else
+#endif
+       {
+               /*
+                * Preallocate pte before we take page_lock because
+                * this might lead to deadlocks for memcg reclaim
+                * which waits for pages under writeback:
+                *                              lock_page(A)
+                *                              SetPageWriteback(A)
+                *                              unlock_page(A)
+                * lock_page(B)
+                *                              lock_page(B)
+                * pte_alloc_one
+                *   shrink_page_list
+                *     wait_on_page_writeback(A)
+                *                              SetPageWriteback(B)
+                *                              unlock_page(B)
+                *                              # flush A, B to clear writeback
+                */
+               if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
+                       vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
+                       if (!vmf->prealloc_pte)
+                               return VM_FAULT_OOM;
+                       smp_wmb(); /* See comment in __pte_alloc() */
+               }
+
+               ret = vma->vm_ops->fault(vmf);
        }
 
-       ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
                return ret;
-- 
2.20.1

Reply via email to