Call the vm_ops->map_pages method within an rcu read locked section.
In the speculative case, verify the mmap sequence lock at the start of
the section. A match guarantees that the original vma is still valid
at that time, and that the associated vma->vm_file stays valid while
the vm_ops->map_pages() method is running.

Do not test vmf->pmd in the speculative case - we only speculate when
a page table already exists, and this saves us from having to handle
synchronization around the vmf->pmd read.

Change xfs_filemap_map_pages() to account for the fact that it cannot
block anymore, as it is now running within an rcu read lock.

Signed-off-by: Michel Lespinasse <[email protected]>
---
 fs/xfs/xfs_file.c |  3 +++
 mm/memory.c       | 22 ++++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a007ca0711d9..b360732b20ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1387,6 +1387,9 @@ xfs_filemap_map_pages(
        struct inode            *inode = file_inode(vmf->vma->vm_file);
        vm_fault_t ret;
 
+       if (!xfs_ilock_nowait(XFS_I(inode), XFS_MMAPLOCK_SHARED))
+               return (vmf->flags & FAULT_FLAG_SPECULATIVE) ?
+                       VM_FAULT_RETRY : 0;
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
        ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
diff --git a/mm/memory.c b/mm/memory.c
index 13e2aaf900e5..a20e13d84145 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4012,6 +4012,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
        pgoff_t start_pgoff = vmf->pgoff;
        pgoff_t end_pgoff;
        int off;
+       vm_fault_t ret;
 
        nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
        mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
@@ -4030,14 +4031,31 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
        end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 
1,
                        start_pgoff + nr_pages - 1);
 
-       if (pmd_none(*vmf->pmd)) {
+       if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
+           pmd_none(*vmf->pmd)) {
                vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
                smp_wmb(); /* See comment in __pte_alloc() */
        }
 
-       return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
+       rcu_read_lock();
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+       if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+               if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq)) {
+                       rcu_read_unlock();
+                       return VM_FAULT_RETRY;
+               }
+               /*
+                * the mmap sequence check verified that vmf->vma was still
+                * current at that point in time.
+                * The rcu read lock ensures vmf->vma->vm_file stays valid.
+                */
+       }
+#endif
+       ret = vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
+       rcu_read_unlock();
+       return ret;
 }
 
 static vm_fault_t do_read_fault(struct vm_fault *vmf)
-- 
2.20.1

Reply via email to