On 01/09/2022 19:34, Suren Baghdasaryan wrote:
> VMA flag modifications should be done under VMA lock to prevent concurrent
> page fault handling in that area.
> 
> Signed-off-by: Suren Baghdasaryan <sur...@google.com>
> ---
>  fs/proc/task_mmu.c | 1 +
>  fs/userfaultfd.c   | 6 ++++++
>  mm/madvise.c       | 1 +
>  mm/mlock.c         | 2 ++
>  mm/mmap.c          | 1 +
>  mm/mprotect.c      | 1 +
>  6 files changed, 12 insertions(+)
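
For context, here is my mental model of the vma_mark_locked() helper used
below -- only a sketch of how I read the series, the exact implementation
may well differ:

	static inline void vma_mark_locked(struct vm_area_struct *vma)
	{
		/* Flag updates are done under mmap_lock held in write mode */
		mmap_assert_write_locked(vma->vm_mm);

		/* Already marked during this mmap_lock write cycle */
		if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
			return;

		/*
		 * Take the per-VMA lock so that concurrent page faults in
		 * this area fall back to mmap_lock until the next
		 * mmap_write_unlock() bumps mm_lock_seq.
		 */
		down_write(&vma->vm_lock);
		vma->vm_lock_seq = vma->vm_mm->mm_lock_seq;
		up_write(&vma->vm_lock);
	}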

There are also a few vm_flags changes done in driver code, for instance:

*** arch/x86/kernel/cpu/sgx/driver.c:
sgx_mmap[98]                   vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
*** arch/x86/kernel/cpu/sgx/virt.c:
sgx_vepc_mmap[108]             vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY;
*** drivers/dax/device.c:
dax_mmap[311]                  vma->vm_flags |= VM_HUGEPAGE;

I guess these vm_flags updates should be protected as well, or at least
checked one by one.
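
For sgx_mmap(), for instance, I would expect something like this (untested,
just to illustrate the pattern):

+	vma_mark_locked(vma);
 	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;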

> 
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 4e0023643f8b..ceffa5c2c650 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1285,6 +1285,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
>                       for (vma = mm->mmap; vma; vma = vma->vm_next) {
>                               if (!(vma->vm_flags & VM_SOFTDIRTY))
>                                       continue;
> +                             vma_mark_locked(vma);
>                               vma->vm_flags &= ~VM_SOFTDIRTY;
>                               vma_set_page_prot(vma);
>                       }
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 175de70e3adf..fe557b3d1c07 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -620,6 +620,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
>               mmap_write_lock(mm);
>               for (vma = mm->mmap; vma; vma = vma->vm_next)
>                       if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
> +                             vma_mark_locked(vma);
>                               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>                               vma->vm_flags &= ~__VM_UFFD_FLAGS;
>                       }
> @@ -653,6 +654,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
>  
>       octx = vma->vm_userfaultfd_ctx.ctx;
>       if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
> +             vma_mark_locked(vma);
>               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>               vma->vm_flags &= ~__VM_UFFD_FLAGS;
>               return 0;
> @@ -734,6 +736,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
>               atomic_inc(&ctx->mmap_changing);
>       } else {
>               /* Drop uffd context if remap feature not enabled */
> +             vma_mark_locked(vma);
>               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>               vma->vm_flags &= ~__VM_UFFD_FLAGS;
>       }
> @@ -891,6 +894,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
>                       vma = prev;
>               else
>                       prev = vma;
> +             vma_mark_locked(vma);
>               vma->vm_flags = new_flags;
>               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>       }
> @@ -1449,6 +1453,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
>                * the next vma was merged into the current one and
>                * the current one has not been updated yet.
>                */
> +             vma_mark_locked(vma);
>               vma->vm_flags = new_flags;
>               vma->vm_userfaultfd_ctx.ctx = ctx;
>  
> @@ -1630,6 +1635,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
>                * the next vma was merged into the current one and
>                * the current one has not been updated yet.
>                */
> +             vma_mark_locked(vma);
>               vma->vm_flags = new_flags;
>               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 5f0f0948a50e..a173f0025abd 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -181,6 +181,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
>       /*
>        * vm_flags is protected by the mmap_lock held in write mode.
>        */
> +     vma_mark_locked(vma);
>       vma->vm_flags = new_flags;
>       if (!vma->vm_file) {
>               error = replace_anon_vma_name(vma, anon_name);
> diff --git a/mm/mlock.c b/mm/mlock.c
> index b14e929084cc..f62e1a4d05f2 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -380,6 +380,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
>        */
>       if (newflags & VM_LOCKED)
>               newflags |= VM_IO;
> +     vma_mark_locked(vma);
>       WRITE_ONCE(vma->vm_flags, newflags);
>  
>       lru_add_drain();
> @@ -456,6 +457,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
>  
>       if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
>               /* No work to do, and mlocking twice would be wrong */
> +             vma_mark_locked(vma);
>               vma->vm_flags = newflags;
>       } else {
>               mlock_vma_pages_range(vma, start, end, newflags);
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 693e6776be39..f89c9b058105 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1818,6 +1818,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  out:
>       perf_event_mmap(vma);
>  
> +     vma_mark_locked(vma);
>       vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
>       if (vm_flags & VM_LOCKED) {
>               if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||

I guess this doesn't really matter (marking the VMA locked when vm_flags is
left untouched should be harmless, just unnecessary), but the call to
vma_mark_locked(vma) could be made only when the vm_flags field is actually
modified. Something like this:

        vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
                if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
                                        is_vm_hugetlb_page(vma) ||
-                                       vma == get_gate_vma(current->mm))
+                                       vma == get_gate_vma(current->mm)) {
+                       vma_mark_locked(vma);
                        vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
-               else
+               } else
                        mm->locked_vm += (len >> PAGE_SHIFT);
        }
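
(Note the added braces: the first branch now has two statements, so they
become necessary.)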


> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index bc6bddd156ca..df47fc21b0e4 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -621,6 +621,7 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
>        * vm_flags and vm_page_prot are protected by the mmap_lock
>        * held in write mode.
>        */
> +     vma_mark_locked(vma);
>       vma->vm_flags = newflags;
>       /*
>        * We want to check manually if we can change individual PTEs writable
