On Wed, Aug 15, 2018 at 12:16:06PM -0700, Matthew Wilcox wrote:
> (not even compiled, and I can see a good opportunity for combining the
> VM_LOCKED loop with the has_uprobes loop)

I was rushing to get that sent earlier.  Here it is tidied up to
actually compile.

Note the diffstat:

 mmap.c |   71 ++++++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 29 deletions(-)

I think that's a pretty small extra price to pay for having this improved
scalability.

diff --git a/mm/mmap.c b/mm/mmap.c
index de699523c0b7..b77bb3908f8c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2802,7 +2802,9 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
              struct list_head *uf)
 {
        unsigned long end;
-       struct vm_area_struct *vma, *prev, *last;
+       struct vm_area_struct *vma, *prev, *last, *tmp;
+       int res = 0;
+       bool downgrade = false;
 
        if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
                return -EINVAL;
@@ -2811,17 +2813,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
        if (len == 0)
                return -EINVAL;
 
+       if (down_write_killable(&mm->mmap_sem))
+               return -EINTR;
+
        /* Find the first overlapping VMA */
        vma = find_vma(mm, start);
        if (!vma)
-               return 0;
+               goto unlock;
        prev = vma->vm_prev;
-       /* we have  start < vma->vm_end  */
+       /* we have start < vma->vm_end  */
 
        /* if it doesn't overlap, we have nothing.. */
        end = start + len;
        if (vma->vm_start >= end)
-               return 0;
+               goto unlock;
 
        /*
         * If we need to split any vma, do it now to save pain later.
@@ -2831,28 +2836,27 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
         * places tmp vma above, and higher split_vma places tmp vma below.
         */
        if (start > vma->vm_start) {
-               int error;
-
                /*
                 * Make sure that map_count on return from munmap() will
                 * not exceed its limit; but let map_count go just above
                 * its limit temporarily, to help free resources as expected.
                 */
+               res = -ENOMEM;
                if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
-                       return -ENOMEM;
+                       goto unlock;
 
-               error = __split_vma(mm, vma, start, 0);
-               if (error)
-                       return error;
+               res = __split_vma(mm, vma, start, 0);
+               if (res)
+                       goto unlock;
                prev = vma;
        }
 
        /* Does it split the last one? */
        last = find_vma(mm, end);
        if (last && end > last->vm_start) {
-               int error = __split_vma(mm, last, end, 1);
-               if (error)
-                       return error;
+               res = __split_vma(mm, last, end, 1);
+               if (res)
+                       goto unlock;
        }
        vma = prev ? prev->vm_next : mm->mmap;
 
@@ -2866,25 +2870,31 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
                 * split, despite we could. This is unlikely enough
                 * failure that it's not worth optimizing it for.
                 */
-               int error = userfaultfd_unmap_prep(vma, start, end, uf);
-               if (error)
-                       return error;
+               res = userfaultfd_unmap_prep(vma, start, end, uf);
+               if (res)
+                       goto unlock;
        }
 
        /*
         * unlock any mlock()ed ranges before detaching vmas
+        * and check to see if there's any reason we might have to hold
+        * the mmap_sem write-locked while unmapping regions.
         */
-       if (mm->locked_vm) {
-               struct vm_area_struct *tmp = vma;
-               while (tmp && tmp->vm_start < end) {
-                       if (tmp->vm_flags & VM_LOCKED) {
-                               mm->locked_vm -= vma_pages(tmp);
-                               munlock_vma_pages_all(tmp);
-                       }
-                       tmp = tmp->vm_next;
+       downgrade = true;
+
+       for (tmp = vma; tmp && tmp->vm_start < end; tmp = tmp->vm_next) {
+               if (tmp->vm_flags & VM_LOCKED) {
+                       mm->locked_vm -= vma_pages(tmp);
+                       munlock_vma_pages_all(tmp);
                }
+               if (tmp->vm_file &&
+                               has_uprobes(tmp, tmp->vm_start, tmp->vm_end))
+                       downgrade = false;
        }
 
+       if (downgrade)
+               downgrade_write(&mm->mmap_sem);
+
        /*
         * Remove the vma's, and unmap the actual pages
         */
@@ -2896,7 +2906,14 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
        /* Fix up all other VM information */
        remove_vma_list(mm, vma);
 
-       return 0;
+       res = 0;
+unlock:
+       if (downgrade) {
+               up_read(&mm->mmap_sem);
+       } else {
+               up_write(&mm->mmap_sem);
+       }
+       return res;
 }
 
 int vm_munmap(unsigned long start, size_t len)
@@ -2905,11 +2922,7 @@ int vm_munmap(unsigned long start, size_t len)
        struct mm_struct *mm = current->mm;
        LIST_HEAD(uf);
 
-       if (down_write_killable(&mm->mmap_sem))
-               return -EINTR;
-
        ret = do_munmap(mm, start, len, &uf);
-       up_write(&mm->mmap_sem);
        userfaultfd_unmap_complete(mm, &uf);
        return ret;
 }
