The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.8.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.8.8
------>
commit 960d73471389f5aa89397b91f9ddee887c361170
Author: Davidlohr Bueso <d...@stgolabs.net>
Date:   Tue Oct 20 14:01:29 2015 +0400

    ms/prctl: avoid using mmap_sem for exe_file serialization
    
    This is needed for CRIU.
    
    ML: 6e399cd144d8500ffb5d40fa6848890e2580a80a
    
    https://jira.sw.ru/browse/PSBM-39834
    
    Oleg cleverly suggested using xchg() to set the new mm->exe_file instead
    of calling set_mm_exe_file() which requires some form of serialization --
    mmap_sem in this case.  For archs that do not have atomic rmw instructions
    we still fall back to a spinlock alternative, so this should always be
    safe.  As such, we only need the mmap_sem for looking up the backing
    vm_file, which can be done sharing the lock.  Naturally, this means we
    need to manually deal with both the new and old file reference counting,
    and we need not worry about the MMF_EXE_FILE_CHANGED bits, which can
    probably be deleted in the future anyway.
    
    Signed-off-by: Davidlohr Bueso <dbu...@suse.de>
    Suggested-by: Oleg Nesterov <o...@redhat.com>
    Acked-by: Oleg Nesterov <o...@redhat.com>
    Reviewed-by: Konstantin Khlebnikov <khlebni...@yandex-team.ru>
    Signed-off-by: Andrew Morton <a...@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>
    Signed-off-by: Cyrill Gorcunov <gorcu...@virtuozzo.com>
---
 kernel/sys.c | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 9dd9d57..c8ca093 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2036,14 +2036,13 @@ SYSCALL_DEFINE1(umask, int, mask)
        return mask;
 }
 
-static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
        struct fd exe;
+       struct file *old_exe, *exe_file;
        struct inode *inode;
        int err;
 
-       VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
        exe = fdget(fd);
        if (!exe.file)
                return -EBADF;
@@ -2067,15 +2066,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
        /*
         * Forbid mm->exe_file change if old file still mapped.
         */
+       exe_file = get_mm_exe_file(mm);
        err = -EBUSY;
-       if (mm->exe_file) {
+       if (exe_file) {
                struct vm_area_struct *vma;
 
-               for (vma = mm->mmap; vma; vma = vma->vm_next)
-                       if (vma->vm_file &&
-                           path_equal(&vma->vm_file->f_path,
-                                      &mm->exe_file->f_path))
-                               goto exit;
+               down_read(&mm->mmap_sem);
+               for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                       if (!vma->vm_file)
+                               continue;
+                       if (path_equal(&vma->vm_file->f_path,
+                                      &exe_file->f_path))
+                               goto exit_err;
+               }
+
+               up_read(&mm->mmap_sem);
+               fput(exe_file);
        }
 
        /*
@@ -2089,10 +2095,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
                goto exit;
 
        err = 0;
-       set_mm_exe_file(mm, exe.file);  /* this grabs a reference to exe.file */
+       /* set the new file, lockless */
+       get_file(exe.file);
+       old_exe = xchg(&mm->exe_file, exe.file);
+       if (old_exe)
+               fput(old_exe);
 exit:
        fdput(exe);
        return err;
+exit_err:
+       up_read(&mm->mmap_sem);
+       fput(exe_file);
+       goto exit;
 }
 
 /*
@@ -2227,10 +2241,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
                user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
        }
 
-       down_write(&mm->mmap_sem);
        if (prctl_map.exe_fd != (u32)-1)
-               error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
-       downgrade_write(&mm->mmap_sem);
+               error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
+       down_read(&mm->mmap_sem);
        if (error)
                goto out;
 
@@ -2327,12 +2340,8 @@ static int prctl_set_mm(int opt, unsigned long addr,
        if (!capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
-       if (opt == PR_SET_MM_EXE_FILE) {
-               down_write(&mm->mmap_sem);
-               error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
-               up_write(&mm->mmap_sem);
-               return error;
-       }
+       if (opt == PR_SET_MM_EXE_FILE)
+               return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
        if (opt == PR_SET_MM_AUXV)
                return prctl_set_auxv(mm, addr, arg4);
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to