Quoting Matt Helsley ([EMAIL PROTECTED]):
> This patch avoids holding the mmap semaphore while walking VMAs in response to
> programs which read or follow the /proc/<pid|self>/exe symlink. This also 
> allows
> us to merge mmu and nommu proc_exe_link() functions. The costs are holding the
> task lock, a separate reference to the executable file stored in the task
> struct, and increased code in fork, exec, and exit paths.
> 
> Changes:
> Clear exe_file field in exit path
> Use task_lock() to protect exe_file between write and read paths

This one does seem safe.

Though do you need to handle the #ifdef __alpha__ in
fs/exec.c:search_binary_handler?

thanks,
-serge

> Signed-off-by: Matt Helsley <[EMAIL PROTECTED]>
> ---
> 
>  fs/exec.c             |    7 +++++--
>  fs/proc/base.c        |   21 +++++++++++++++++++++
>  fs/proc/internal.h    |    1 -
>  fs/proc/task_mmu.c    |   34 ----------------------------------
>  fs/proc/task_nommu.c  |   34 ----------------------------------
>  include/linux/sched.h |    3 ++-
>  kernel/exit.c         |    6 ++++++
>  kernel/fork.c         |    9 ++++++++-
>  8 files changed, 42 insertions(+), 73 deletions(-)
> 
> Index: linux-2.6.22-rc2-mm1/include/linux/sched.h
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/include/linux/sched.h
> +++ linux-2.6.22-rc2-mm1/include/linux/sched.h
> @@ -988,10 +988,11 @@ struct task_struct {
>       int oomkilladj; /* OOM kill score adjustment (bit shift). */
>       char comm[TASK_COMM_LEN]; /* executable name excluding path
>                                    - access with [gs]et_task_comm (which lock
>                                      it with task_lock())
>                                    - initialized normally by flush_old_exec */
> +     struct file *exe_file;
>  /* file system info */
>       int link_count, total_link_count;
>  #ifdef CONFIG_SYSVIPC
>  /* ipc stuff */
>       struct sysv_sem sysvsem;
> @@ -1549,11 +1550,11 @@ static inline int thread_group_empty(str
> 
>  #define delay_group_leader(p) \
>               (thread_group_leader(p) && !thread_group_empty(p))
> 
>  /*
> - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
> + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, ->exe_file, keyring
>   * subscriptions and synchronises with wait4().  Also used in procfs.  Also
>   * pins the final release of task.io_context.  Also protects ->cpuset.
>   *
>   * Nests both inside and outside of read_lock(&tasklist_lock).
>   * It must not be nested with write_lock_irq(&tasklist_lock),
> Index: linux-2.6.22-rc2-mm1/fs/exec.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/exec.c
> +++ linux-2.6.22-rc2-mm1/fs/exec.c
> @@ -1106,12 +1106,15 @@ int search_binary_handler(struct linux_b
>                       read_unlock(&binfmt_lock);
>                       retval = fn(bprm, regs);
>                       if (retval >= 0) {
>                               put_binfmt(fmt);
>                               allow_write_access(bprm->file);
> -                             if (bprm->file)
> -                                     fput(bprm->file);
> +                             task_lock(current);
> +                             if (current->exe_file)
> +                                     fput(current->exe_file);
> +                             current->exe_file = bprm->file;
> +                             task_unlock(current);
>                               bprm->file = NULL;
>                               current->did_exec = 1;
>                               proc_exec_connector(current);
>                               return retval;
>                       }
> Index: linux-2.6.22-rc2-mm1/fs/proc/base.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/base.c
> +++ linux-2.6.22-rc2-mm1/fs/proc/base.c
> @@ -951,10 +951,31 @@ const struct file_operations proc_pid_sc
>       .write          = sched_write,
>       .llseek         = seq_lseek,
>       .release        = seq_release,
>  };
> 
> +static int proc_exe_link(struct inode *inode, struct dentry **dentry,
> +                      struct vfsmount **mnt)
> +{
> +     int error = -ENOENT;
> +     struct task_struct *task;
> +
> +     task = get_proc_task(inode);
> +     if (!task)
> +             return error;
> +     task_lock(task);
> +     if (!task->exe_file)
> +             goto out;
> +     *mnt = mntget(task->exe_file->f_path.mnt);
> +     *dentry = dget(task->exe_file->f_path.dentry);
> +     error = 0;
> +out:
> +     task_unlock(task);
> +     put_task_struct(task);
> +     return error;
> +}
> +
>  static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata 
> *nd)
>  {
>       struct inode *inode = dentry->d_inode;
>       int error = -EACCES;
> 
> Index: linux-2.6.22-rc2-mm1/kernel/exit.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/kernel/exit.c
> +++ linux-2.6.22-rc2-mm1/kernel/exit.c
> @@ -924,10 +924,16 @@ fastcall void do_exit(long code)
>       if (unlikely(tsk->audit_context))
>               audit_free(tsk);
> 
>       taskstats_exit(tsk, group_dead);
> 
> +     task_lock(tsk);
> +     if (tsk->exe_file) {
> +             fput(tsk->exe_file);
> +             tsk->exe_file = NULL;
> +     }
> +     task_unlock(tsk);
>       exit_mm(tsk);
> 
>       if (group_dead)
>               acct_process();
>       exit_sem(tsk);
> Index: linux-2.6.22-rc2-mm1/kernel/fork.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/kernel/fork.c
> +++ linux-2.6.22-rc2-mm1/kernel/fork.c
> @@ -1163,10 +1163,12 @@ static struct task_struct *copy_process(
> 
>       /* ok, now we should be set up.. */
>       p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & 
> CSIGNAL);
>       p->pdeath_signal = 0;
>       p->exit_state = 0;
> +     if (p->exe_file) /* ptr copied in dup_task_struct still needs a ref */
> +             get_file(p->exe_file);
> 
>       /*
>        * Ok, make it visible to the rest of the system.
>        * We dont wake it up yet.
>        */
> @@ -1218,11 +1220,11 @@ static struct task_struct *copy_process(
>       recalc_sigpending();
>       if (signal_pending(current)) {
>               spin_unlock(&current->sighand->siglock);
>               write_unlock_irq(&tasklist_lock);
>               retval = -ERESTARTNOINTR;
> -             goto bad_fork_cleanup_namespaces;
> +             goto bad_fork_cleanup_exe_file;
>       }
> 
>       if (clone_flags & CLONE_THREAD) {
>               p->group_leader = current->group_leader;
>               list_add_tail_rcu(&p->thread_group, 
> &p->group_leader->thread_group);
> @@ -1274,10 +1276,15 @@ static struct task_struct *copy_process(
>               put_user(p->pid, parent_tidptr);
> 
>       proc_fork_connector(p);
>       return p;
> 
> +bad_fork_cleanup_exe_file:
> +     if (p->exe_file) {
> +             fput(p->exe_file);
> +             p->exe_file = NULL;
> +     }
>  bad_fork_cleanup_namespaces:
>       exit_task_namespaces(p);
>  bad_fork_cleanup_keys:
>       exit_keys(p);
>  bad_fork_cleanup_mm:
> Index: linux-2.6.22-rc2-mm1/fs/proc/task_mmu.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/task_mmu.c
> +++ linux-2.6.22-rc2-mm1/fs/proc/task_mmu.c
> @@ -71,44 +71,10 @@ int task_statm(struct mm_struct *mm, int
>       *data = mm->total_vm - mm->shared_vm;
>       *resident = *shared + get_mm_counter(mm, anon_rss);
>       return mm->total_vm;
>  }
> 
> -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct 
> vfsmount **mnt)
> -{
> -     struct vm_area_struct * vma;
> -     int result = -ENOENT;
> -     struct task_struct *task = get_proc_task(inode);
> -     struct mm_struct * mm = NULL;
> -
> -     if (task) {
> -             mm = get_task_mm(task);
> -             put_task_struct(task);
> -     }
> -     if (!mm)
> -             goto out;
> -     down_read(&mm->mmap_sem);
> -
> -     vma = mm->mmap;
> -     while (vma) {
> -             if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
> -                     break;
> -             vma = vma->vm_next;
> -     }
> -
> -     if (vma) {
> -             *mnt = mntget(vma->vm_file->f_path.mnt);
> -             *dentry = dget(vma->vm_file->f_path.dentry);
> -             result = 0;
> -     }
> -
> -     up_read(&mm->mmap_sem);
> -     mmput(mm);
> -out:
> -     return result;
> -}
> -
>  static void pad_len_spaces(struct seq_file *m, int len)
>  {
>       len = 25 + sizeof(void*) * 6 - len;
>       if (len < 1)
>               len = 1;
> Index: linux-2.6.22-rc2-mm1/fs/proc/task_nommu.c
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/task_nommu.c
> +++ linux-2.6.22-rc2-mm1/fs/proc/task_nommu.c
> @@ -102,44 +102,10 @@ int task_statm(struct mm_struct *mm, int
>       up_read(&mm->mmap_sem);
>       *resident = size;
>       return size;
>  }
> 
> -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct 
> vfsmount **mnt)
> -{
> -     struct vm_list_struct *vml;
> -     struct vm_area_struct *vma;
> -     struct task_struct *task = get_proc_task(inode);
> -     struct mm_struct *mm = get_task_mm(task);
> -     int result = -ENOENT;
> -
> -     if (!mm)
> -             goto out;
> -     down_read(&mm->mmap_sem);
> -
> -     vml = mm->context.vmlist;
> -     vma = NULL;
> -     while (vml) {
> -             if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
> -                     vma = vml->vma;
> -                     break;
> -             }
> -             vml = vml->next;
> -     }
> -
> -     if (vma) {
> -             *mnt = mntget(vma->vm_file->f_path.mnt);
> -             *dentry = dget(vma->vm_file->f_path.dentry);
> -             result = 0;
> -     }
> -
> -     up_read(&mm->mmap_sem);
> -     mmput(mm);
> -out:
> -     return result;
> -}
> -
>  /*
>   * display mapping lines for a particular process's /proc/pid/maps
>   */
>  static int show_map(struct seq_file *m, void *_vml)
>  {
> Index: linux-2.6.22-rc2-mm1/fs/proc/internal.h
> ===================================================================
> --- linux-2.6.22-rc2-mm1.orig/fs/proc/internal.h
> +++ linux-2.6.22-rc2-mm1/fs/proc/internal.h
> @@ -38,11 +38,10 @@ extern int nommu_vma_show(struct seq_fil
>  #endif
> 
>  extern int maps_protect;
> 
>  extern void create_seq_entry(char *name, mode_t mode, const struct 
> file_operations *f);
> -extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount 
> **);
>  extern int proc_tid_stat(struct task_struct *,  char *);
>  extern int proc_tgid_stat(struct task_struct *, char *);
>  extern int proc_pid_status(struct task_struct *, char *);
>  extern int proc_pid_statm(struct task_struct *, char *);
>  extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
> 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to