Currently it is necessary for the usermode helper code and the code that launches init to use set_fs so that pages coming from the kernel look like they are coming from userspace.
To allow that usage of set_fs to be removed cleanly the argument copying from userspace needs to happen earlier. Factor bprm_execve out of do_execve_common to separate out the copying of arguments to the newe stack, and the rest of exec. In separating bprm_execve from do_execve_common the copying of the arguments onto the new stack happens earlier. As the copying of the arguments does not depend any security hooks, files, the file table, current->in_execve, current->fs->in_exec, bprm->unsafe, or creds this is safe. Likewise the security hook security_creds_for_exec does not depend upon preventing the argument copying from happening. In addition to making it possible to implement kernel_execve that performs the copying differently, this separation of bprm_execve from do_execve_common makes for a nice separation of responsibilities making the exec code easier to navigate. Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com> --- fs/exec.c | 108 +++++++++++++++++++++++++++++------------------------- 1 file changed, 58 insertions(+), 50 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index afb168bf5e23..50508892fa71 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1856,44 +1856,16 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. */ -static int do_execveat_common(int fd, struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp, - int flags) +static int bprm_execve(struct linux_binprm *bprm, + int fd, struct filename *filename, int flags) { - struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; int retval; - if (IS_ERR(filename)) - return PTR_ERR(filename); - - /* - * We move the actual failure in case of RLIMIT_NPROC excess from - * set*uid() to execve() because too many poorly written programs - * don't check setuid() return code. Here we additionally recheck - * whether NPROC limit is still exceeded. - */ - if ((current->flags & PF_NPROC_EXCEEDED) && - atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { - retval = -EAGAIN; - goto out_ret; - } - - /* We're below the limit (still or again), so we don't want to make - * further execve() calls fail. */ - current->flags &= ~PF_NPROC_EXCEEDED; - - bprm = alloc_bprm(fd, filename); - if (IS_ERR(bprm)) { - retval = PTR_ERR(bprm); - goto out_ret; - } - retval = unshare_files(&displaced); if (retval) - goto out_free; + return retval; retval = prepare_bprm_creds(bprm); if (retval) @@ -1919,28 +1891,11 @@ static int do_execveat_common(int fd, struct filename *filename, close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; - retval = prepare_arg_pages(bprm, argv, envp); - if (retval < 0) - goto out; - /* Set the unchanging part of bprm->cred */ retval = security_bprm_creds_for_exec(bprm); if (retval) goto out; - retval = copy_string_kernel(bprm->filename, bprm); - if (retval < 0) - goto out; - - bprm->exec = bprm->p; - retval = copy_strings(bprm->envc, envp, bprm); - if (retval < 0) - goto out; - - retval = copy_strings(bprm->argc, argv, bprm); - if (retval < 0) - goto out; - retval = exec_binprm(bprm); if (retval < 0) goto out; @@ -1951,8 +1906,6 @@ static int do_execveat_common(int fd, struct filename *filename, rseq_execve(current); acct_update_integrals(current); task_numa_free(current, false); - free_bprm(bprm); - putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1974,6 +1927,61 @@ static int do_execveat_common(int fd, struct filename *filename, out_files: if (displaced) reset_files_struct(displaced); + + return retval; +} + +static int do_execveat_common(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags) +{ + struct linux_binprm *bprm; + int retval; + + if (IS_ERR(filename)) + return PTR_ERR(filename); + + /* + * We move the actual failure in case of RLIMIT_NPROC excess from + * set*uid() to execve() because too many poorly written programs + * don't check setuid() return code. Here we additionally recheck + * whether NPROC limit is still exceeded. + */ + if ((current->flags & PF_NPROC_EXCEEDED) && + atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { + retval = -EAGAIN; + goto out_ret; + } + + /* We're below the limit (still or again), so we don't want to make + * further execve() calls fail. */ + current->flags &= ~PF_NPROC_EXCEEDED; + + bprm = alloc_bprm(fd, filename); + if (IS_ERR(bprm)) { + retval = PTR_ERR(bprm); + goto out_ret; + } + + retval = prepare_arg_pages(bprm, argv, envp); + if (retval < 0) + goto out_free; + + retval = copy_string_kernel(bprm->filename, bprm); + if (retval < 0) + goto out_free; + bprm->exec = bprm->p; + + retval = copy_strings(bprm->envc, envp, bprm); + if (retval < 0) + goto out_free; + + retval = copy_strings(bprm->argc, argv, bprm); + if (retval < 0) + goto out_free; + + retval = bprm_execve(bprm, fd, filename, flags); out_free: free_bprm(bprm); -- 2.25.0