Module Name: src Committed By: ad Date: Sun Jan 12 18:30:58 UTC 2020
Modified Files: src/sys/compat/linux/arch/amd64: linux_exec_machdep.c src/sys/compat/linux/common: linux_exec_elf32.c src/sys/kern: exec_elf.c exec_subr.c kern_exec.c src/sys/sys: exec.h Log Message: Tidy up the vnode locking around execve() on ELF images to acquire and release the locks fewer times. Proposed on tech-kern a very long time go. To generate a diff of this commit: cvs rdiff -u -r1.22 -r1.23 \ src/sys/compat/linux/arch/amd64/linux_exec_machdep.c cvs rdiff -u -r1.99 -r1.100 src/sys/compat/linux/common/linux_exec_elf32.c cvs rdiff -u -r1.100 -r1.101 src/sys/kern/exec_elf.c cvs rdiff -u -r1.82 -r1.83 src/sys/kern/exec_subr.c cvs rdiff -u -r1.486 -r1.487 src/sys/kern/kern_exec.c cvs rdiff -u -r1.157 -r1.158 src/sys/sys/exec.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/compat/linux/arch/amd64/linux_exec_machdep.c diff -u src/sys/compat/linux/arch/amd64/linux_exec_machdep.c:1.22 src/sys/compat/linux/arch/amd64/linux_exec_machdep.c:1.23 --- src/sys/compat/linux/arch/amd64/linux_exec_machdep.c:1.22 Sun Feb 23 12:01:51 2014 +++ src/sys/compat/linux/arch/amd64/linux_exec_machdep.c Sun Jan 12 18:30:58 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: linux_exec_machdep.c,v 1.22 2014/02/23 12:01:51 njoly Exp $ */ +/* $NetBSD: linux_exec_machdep.c,v 1.23 2020/01/12 18:30:58 ad Exp $ */ /*- * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.22 2014/02/23 12:01:51 njoly Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.23 2020/01/12 18:30:58 ad Exp $"); #define ELFSIZE 64 @@ -156,7 +156,7 @@ ELFNAME2(linux,copyargs)(struct lwp *l, if (ap == NULL) { phsize = eh->e_phnum * sizeof(Elf_Phdr); ph = (Elf_Phdr *)kmem_alloc(phsize, KM_SLEEP); - error = exec_read_from(l, pack->ep_vp, eh->e_phoff, ph, phsize); + error = exec_read(l, pack->ep_vp, eh->e_phoff, ph, phsize, 0); if (error != 0) { for (i = 0; i < eh->e_phnum; i++) { if (ph[i].p_type == PT_PHDR) { Index: src/sys/compat/linux/common/linux_exec_elf32.c diff -u src/sys/compat/linux/common/linux_exec_elf32.c:1.99 src/sys/compat/linux/common/linux_exec_elf32.c:1.100 --- src/sys/compat/linux/common/linux_exec_elf32.c:1.99 Fri Mar 1 11:06:56 2019 +++ src/sys/compat/linux/common/linux_exec_elf32.c Sun Jan 12 18:30:58 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: linux_exec_elf32.c,v 1.99 2019/03/01 11:06:56 pgoyette Exp $ */ +/* $NetBSD: linux_exec_elf32.c,v 1.100 2020/01/12 18:30:58 ad Exp $ */ /*- * Copyright (c) 1995, 1998, 2000, 2001 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: linux_exec_elf32.c,v 1.99 2019/03/01 11:06:56 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_exec_elf32.c,v 1.100 2020/01/12 18:30:58 ad Exp $"); #ifndef ELFSIZE /* XXX should die */ @@ -107,7 +107,8 @@ ELFNAME2(linux,atexit_signature)( /* Load the section header table. */ shsize = eh->e_shnum * sizeof(Elf_Shdr); sh = (Elf_Shdr *) malloc(shsize, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize); + error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize, + IO_NODELOCKED); if (error) goto out; @@ -126,8 +127,8 @@ ELFNAME2(linux,atexit_signature)( if (s->sh_name + sigsz > sh[shstrndx].sh_size) continue; - error = exec_read_from(l, epp->ep_vp, stroff + s->sh_name, tbuf, - sigsz); + error = exec_read(l, epp->ep_vp, stroff + s->sh_name, tbuf, + sigsz, IO_NODELOCKED); if (error) goto out; if (!memcmp(tbuf, signature, sigsz)) { @@ -171,7 +172,8 @@ ELFNAME2(linux,gcc_signature)( shsize = eh->e_shnum * sizeof(Elf_Shdr); sh = (Elf_Shdr *) malloc(shsize, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize); + error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize, + IO_NODELOCKED); if (error) goto out; @@ -189,8 +191,8 @@ ELFNAME2(linux,gcc_signature)( s->sh_size < sizeof(signature) - 1) continue; - error = exec_read_from(l, epp->ep_vp, s->sh_offset, tbuf, - sizeof(signature) - 1); + error = exec_read(l, epp->ep_vp, s->sh_offset, tbuf, + sizeof(signature) - 1, IO_NODELOCKED); if (error) continue; @@ -230,7 +232,8 @@ ELFNAME2(linux,debuglink_signature)(stru /* Load the section header table. */ shsize = eh->e_shnum * sizeof(Elf_Shdr); sh = (Elf_Shdr *) malloc(shsize, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize); + error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize, + IO_NODELOCKED); if (error) goto out; @@ -249,8 +252,8 @@ ELFNAME2(linux,debuglink_signature)(stru if (s->sh_name + sigsz > sh[shstrndx].sh_size) continue; - error = exec_read_from(l, epp->ep_vp, stroff + s->sh_name, tbuf, - sigsz); + error = exec_read(l, epp->ep_vp, stroff + s->sh_name, tbuf, + sigsz, IO_NODELOCKED); if (error) goto out; if (!memcmp(tbuf, signature, sigsz)) { @@ -290,7 +293,8 @@ ELFNAME2(linux,go_rt0_signature)(struct /* Load the section header table. */ shsize = eh->e_shnum * sizeof(Elf_Shdr); sh = malloc(shsize, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize); + error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize, + IO_NODELOCKED); if (error) goto out; @@ -309,8 +313,8 @@ ELFNAME2(linux,go_rt0_signature)(struct if (s->sh_name + sigsz > sh[shstrndx].sh_size) continue; - error = exec_read_from(l, epp->ep_vp, stroff + s->sh_name, tbuf, - sigsz); + error = exec_read(l, epp->ep_vp, stroff + s->sh_name, tbuf, + sigsz, IO_NODELOCKED); if (error) goto out; if (!memcmp(tbuf, signature, sigsz)) { @@ -329,8 +333,8 @@ ELFNAME2(linux,go_rt0_signature)(struct sh[i].sh_size = 1024 * 1024; tmp = malloc(sh[i].sh_size, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, sh[i].sh_offset, tmp, - sh[i].sh_size); + error = exec_read(l, epp->ep_vp, sh[i].sh_offset, tmp, + sh[i].sh_size, IO_NODELOCKED); if (error) goto out; @@ -368,7 +372,8 @@ ELFNAME2(linux,signature)(struct lwp *l, phsize = eh->e_phnum * sizeof(Elf_Phdr); ph = (Elf_Phdr *)malloc(phsize, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize); + error = exec_read(l, epp->ep_vp, eh->e_phoff, ph, phsize, + IO_NODELOCKED); if (error) goto out; @@ -383,8 +388,8 @@ ELFNAME2(linux,signature)(struct lwp *l, continue; np = (Elf_Nhdr *)malloc(ephp->p_filesz, M_TEMP, M_WAITOK); - error = exec_read_from(l, epp->ep_vp, ephp->p_offset, np, - ephp->p_filesz); + error = exec_read(l, epp->ep_vp, ephp->p_offset, np, + ephp->p_filesz, IO_NODELOCKED); if (error) goto next; Index: src/sys/kern/exec_elf.c diff -u src/sys/kern/exec_elf.c:1.100 src/sys/kern/exec_elf.c:1.101 --- src/sys/kern/exec_elf.c:1.100 Mon Sep 16 11:11:34 2019 +++ src/sys/kern/exec_elf.c Sun Jan 12 18:30:58 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: exec_elf.c,v 1.100 2019/09/16 11:11:34 christos Exp $ */ +/* $NetBSD: exec_elf.c,v 1.101 2020/01/12 18:30:58 ad Exp $ */ /*- - * Copyright (c) 1994, 2000, 2005, 2015 The NetBSD Foundation, Inc. + * Copyright (c) 1994, 2000, 2005, 2015, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -57,7 +57,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: exec_elf.c,v 1.100 2019/09/16 11:11:34 christos Exp $"); +__KERNEL_RCSID(1, "$NetBSD: exec_elf.c,v 1.101 2020/01/12 18:30:58 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_pax.h" @@ -330,6 +330,8 @@ elf_load_psection(struct exec_vmcmd_set long diff, offset; int vmprot = 0; + KASSERT(VOP_ISLOCKED(vp) != LK_NONE); + /* * If the user specified an address, then we load there. */ @@ -462,14 +464,15 @@ elf_load_interp(struct lwp *l, struct ex */ if (vp->v_type != VREG) { error = EACCES; - goto badunlock; + goto bad; } if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0) - goto badunlock; + goto bad; /* get attributes */ + /* XXX VOP_GETATTR() is the only thing that needs LK_EXCLUSIVE ^ */ if ((error = VOP_GETATTR(vp, &attr, l->l_cred)) != 0) - goto badunlock; + goto bad; /* * Check mount point. Though we're not trying to exec this binary, @@ -478,18 +481,17 @@ elf_load_interp(struct lwp *l, struct ex */ if (vp->v_mount->mnt_flag & MNT_NOEXEC) { error = EACCES; - goto badunlock; + goto bad; } if (vp->v_mount->mnt_flag & MNT_NOSUID) epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID); error = vn_marktext(vp); if (error) - goto badunlock; - - VOP_UNLOCK(vp); + goto bad; - if ((error = exec_read_from(l, vp, 0, &eh, sizeof(eh))) != 0) + error = exec_read(l, vp, 0, &eh, sizeof(eh), IO_NODELOCKED); + if (error != 0) goto bad; if ((error = elf_check_header(&eh)) != 0) @@ -503,7 +505,8 @@ elf_load_interp(struct lwp *l, struct ex phsize = eh.e_phnum * sizeof(Elf_Phdr); ph = kmem_alloc(phsize, KM_SLEEP); - if ((error = exec_read_from(l, vp, eh.e_phoff, ph, phsize)) != 0) + error = exec_read(l, vp, eh.e_phoff, ph, phsize, IO_NODELOCKED); + if (error != 0) goto bad; #ifdef ELF_INTERP_NON_RELOCATABLE @@ -629,16 +632,13 @@ elf_load_interp(struct lwp *l, struct ex * This value is ignored if TOPDOWN. */ *last = addr; - vrele(vp); + vput(vp); return 0; -badunlock: - VOP_UNLOCK(vp); - bad: if (ph != NULL) kmem_free(ph, phsize); - vrele(vp); + vput(vp); return error; } @@ -683,9 +683,13 @@ exec_elf_makecmds(struct lwp *l, struct return ENOEXEC; } + /* XXX only LK_EXCLUSIVE to match all others - allow spinning */ + vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY); error = vn_marktext(epp->ep_vp); - if (error) + if (error) { + VOP_UNLOCK(epp->ep_vp); return error; + } /* * Allocate space to hold all the program headers, and read them @@ -694,9 +698,12 @@ exec_elf_makecmds(struct lwp *l, struct phsize = eh->e_phnum * sizeof(Elf_Phdr); ph = kmem_alloc(phsize, KM_SLEEP); - if ((error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize)) != - 0) + error = exec_read(l, epp->ep_vp, eh->e_phoff, ph, phsize, + IO_NODELOCKED); + if (error != 0) { + VOP_UNLOCK(epp->ep_vp); goto bad; + } epp->ep_taddr = epp->ep_tsize = ELFDEFNNAME(NO_ADDR); epp->ep_daddr = epp->ep_dsize = ELFDEFNNAME(NO_ADDR); @@ -708,16 +715,21 @@ exec_elf_makecmds(struct lwp *l, struct DPRINTF("bad interpreter namelen %#jx", (uintmax_t)pp->p_filesz); error = ENOEXEC; + VOP_UNLOCK(epp->ep_vp); goto bad; } interp = PNBUF_GET(); - if ((error = exec_read_from(l, epp->ep_vp, - pp->p_offset, interp, pp->p_filesz)) != 0) + error = exec_read(l, epp->ep_vp, pp->p_offset, interp, + pp->p_filesz, IO_NODELOCKED); + if (error != 0) { + VOP_UNLOCK(epp->ep_vp); goto bad; + } /* Ensure interp is NUL-terminated and of the expected length */ if (strnlen(interp, pp->p_filesz) != pp->p_filesz - 1) { DPRINTF("bad interpreter name"); error = ENOEXEC; + VOP_UNLOCK(epp->ep_vp); goto bad; } break; @@ -738,13 +750,17 @@ exec_elf_makecmds(struct lwp *l, struct error = (*epp->ep_esch->u.elf_probe_func)(l, epp, eh, interp, &startp); - if (error) + if (error) { + VOP_UNLOCK(epp->ep_vp); goto bad; + } pos = (Elf_Addr)startp; } - if (is_dyn && (error = elf_placedynexec(epp, eh, ph)) != 0) + if (is_dyn && (error = elf_placedynexec(epp, eh, ph)) != 0) { + VOP_UNLOCK(epp->ep_vp); goto bad; + } /* * Load all the necessary sections @@ -757,8 +773,10 @@ exec_elf_makecmds(struct lwp *l, struct case PT_LOAD: if ((error = elf_load_psection(&epp->ep_vmcmds, epp->ep_vp, &ph[i], &addr, &size, VMCMD_FIXED)) - != 0) + != 0) { + VOP_UNLOCK(epp->ep_vp); goto bad; + } /* * Consider this as text segment, if it is executable. @@ -801,6 +819,9 @@ exec_elf_makecmds(struct lwp *l, struct } } + /* Now done with the vnode. */ + VOP_UNLOCK(epp->ep_vp); + if (epp->ep_vmcmds.evs_used == 0) { /* No VMCMD; there was no PT_LOAD section, or those * sections were empty */ @@ -900,7 +921,8 @@ netbsd_elf_signature(struct lwp *l, stru phsize = eh->e_phnum * sizeof(Elf_Phdr); ph = kmem_alloc(phsize, KM_SLEEP); - error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize); + error = exec_read(l, epp->ep_vp, eh->e_phoff, ph, phsize, + IO_NODELOCKED); if (error) goto out; @@ -914,8 +936,8 @@ netbsd_elf_signature(struct lwp *l, stru continue; nlen = ph[i].p_filesz; - error = exec_read_from(l, epp->ep_vp, ph[i].p_offset, - nbuf, nlen); + error = exec_read(l, epp->ep_vp, ph[i].p_offset, nbuf, nlen, + IO_NODELOCKED); if (error) continue; Index: src/sys/kern/exec_subr.c diff -u src/sys/kern/exec_subr.c:1.82 src/sys/kern/exec_subr.c:1.83 --- src/sys/kern/exec_subr.c:1.82 Sun Jul 2 16:41:33 2017 +++ src/sys/kern/exec_subr.c Sun Jan 12 18:30:58 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: exec_subr.c,v 1.82 2017/07/02 16:41:33 joerg Exp $ */ +/* $NetBSD: exec_subr.c,v 1.83 2020/01/12 18:30:58 ad Exp $ */ /* * Copyright (c) 1993, 1994, 1996 Christopher G. Demetriou @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.82 2017/07/02 16:41:33 joerg Exp $"); +__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.83 2020/01/12 18:30:58 ad Exp $"); #include "opt_pax.h" @@ -345,19 +345,21 @@ vmcmd_map_zero(struct lwp *l, struct exe } /* - * exec_read_from(): + * exec_read(): * * Read from vnode into buffer at offset. */ int -exec_read_from(struct lwp *l, struct vnode *vp, u_long off, void *bf, - size_t size) +exec_read(struct lwp *l, struct vnode *vp, u_long off, void *bf, size_t size, + int ioflg) { int error; size_t resid; + KASSERT((ioflg & IO_NODELOCKED) == 0 || VOP_ISLOCKED(vp) != LK_NONE); + if ((error = vn_rdwr(UIO_READ, vp, bf, size, off, UIO_SYSSPACE, - 0, l->l_cred, &resid, NULL)) != 0) + ioflg, l->l_cred, &resid, NULL)) != 0) return error; /* * See if we got all of it Index: src/sys/kern/kern_exec.c diff -u src/sys/kern/kern_exec.c:1.486 src/sys/kern/kern_exec.c:1.487 --- src/sys/kern/kern_exec.c:1.486 Wed Jan 8 17:38:42 2020 +++ src/sys/kern/kern_exec.c Sun Jan 12 18:30:58 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exec.c,v 1.486 2020/01/08 17:38:42 ad Exp $ */ +/* $NetBSD: kern_exec.c,v 1.487 2020/01/12 18:30:58 ad Exp $ */ /*- * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. @@ -62,7 +62,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.486 2020/01/08 17:38:42 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.487 2020/01/12 18:30:58 ad Exp $"); #include "opt_exec.h" #include "opt_execfmt.h" @@ -409,6 +409,7 @@ check_exec(struct lwp *l, struct exec_pa goto bad1; /* get attributes */ + /* XXX VOP_GETATTR is the only thing that needs LK_EXCLUSIVE here */ if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0) goto bad1; @@ -424,6 +425,12 @@ check_exec(struct lwp *l, struct exec_pa if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0) goto bad1; + /* now we have the file, get the exec header */ + error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0, + UIO_SYSSPACE, IO_NODELOCKED, l->l_cred, &resid, NULL); + if (error) + goto bad1; + /* unlock vp, since we need it unlocked from here on out. */ VOP_UNLOCK(vp); @@ -442,11 +449,6 @@ check_exec(struct lwp *l, struct exec_pa goto bad2; #endif /* PAX_SEGVGUARD */ - /* now we have the file, get the exec header */ - error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0, - UIO_SYSSPACE, 0, l->l_cred, &resid, NULL); - if (error) - goto bad2; epp->ep_hdrvalid = epp->ep_hdrlen - resid; /* @@ -543,7 +545,9 @@ check_exec(struct lwp *l, struct exec_pa */ kill_vmcmds(&epp->ep_vmcmds); +#if NVERIEXEC > 0 || defined(PAX_SEGVGUARD) bad2: +#endif /* * close and release the vnode, restore the old one, free the * pathname buf, and punt. Index: src/sys/sys/exec.h diff -u src/sys/sys/exec.h:1.157 src/sys/sys/exec.h:1.158 --- src/sys/sys/exec.h:1.157 Wed Nov 20 19:37:54 2019 +++ src/sys/sys/exec.h Sun Jan 12 18:30:58 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: exec.h,v 1.157 2019/11/20 19:37:54 pgoyette Exp $ */ +/* $NetBSD: exec.h,v 1.158 2020/01/12 18:30:58 ad Exp $ */ /*- * Copyright (c) 1992, 1993 @@ -271,8 +271,8 @@ int check_veriexec (struct lwp *, struc int check_exec (struct lwp *, struct exec_package *, struct pathbuf *, char **); int exec_init (int); -int exec_read_from (struct lwp *, struct vnode *, u_long off, - void *, size_t); +int exec_read (struct lwp *, struct vnode *, u_long off, + void *, size_t, int); int exec_setup_stack (struct lwp *, struct exec_package *); void exec_free_emul_arg (struct exec_package *);