Module Name: src
Committed By: ad
Date: Sun Jan 12 18:30:58 UTC 2020
Modified Files:
src/sys/compat/linux/arch/amd64: linux_exec_machdep.c
src/sys/compat/linux/common: linux_exec_elf32.c
src/sys/kern: exec_elf.c exec_subr.c kern_exec.c
src/sys/sys: exec.h
Log Message:
Tidy up the vnode locking around execve() on ELF images to acquire and
release the locks fewer times. Proposed on tech-kern a very long time go.
To generate a diff of this commit:
cvs rdiff -u -r1.22 -r1.23 \
src/sys/compat/linux/arch/amd64/linux_exec_machdep.c
cvs rdiff -u -r1.99 -r1.100 src/sys/compat/linux/common/linux_exec_elf32.c
cvs rdiff -u -r1.100 -r1.101 src/sys/kern/exec_elf.c
cvs rdiff -u -r1.82 -r1.83 src/sys/kern/exec_subr.c
cvs rdiff -u -r1.486 -r1.487 src/sys/kern/kern_exec.c
cvs rdiff -u -r1.157 -r1.158 src/sys/sys/exec.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/compat/linux/arch/amd64/linux_exec_machdep.c
diff -u src/sys/compat/linux/arch/amd64/linux_exec_machdep.c:1.22 src/sys/compat/linux/arch/amd64/linux_exec_machdep.c:1.23
--- src/sys/compat/linux/arch/amd64/linux_exec_machdep.c:1.22 Sun Feb 23 12:01:51 2014
+++ src/sys/compat/linux/arch/amd64/linux_exec_machdep.c Sun Jan 12 18:30:58 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: linux_exec_machdep.c,v 1.22 2014/02/23 12:01:51 njoly Exp $ */
+/* $NetBSD: linux_exec_machdep.c,v 1.23 2020/01/12 18:30:58 ad Exp $ */
/*-
* Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.22 2014/02/23 12:01:51 njoly Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.23 2020/01/12 18:30:58 ad Exp $");
#define ELFSIZE 64
@@ -156,7 +156,7 @@ ELFNAME2(linux,copyargs)(struct lwp *l,
if (ap == NULL) {
phsize = eh->e_phnum * sizeof(Elf_Phdr);
ph = (Elf_Phdr *)kmem_alloc(phsize, KM_SLEEP);
- error = exec_read_from(l, pack->ep_vp, eh->e_phoff, ph, phsize);
+ error = exec_read(l, pack->ep_vp, eh->e_phoff, ph, phsize, 0);
if (error != 0) {
for (i = 0; i < eh->e_phnum; i++) {
if (ph[i].p_type == PT_PHDR) {
Index: src/sys/compat/linux/common/linux_exec_elf32.c
diff -u src/sys/compat/linux/common/linux_exec_elf32.c:1.99 src/sys/compat/linux/common/linux_exec_elf32.c:1.100
--- src/sys/compat/linux/common/linux_exec_elf32.c:1.99 Fri Mar 1 11:06:56 2019
+++ src/sys/compat/linux/common/linux_exec_elf32.c Sun Jan 12 18:30:58 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: linux_exec_elf32.c,v 1.99 2019/03/01 11:06:56 pgoyette Exp $ */
+/* $NetBSD: linux_exec_elf32.c,v 1.100 2020/01/12 18:30:58 ad Exp $ */
/*-
* Copyright (c) 1995, 1998, 2000, 2001 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_exec_elf32.c,v 1.99 2019/03/01 11:06:56 pgoyette Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_exec_elf32.c,v 1.100 2020/01/12 18:30:58 ad Exp $");
#ifndef ELFSIZE
/* XXX should die */
@@ -107,7 +107,8 @@ ELFNAME2(linux,atexit_signature)(
/* Load the section header table. */
shsize = eh->e_shnum * sizeof(Elf_Shdr);
sh = (Elf_Shdr *) malloc(shsize, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize);
+ error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize,
+ IO_NODELOCKED);
if (error)
goto out;
@@ -126,8 +127,8 @@ ELFNAME2(linux,atexit_signature)(
if (s->sh_name + sigsz > sh[shstrndx].sh_size)
continue;
- error = exec_read_from(l, epp->ep_vp, stroff + s->sh_name, tbuf,
- sigsz);
+ error = exec_read(l, epp->ep_vp, stroff + s->sh_name, tbuf,
+ sigsz, IO_NODELOCKED);
if (error)
goto out;
if (!memcmp(tbuf, signature, sigsz)) {
@@ -171,7 +172,8 @@ ELFNAME2(linux,gcc_signature)(
shsize = eh->e_shnum * sizeof(Elf_Shdr);
sh = (Elf_Shdr *) malloc(shsize, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize);
+ error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize,
+ IO_NODELOCKED);
if (error)
goto out;
@@ -189,8 +191,8 @@ ELFNAME2(linux,gcc_signature)(
s->sh_size < sizeof(signature) - 1)
continue;
- error = exec_read_from(l, epp->ep_vp, s->sh_offset, tbuf,
- sizeof(signature) - 1);
+ error = exec_read(l, epp->ep_vp, s->sh_offset, tbuf,
+ sizeof(signature) - 1, IO_NODELOCKED);
if (error)
continue;
@@ -230,7 +232,8 @@ ELFNAME2(linux,debuglink_signature)(stru
/* Load the section header table. */
shsize = eh->e_shnum * sizeof(Elf_Shdr);
sh = (Elf_Shdr *) malloc(shsize, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize);
+ error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize,
+ IO_NODELOCKED);
if (error)
goto out;
@@ -249,8 +252,8 @@ ELFNAME2(linux,debuglink_signature)(stru
if (s->sh_name + sigsz > sh[shstrndx].sh_size)
continue;
- error = exec_read_from(l, epp->ep_vp, stroff + s->sh_name, tbuf,
- sigsz);
+ error = exec_read(l, epp->ep_vp, stroff + s->sh_name, tbuf,
+ sigsz, IO_NODELOCKED);
if (error)
goto out;
if (!memcmp(tbuf, signature, sigsz)) {
@@ -290,7 +293,8 @@ ELFNAME2(linux,go_rt0_signature)(struct
/* Load the section header table. */
shsize = eh->e_shnum * sizeof(Elf_Shdr);
sh = malloc(shsize, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, eh->e_shoff, sh, shsize);
+ error = exec_read(l, epp->ep_vp, eh->e_shoff, sh, shsize,
+ IO_NODELOCKED);
if (error)
goto out;
@@ -309,8 +313,8 @@ ELFNAME2(linux,go_rt0_signature)(struct
if (s->sh_name + sigsz > sh[shstrndx].sh_size)
continue;
- error = exec_read_from(l, epp->ep_vp, stroff + s->sh_name, tbuf,
- sigsz);
+ error = exec_read(l, epp->ep_vp, stroff + s->sh_name, tbuf,
+ sigsz, IO_NODELOCKED);
if (error)
goto out;
if (!memcmp(tbuf, signature, sigsz)) {
@@ -329,8 +333,8 @@ ELFNAME2(linux,go_rt0_signature)(struct
sh[i].sh_size = 1024 * 1024;
tmp = malloc(sh[i].sh_size, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, sh[i].sh_offset, tmp,
- sh[i].sh_size);
+ error = exec_read(l, epp->ep_vp, sh[i].sh_offset, tmp,
+ sh[i].sh_size, IO_NODELOCKED);
if (error)
goto out;
@@ -368,7 +372,8 @@ ELFNAME2(linux,signature)(struct lwp *l,
phsize = eh->e_phnum * sizeof(Elf_Phdr);
ph = (Elf_Phdr *)malloc(phsize, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize);
+ error = exec_read(l, epp->ep_vp, eh->e_phoff, ph, phsize,
+ IO_NODELOCKED);
if (error)
goto out;
@@ -383,8 +388,8 @@ ELFNAME2(linux,signature)(struct lwp *l,
continue;
np = (Elf_Nhdr *)malloc(ephp->p_filesz, M_TEMP, M_WAITOK);
- error = exec_read_from(l, epp->ep_vp, ephp->p_offset, np,
- ephp->p_filesz);
+ error = exec_read(l, epp->ep_vp, ephp->p_offset, np,
+ ephp->p_filesz, IO_NODELOCKED);
if (error)
goto next;
Index: src/sys/kern/exec_elf.c
diff -u src/sys/kern/exec_elf.c:1.100 src/sys/kern/exec_elf.c:1.101
--- src/sys/kern/exec_elf.c:1.100 Mon Sep 16 11:11:34 2019
+++ src/sys/kern/exec_elf.c Sun Jan 12 18:30:58 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: exec_elf.c,v 1.100 2019/09/16 11:11:34 christos Exp $ */
+/* $NetBSD: exec_elf.c,v 1.101 2020/01/12 18:30:58 ad Exp $ */
/*-
- * Copyright (c) 1994, 2000, 2005, 2015 The NetBSD Foundation, Inc.
+ * Copyright (c) 1994, 2000, 2005, 2015, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -57,7 +57,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: exec_elf.c,v 1.100 2019/09/16 11:11:34 christos Exp $");
+__KERNEL_RCSID(1, "$NetBSD: exec_elf.c,v 1.101 2020/01/12 18:30:58 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_pax.h"
@@ -330,6 +330,8 @@ elf_load_psection(struct exec_vmcmd_set
long diff, offset;
int vmprot = 0;
+ KASSERT(VOP_ISLOCKED(vp) != LK_NONE);
+
/*
* If the user specified an address, then we load there.
*/
@@ -462,14 +464,15 @@ elf_load_interp(struct lwp *l, struct ex
*/
if (vp->v_type != VREG) {
error = EACCES;
- goto badunlock;
+ goto bad;
}
if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
- goto badunlock;
+ goto bad;
/* get attributes */
+ /* XXX VOP_GETATTR() is the only thing that needs LK_EXCLUSIVE ^ */
if ((error = VOP_GETATTR(vp, &attr, l->l_cred)) != 0)
- goto badunlock;
+ goto bad;
/*
* Check mount point. Though we're not trying to exec this binary,
@@ -478,18 +481,17 @@ elf_load_interp(struct lwp *l, struct ex
*/
if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
error = EACCES;
- goto badunlock;
+ goto bad;
}
if (vp->v_mount->mnt_flag & MNT_NOSUID)
epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
error = vn_marktext(vp);
if (error)
- goto badunlock;
-
- VOP_UNLOCK(vp);
+ goto bad;
- if ((error = exec_read_from(l, vp, 0, &eh, sizeof(eh))) != 0)
+ error = exec_read(l, vp, 0, &eh, sizeof(eh), IO_NODELOCKED);
+ if (error != 0)
goto bad;
if ((error = elf_check_header(&eh)) != 0)
@@ -503,7 +505,8 @@ elf_load_interp(struct lwp *l, struct ex
phsize = eh.e_phnum * sizeof(Elf_Phdr);
ph = kmem_alloc(phsize, KM_SLEEP);
- if ((error = exec_read_from(l, vp, eh.e_phoff, ph, phsize)) != 0)
+ error = exec_read(l, vp, eh.e_phoff, ph, phsize, IO_NODELOCKED);
+ if (error != 0)
goto bad;
#ifdef ELF_INTERP_NON_RELOCATABLE
@@ -629,16 +632,13 @@ elf_load_interp(struct lwp *l, struct ex
* This value is ignored if TOPDOWN.
*/
*last = addr;
- vrele(vp);
+ vput(vp);
return 0;
-badunlock:
- VOP_UNLOCK(vp);
-
bad:
if (ph != NULL)
kmem_free(ph, phsize);
- vrele(vp);
+ vput(vp);
return error;
}
@@ -683,9 +683,13 @@ exec_elf_makecmds(struct lwp *l, struct
return ENOEXEC;
}
+ /* XXX only LK_EXCLUSIVE to match all others - allow spinning */
+ vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
error = vn_marktext(epp->ep_vp);
- if (error)
+ if (error) {
+ VOP_UNLOCK(epp->ep_vp);
return error;
+ }
/*
* Allocate space to hold all the program headers, and read them
@@ -694,9 +698,12 @@ exec_elf_makecmds(struct lwp *l, struct
phsize = eh->e_phnum * sizeof(Elf_Phdr);
ph = kmem_alloc(phsize, KM_SLEEP);
- if ((error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize)) !=
- 0)
+ error = exec_read(l, epp->ep_vp, eh->e_phoff, ph, phsize,
+ IO_NODELOCKED);
+ if (error != 0) {
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
+ }
epp->ep_taddr = epp->ep_tsize = ELFDEFNNAME(NO_ADDR);
epp->ep_daddr = epp->ep_dsize = ELFDEFNNAME(NO_ADDR);
@@ -708,16 +715,21 @@ exec_elf_makecmds(struct lwp *l, struct
DPRINTF("bad interpreter namelen %#jx",
(uintmax_t)pp->p_filesz);
error = ENOEXEC;
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
}
interp = PNBUF_GET();
- if ((error = exec_read_from(l, epp->ep_vp,
- pp->p_offset, interp, pp->p_filesz)) != 0)
+ error = exec_read(l, epp->ep_vp, pp->p_offset, interp,
+ pp->p_filesz, IO_NODELOCKED);
+ if (error != 0) {
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
+ }
/* Ensure interp is NUL-terminated and of the expected length */
if (strnlen(interp, pp->p_filesz) != pp->p_filesz - 1) {
DPRINTF("bad interpreter name");
error = ENOEXEC;
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
}
break;
@@ -738,13 +750,17 @@ exec_elf_makecmds(struct lwp *l, struct
error = (*epp->ep_esch->u.elf_probe_func)(l, epp, eh, interp,
&startp);
- if (error)
+ if (error) {
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
+ }
pos = (Elf_Addr)startp;
}
- if (is_dyn && (error = elf_placedynexec(epp, eh, ph)) != 0)
+ if (is_dyn && (error = elf_placedynexec(epp, eh, ph)) != 0) {
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
+ }
/*
* Load all the necessary sections
@@ -757,8 +773,10 @@ exec_elf_makecmds(struct lwp *l, struct
case PT_LOAD:
if ((error = elf_load_psection(&epp->ep_vmcmds,
epp->ep_vp, &ph[i], &addr, &size, VMCMD_FIXED))
- != 0)
+ != 0) {
+ VOP_UNLOCK(epp->ep_vp);
goto bad;
+ }
/*
* Consider this as text segment, if it is executable.
@@ -801,6 +819,9 @@ exec_elf_makecmds(struct lwp *l, struct
}
}
+ /* Now done with the vnode. */
+ VOP_UNLOCK(epp->ep_vp);
+
if (epp->ep_vmcmds.evs_used == 0) {
/* No VMCMD; there was no PT_LOAD section, or those
* sections were empty */
@@ -900,7 +921,8 @@ netbsd_elf_signature(struct lwp *l, stru
phsize = eh->e_phnum * sizeof(Elf_Phdr);
ph = kmem_alloc(phsize, KM_SLEEP);
- error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize);
+ error = exec_read(l, epp->ep_vp, eh->e_phoff, ph, phsize,
+ IO_NODELOCKED);
if (error)
goto out;
@@ -914,8 +936,8 @@ netbsd_elf_signature(struct lwp *l, stru
continue;
nlen = ph[i].p_filesz;
- error = exec_read_from(l, epp->ep_vp, ph[i].p_offset,
- nbuf, nlen);
+ error = exec_read(l, epp->ep_vp, ph[i].p_offset, nbuf, nlen,
+ IO_NODELOCKED);
if (error)
continue;
Index: src/sys/kern/exec_subr.c
diff -u src/sys/kern/exec_subr.c:1.82 src/sys/kern/exec_subr.c:1.83
--- src/sys/kern/exec_subr.c:1.82 Sun Jul 2 16:41:33 2017
+++ src/sys/kern/exec_subr.c Sun Jan 12 18:30:58 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: exec_subr.c,v 1.82 2017/07/02 16:41:33 joerg Exp $ */
+/* $NetBSD: exec_subr.c,v 1.83 2020/01/12 18:30:58 ad Exp $ */
/*
* Copyright (c) 1993, 1994, 1996 Christopher G. Demetriou
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.82 2017/07/02 16:41:33 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.83 2020/01/12 18:30:58 ad Exp $");
#include "opt_pax.h"
@@ -345,19 +345,21 @@ vmcmd_map_zero(struct lwp *l, struct exe
}
/*
- * exec_read_from():
+ * exec_read():
*
* Read from vnode into buffer at offset.
*/
int
-exec_read_from(struct lwp *l, struct vnode *vp, u_long off, void *bf,
- size_t size)
+exec_read(struct lwp *l, struct vnode *vp, u_long off, void *bf, size_t size,
+ int ioflg)
{
int error;
size_t resid;
+ KASSERT((ioflg & IO_NODELOCKED) == 0 || VOP_ISLOCKED(vp) != LK_NONE);
+
if ((error = vn_rdwr(UIO_READ, vp, bf, size, off, UIO_SYSSPACE,
- 0, l->l_cred, &resid, NULL)) != 0)
+ ioflg, l->l_cred, &resid, NULL)) != 0)
return error;
/*
* See if we got all of it
Index: src/sys/kern/kern_exec.c
diff -u src/sys/kern/kern_exec.c:1.486 src/sys/kern/kern_exec.c:1.487
--- src/sys/kern/kern_exec.c:1.486 Wed Jan 8 17:38:42 2020
+++ src/sys/kern/kern_exec.c Sun Jan 12 18:30:58 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_exec.c,v 1.486 2020/01/08 17:38:42 ad Exp $ */
+/* $NetBSD: kern_exec.c,v 1.487 2020/01/12 18:30:58 ad Exp $ */
/*-
* Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.486 2020/01/08 17:38:42 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.487 2020/01/12 18:30:58 ad Exp $");
#include "opt_exec.h"
#include "opt_execfmt.h"
@@ -409,6 +409,7 @@ check_exec(struct lwp *l, struct exec_pa
goto bad1;
/* get attributes */
+ /* XXX VOP_GETATTR is the only thing that needs LK_EXCLUSIVE here */
if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
goto bad1;
@@ -424,6 +425,12 @@ check_exec(struct lwp *l, struct exec_pa
if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
goto bad1;
+ /* now we have the file, get the exec header */
+ error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
+ UIO_SYSSPACE, IO_NODELOCKED, l->l_cred, &resid, NULL);
+ if (error)
+ goto bad1;
+
/* unlock vp, since we need it unlocked from here on out. */
VOP_UNLOCK(vp);
@@ -442,11 +449,6 @@ check_exec(struct lwp *l, struct exec_pa
goto bad2;
#endif /* PAX_SEGVGUARD */
- /* now we have the file, get the exec header */
- error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
- UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
- if (error)
- goto bad2;
epp->ep_hdrvalid = epp->ep_hdrlen - resid;
/*
@@ -543,7 +545,9 @@ check_exec(struct lwp *l, struct exec_pa
*/
kill_vmcmds(&epp->ep_vmcmds);
+#if NVERIEXEC > 0 || defined(PAX_SEGVGUARD)
bad2:
+#endif
/*
* close and release the vnode, restore the old one, free the
* pathname buf, and punt.
Index: src/sys/sys/exec.h
diff -u src/sys/sys/exec.h:1.157 src/sys/sys/exec.h:1.158
--- src/sys/sys/exec.h:1.157 Wed Nov 20 19:37:54 2019
+++ src/sys/sys/exec.h Sun Jan 12 18:30:58 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: exec.h,v 1.157 2019/11/20 19:37:54 pgoyette Exp $ */
+/* $NetBSD: exec.h,v 1.158 2020/01/12 18:30:58 ad Exp $ */
/*-
* Copyright (c) 1992, 1993
@@ -271,8 +271,8 @@ int check_veriexec (struct lwp *, struc
int check_exec (struct lwp *, struct exec_package *,
struct pathbuf *, char **);
int exec_init (int);
-int exec_read_from (struct lwp *, struct vnode *, u_long off,
- void *, size_t);
+int exec_read (struct lwp *, struct vnode *, u_long off,
+ void *, size_t, int);
int exec_setup_stack (struct lwp *, struct exec_package *);
void exec_free_emul_arg (struct exec_package *);