Theo de Raadt <[email protected]> wrote:
> A few weeks ago a conversation about retguard (a diff is probably
> coming) caused me to re-consider & re-read the BROP paper
>
> https://www.scs.stanford.edu/brop/bittau-brop.pdf
Here is a new version of the diff.  The small piece of locking has been
improved by using a private mutex, and the check function has been renamed.
It is important to handle special cases such as coredump (this diff contains
a workaround for it), ptrace, and the like.
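
To make the intended usage concrete, here is a minimal sketch of the
pattern callers are expected to follow: check with xonly() immediately
before a copyin() from a user-supplied pointer.  copyin_user_buf() is a
made-up name used purely for illustration; the subr_log.c hunk below
applies the same pattern directly.

/*
 * Hypothetical helper, for illustration only: copy a user-supplied
 * buffer into the kernel, refusing source pointers that fall inside
 * an execute-only region.
 */
static int
copyin_user_buf(struct proc *p, const void *uaddr, void *kaddr, size_t len)
{
	int error;

	if ((error = xonly(p, uaddr, len)))
		return (error);
	return (copyin(uaddr, kaddr, len));
}
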
As seen in subr_log.c, there will be other naked copyin() calls which need
xonly() checks before them.  I made an attempt to protect *all* copyin()
calls by changing copyin() to do the check itself, but copyin() lacks the
proc pointer, so I tried curproc; that doesn't seem to work right, and it
doesn't feel right either.  Furthermore, I found there are copyin() calls
that occur while map locks are held, so that seems like a step too far.
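
For reference, the rejected direction would have looked roughly like the
sketch below.  copyin_chk() is a made-up name; this is only an
illustration of the approach described above, not part of the diff.

/*
 * Sketch of the rejected approach: doing the check inside copyin()
 * itself.  Because copyin() takes no proc pointer, the wrapper has to
 * reach for curproc, and it would also run for copyin() calls made
 * while vm_map locks are held, which are the two reasons this
 * direction was dropped.
 */
int
copyin_chk(const void *uaddr, void *kaddr, size_t len)
{
	struct proc *p = curproc;
	int error;

	if (p != NULL && (error = xonly(p, uaddr, len)))
		return (error);
	return (copyin(uaddr, kaddr, len));
}
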
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.159
diff -u -p -u -r1.159 systm.h
--- sys/systm.h 3 Sep 2022 15:29:44 -0000 1.159
+++ sys/systm.h 21 Dec 2022 07:17:32 -0000
@@ -211,6 +211,8 @@ int copyin(const void *, void *, size_t)
int copyout(const void *, void *, size_t);
int copyin32(const uint32_t *, uint32_t *);
+int xonly(struct proc *, const void *, size_t);
+
void random_start(int);
void enqueue_randomness(unsigned int);
void suspend_randomness(void);
Index: kern/exec_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/exec_subr.c,v
retrieving revision 1.64
diff -u -p -u -r1.64 exec_subr.c
--- kern/exec_subr.c 5 Dec 2022 23:18:37 -0000 1.64
+++ kern/exec_subr.c 21 Dec 2022 06:58:19 -0000
@@ -215,6 +215,10 @@ vmcmd_map_pagedvn(struct proc *p, struct
if (cmd->ev_flags & VMCMD_IMMUTABLE)
uvm_map_immutable(&p->p_vmspace->vm_map, cmd->ev_addr,
round_page(cmd->ev_addr + cmd->ev_len), 1);
+ if ((flags & UVM_FLAG_SYSCALL) ||
+ ((cmd->ev_flags & VMCMD_IMMUTABLE) && (cmd->ev_prot & PROT_EXEC)))
+ uvm_map_xonly(&p->p_vmspace->vm_map,
+ cmd->ev_addr, round_page(cmd->ev_addr + cmd->ev_len));
}
return (error);
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.301
diff -u -p -u -r1.301 kern_sig.c
--- kern/kern_sig.c 16 Oct 2022 16:27:02 -0000 1.301
+++ kern/kern_sig.c 21 Dec 2022 06:58:19 -0000
@@ -1642,6 +1642,9 @@ coredump(struct proc *p)
atomic_setbits_int(&pr->ps_flags, PS_COREDUMP);
+ /* disable xonly checks, so we can write out text sections if needed */
+ p->p_vmspace->vm_map.xonly_count = 0;
+
/* Don't dump if will exceed file size limit. */
if (USPACE + ptoa(vm->vm_dsize + vm->vm_ssize) >= lim_cur(RLIMIT_CORE))
return (EFBIG);
Index: kern/kern_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_subr.c,v
retrieving revision 1.51
diff -u -p -u -r1.51 kern_subr.c
--- kern/kern_subr.c 14 Aug 2022 01:58:27 -0000 1.51
+++ kern/kern_subr.c 21 Dec 2022 15:50:34 -0000
@@ -78,8 +78,12 @@ uiomove(void *cp, size_t n, struct uio *
sched_pause(preempt);
if (uio->uio_rw == UIO_READ)
error = copyout(cp, iov->iov_base, cnt);
- else
- error = copyin(iov->iov_base, cp, cnt);
+ else {
+ error = xonly(uio->uio_procp,
+ iov->iov_base, cnt);
+ if (error == 0)
+ error = copyin(iov->iov_base, cp, cnt);
+ }
if (error)
return (error);
break;
Index: kern/subr_log.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.75
diff -u -p -u -r1.75 subr_log.c
--- kern/subr_log.c 2 Jul 2022 08:50:42 -0000 1.75
+++ kern/subr_log.c 21 Dec 2022 15:16:41 -0000
@@ -644,6 +644,8 @@ dosendsyslog(struct proc *p, const char
*/
len = MIN(nbyte, sizeof(pri));
if (sflg == UIO_USERSPACE) {
+ if ((error = xonly(p, buf, len)))
+ return (error);
if ((error = copyin(buf, pri, len)))
return (error);
} else
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.305
diff -u -p -u -r1.305 uvm_map.c
--- uvm/uvm_map.c 18 Dec 2022 23:41:17 -0000 1.305
+++ uvm/uvm_map.c 21 Dec 2022 07:17:52 -0000
@@ -2500,6 +2500,7 @@ uvm_map_setup(struct vm_map *map, pmap_t
else
rw_init(&map->lock, "kmmaplk");
mtx_init(&map->mtx, IPL_VM);
+ mtx_init(&map->xonly_mtx, IPL_VM);
mtx_init(&map->flags_lock, IPL_VM);
/* Configure the allocators. */
@@ -3472,6 +3473,7 @@ uvmspace_exec(struct proc *p, vaddr_t st
uvmspace_free(ovm);
}
+ p->p_vmspace->vm_map.xonly_count = 0;
/* Release dead entries */
uvm_unmap_detach(&dead_entries, 0);
@@ -4258,9 +4260,75 @@ uvm_map_syscall(struct vm_map *map, vadd
entry = RBT_NEXT(uvm_map_addr, entry);
}
+ /* Add libc's text segment to the XONLY list */
+ mtx_enter(&map->xonly_mtx);
+ if (map->xonly_count < UVM_MAP_XONLY_MAX) {
+ //printf("%d xsysc %lx-%lx\n", map->xonly_count, start, end);
+ map->xonly[map->xonly_count].start = start;
+ map->xonly[map->xonly_count].end = end;
+ map->xonly_count++;
+ }
+ mtx_leave(&map->xonly_mtx);
+
map->wserial++;
map->flags |= VM_MAP_SYSCALL_ONCE;
vm_map_unlock(map);
+ return (0);
+}
+
+/*
+ * xonly: if any of the range [vstart, vstart+len) lies in an x-only
+ * region, return EFAULT
+ */
+int
+xonly(struct proc *p, const void *vstart, size_t len)
+{
+ struct vm_map *map = &p->p_vmspace->vm_map;
+ const vaddr_t start = (vaddr_t)vstart;
+ const vaddr_t end = start + len;
+ int i, r = 0;
+
+ /*
+ * When system calls are registered and msyscall(2) is blocked,
+ * there are no new calls that set up xonly regions.
+ */
+ if ((map->flags & VM_MAP_SYSCALL_ONCE) == 0)
+ mtx_enter(&map->xonly_mtx);
+ for (i = 0; i < map->xonly_count; i++) {
+ vaddr_t s = map->xonly[i].start, e = map->xonly[i].end;
+
+ if (start < e && end > s) {
+ r = EFAULT;
+ break;
+ }
+ }
+ if ((map->flags & VM_MAP_SYSCALL_ONCE) == 0)
+ mtx_leave(&map->xonly_mtx);
+ return (r);
+}
+
+/*
+ * uvm_map_xonly: remember regions which are X-only for uiomove()
+ *
+ * => map must be unlocked
+ */
+int
+uvm_map_xonly(struct vm_map *map, vaddr_t start, vaddr_t end)
+{
+ if (start > end)
+ return EINVAL;
+ start = MAX(start, map->min_offset);
+ end = MIN(end, map->max_offset);
+ if (start >= end)
+ return 0;
+
+ mtx_enter(&map->xonly_mtx);
+ if (map->xonly_count < UVM_MAP_XONLY_MAX) {
+ //printf("%d xonly %lx-%lx\n", map->xonly_count, start, end);
+ map->xonly[map->xonly_count].start = start;
+ map->xonly[map->xonly_count].end = end;
+ map->xonly_count++;
+ }
+ mtx_leave(&map->xonly_mtx);
return (0);
}
Index: uvm/uvm_map.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.h,v
retrieving revision 1.81
diff -u -p -u -r1.81 uvm_map.h
--- uvm/uvm_map.h 17 Nov 2022 23:26:07 -0000 1.81
+++ uvm/uvm_map.h 21 Dec 2022 07:17:18 -0000
@@ -168,6 +168,12 @@ struct vm_map_entry {
vsize_t fspace_augment; /* max(fspace) in subtree */
};
+struct uvm_xonly {
+ vaddr_t start;
+ vaddr_t end;
+};
+#define UVM_MAP_XONLY_MAX 4 /* main, sigtramp, ld.so, libc.so */
+
#define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0)
TAILQ_HEAD(uvm_map_deadq, vm_map_entry); /* dead entry queue */
@@ -309,11 +315,15 @@ struct vm_map {
struct uvm_addr_state *uaddr_any[4]; /* More selectors. */
struct uvm_addr_state *uaddr_brk_stack; /* Brk/stack selector. */
+ struct uvm_xonly xonly[UVM_MAP_XONLY_MAX];
+ int xonly_count;
+
/*
* XXX struct mutex changes size because of compile options, so place
* place after fields which are inspected by libkvm / procmap(8)
*/
struct rwlock lock; /* Non-intrsafe lock */
+ struct mutex xonly_mtx; /* for xonly variables */
struct mutex mtx; /* Intrsafe lock */
struct mutex flags_lock; /* flags lock */
};
@@ -354,6 +364,7 @@ int uvm_map_extract(struct vm_map *, va
struct vm_map * uvm_map_create(pmap_t, vaddr_t, vaddr_t, int);
vaddr_t uvm_map_pie(vaddr_t);
vaddr_t uvm_map_hint(struct vmspace *, vm_prot_t, vaddr_t,
vaddr_t);
+int uvm_map_xonly(struct vm_map *, vaddr_t, vaddr_t);
int uvm_map_syscall(struct vm_map *, vaddr_t, vaddr_t);
int uvm_map_immutable(struct vm_map *, vaddr_t, vaddr_t, int);
int uvm_map_inherit(struct vm_map *, vaddr_t, vaddr_t,
vm_inherit_t);