Stefan (stefan@) and I have been working for a few months on this diff, with help from a few others.
At every trap and system call, it checks if the stack-pointer is on a page that is marked MAP_STACK. execve() is changed to create such mappings for the process stack. Also, libpthread is taught the new MAP_STACK flag to use with mmap(). There is no corresponding system call which can set MAP_FLAG on an existing page, you can only set the flag by mapping new memory into place. That is a piece of the security model. The purpose of this change is to twart stack pivots, which apparently have gained some popularity in JIT ROP attacks. It makes it difficult to place the ROP stack in regular data memory, and then perform a system call from it. Workarounds are cumbersome, increasing the need for far more gadgetry. But also the trap case -- if any memory experiences a demand page fault, the same check will occur and potentially also kill the process. We have experimented a little with performing this check during device interrupts, but there are some locking concerns and performance may then become a concern. It'll be best to gain experience from handle of syncronous trap cases first. chrome and other applications I use run fine! I'm asking for some feedback to discover what ports this breaks, we'd like to know. Those would be ports which try to (unconvenionally) create their stacks in malloc()'d memory or inside another datastructure. Most of them are probably easily fixed ... Index: lib/libc/sys/mmap.2 =================================================================== RCS file: /cvs/src/lib/libc/sys/mmap.2,v retrieving revision 1.56 diff -u -p -u -r1.56 mmap.2 --- lib/libc/sys/mmap.2 20 Jul 2017 18:22:25 -0000 1.56 +++ lib/libc/sys/mmap.2 6 Jan 2018 18:56:04 -0000 @@ -153,6 +153,11 @@ mappings) must be multiples of the page size. Existing mappings in the address range will be replaced. Use of this option is discouraged. +.It Dv MAP_STACK +Indicate that the mapping is used as a stack. +This flag must be used in combination with +.Dv MAP_ANON and +.Dv MAP_PRIVATE . .El .Pp Finally, the following flags are also provided for Index: lib/libc/sys/sigaltstack.2 =================================================================== RCS file: /cvs/src/lib/libc/sys/sigaltstack.2,v retrieving revision 1.19 diff -u -p -u -r1.19 sigaltstack.2 --- lib/libc/sys/sigaltstack.2 31 May 2015 23:54:25 -0000 1.19 +++ lib/libc/sys/sigaltstack.2 6 Jan 2018 18:56:04 -0000 @@ -92,6 +92,15 @@ field will contain the value if the thread is currently on a signal stack and .Dv SS_DISABLE if the signal stack is currently disabled. +.Pp +The stack must be allocated using +.Xr mmap 2 +with +.Ar MAP_STACK +to inform the kernel that the memory is being used as a stack. +Otherwise, the first system call performed while operating on +that stack will deliver +.Dv SIGABRT . .Sh NOTES The value .Dv SIGSTKSZ @@ -99,7 +108,8 @@ is defined to be the number of bytes/cha the usual case when allocating an alternate stack area. The following code fragment is typically used to allocate an alternate stack. .Bd -literal -offset indent -if ((sigstk.ss_sp = malloc(SIGSTKSZ)) == NULL) +if ((sigstk.ss_sp = mmap(NULL, SIGSTKSZ, PROT_WRITE | PROT_READ, + MAP_PRIVATE | MAP_ANON | MAP_STACK, -1, 0)) == NULL) /* error return */ sigstk.ss_size = SIGSTKSZ; sigstk.ss_flags = 0; Index: lib/librthread/rthread_stack.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_stack.c,v retrieving revision 1.17 diff -u -p -u -r1.17 rthread_stack.c --- lib/librthread/rthread_stack.c 5 Sep 2017 02:40:54 -0000 1.17 +++ lib/librthread/rthread_stack.c 6 Jan 2018 18:56:04 -0000 @@ -92,7 +92,7 @@ _rthread_alloc_stack(pthread_t thread) /* actually allocate the real stack */ base = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); + MAP_PRIVATE | MAP_ANON | MAP_STACK, -1, 0); if (base == MAP_FAILED) { free(stack); return (NULL); Index: sys/arch/amd64/amd64/trap.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v retrieving revision 1.63 diff -u -p -u -r1.63 trap.c --- sys/arch/amd64/amd64/trap.c 5 Jan 2018 11:10:25 -0000 1.63 +++ sys/arch/amd64/amd64/trap.c 6 Jan 2018 19:05:06 -0000 @@ -175,9 +175,29 @@ trap(struct trapframe *frame) #endif if (!KERNELMODE(frame->tf_cs, frame->tf_rflags)) { + vaddr_t sp = PROC_STACK(p); + type |= T_USER; p->p_md.md_regs = frame; refreshcreds(p); + + if (p->p_vmspace->vm_map.serial != p->p_spserial || + p->p_spstart == 0 || sp < p->p_spstart || + sp >= p->p_spend) { + KERNEL_LOCK(); + if (!uvm_map_check_stack_range(p, sp)) { + struct sigaction sa; + + memset(&sa, 0, sizeof sa); + sa.sa_handler = SIG_DFL; + setsigvec(p, SIGABRT, &sa); + sv.sival_ptr = (void *)frame->tf_rip; + trapsignal(p, SIGABRT, type & ~T_USER, + ILL_BADSTK, sv); + } + + KERNEL_UNLOCK(); + } } switch (type) { Index: sys/kern/exec_subr.c =================================================================== RCS file: /cvs/src/sys/kern/exec_subr.c,v retrieving revision 1.52 diff -u -p -u -r1.52 exec_subr.c --- sys/kern/exec_subr.c 18 May 2017 18:50:32 -0000 1.52 +++ sys/kern/exec_subr.c 6 Jan 2018 18:56:04 -0000 @@ -276,7 +276,8 @@ vmcmd_map_zero(struct proc *p, struct ex return (uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(cmd->ev_prot, PROT_MASK, MAP_INHERIT_COPY, - MADV_NORMAL, UVM_FLAG_FIXED|UVM_FLAG_COPYONW))); + MADV_NORMAL, UVM_FLAG_FIXED|UVM_FLAG_COPYONW | + (cmd->ev_flags & VMCMD_STACK ? UVM_FLAG_STACK : 0)))); } /* @@ -372,17 +373,19 @@ exec_setup_stack(struct proc *p, struct #ifdef MACHINE_STACK_GROWS_UP NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, ((epp->ep_minsaddr - epp->ep_ssize) - epp->ep_maxsaddr), - epp->ep_maxsaddr + epp->ep_ssize, NULLVP, 0, PROT_NONE); - NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, epp->ep_ssize, + epp->ep_maxsaddr + epp->ep_ssize, NULLVP, 0, + PROT_NONE); + NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, epp->ep_ssize, epp->ep_maxsaddr, NULLVP, 0, - PROT_READ | PROT_WRITE); + PROT_READ | PROT_WRITE, VMCMD_STACK); #else NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, ((epp->ep_minsaddr - epp->ep_ssize) - epp->ep_maxsaddr), - epp->ep_maxsaddr, NULLVP, 0, PROT_NONE); - NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, epp->ep_ssize, + epp->ep_maxsaddr, NULLVP, 0, + PROT_NONE); + NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, epp->ep_ssize, (epp->ep_minsaddr - epp->ep_ssize), NULLVP, 0, - PROT_READ | PROT_WRITE); + PROT_READ | PROT_WRITE, VMCMD_STACK); #endif return (0); Index: sys/kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.272 diff -u -p -u -r1.272 init_main.c --- sys/kern/init_main.c 1 Jan 2018 08:23:19 -0000 1.272 +++ sys/kern/init_main.c 6 Jan 2018 18:56:04 -0000 @@ -651,7 +651,7 @@ start_init(void *arg) if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_MASK, MAP_INHERIT_COPY, - MADV_NORMAL, UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW))) + MADV_NORMAL, UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW|UVM_FLAG_STACK))) panic("init: couldn't allocate argument space"); for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) { Index: sys/sys/exec.h =================================================================== RCS file: /cvs/src/sys/sys/exec.h,v retrieving revision 1.36 diff -u -p -u -r1.36 exec.h --- sys/sys/exec.h 8 Feb 2017 21:04:44 -0000 1.36 +++ sys/sys/exec.h 6 Jan 2018 18:56:04 -0000 @@ -98,6 +98,7 @@ struct exec_vmcmd { int ev_flags; #define VMCMD_RELATIVE 0x0001 /* ev_addr is relative to base entry */ #define VMCMD_BASE 0x0002 /* marks a base entry */ +#define VMCMD_STACK 0x0004 /* create with UVM_FLAG_STACK */ }; #define EXEC_DEFAULT_VMCMD_SETSIZE 8 /* # of cmds in set to start */ Index: sys/sys/mman.h =================================================================== RCS file: /cvs/src/sys/sys/mman.h,v retrieving revision 1.29 diff -u -p -u -r1.29 mman.h --- sys/sys/mman.h 1 Jun 2016 04:53:54 -0000 1.29 +++ sys/sys/mman.h 6 Jan 2018 18:56:04 -0000 @@ -59,8 +59,9 @@ #define MAP_ANON 0x1000 /* allocated from memory, swap space */ #define MAP_ANONYMOUS MAP_ANON /* alternate POSIX spelling */ #define __MAP_NOFAULT 0x2000 +#define MAP_STACK 0x4000 /* mapping is used for a stack */ -#define MAP_FLAGMASK 0x3ff7 +#define MAP_FLAGMASK 0x7ff7 #ifndef _KERNEL /* Index: sys/sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.244 diff -u -p -u -r1.244 proc.h --- sys/sys/proc.h 19 Dec 2017 10:04:59 -0000 1.244 +++ sys/sys/proc.h 6 Jan 2018 18:56:04 -0000 @@ -330,6 +330,10 @@ struct proc { #define p_startcopy p_sigmask sigset_t p_sigmask; /* Current signal mask. */ + u_int p_spserial; + vaddr_t p_spstart; + vaddr_t p_spend; + u_char p_priority; /* Process priority. */ u_char p_usrpri; /* User-priority based on p_estcpu and ps_nice. */ int p_pledge_syscall; /* Cache of current syscall */ Index: sys/sys/syscall_mi.h =================================================================== RCS file: /cvs/src/sys/sys/syscall_mi.h,v retrieving revision 1.18 diff -u -p -u -r1.18 syscall_mi.h --- sys/sys/syscall_mi.h 20 Apr 2017 15:21:51 -0000 1.18 +++ sys/sys/syscall_mi.h 6 Jan 2018 19:00:58 -0000 @@ -32,6 +32,7 @@ */ #include <sys/pledge.h> +#include <uvm/uvm_extern.h> #ifdef KTRACE #include <sys/ktrace.h> @@ -48,6 +49,7 @@ mi_syscall(struct proc *p, register_t co uint64_t tval; int lock = !(callp->sy_flags & SY_NOLOCK); int error, pledged; + vaddr_t sp = PROC_STACK(p); /* refresh the thread's cache of the process's creds */ refreshcreds(p); @@ -65,7 +67,22 @@ mi_syscall(struct proc *p, register_t co } #endif - if (lock) + if (p->p_vmspace->vm_map.serial != p->p_spserial || + p->p_spstart == 0 || sp < p->p_spstart || sp >= p->p_spend) { + KERNEL_LOCK(); + + if (!uvm_map_check_stack_range(p, sp)) { + struct sigaction sa; + + memset(&sa, 0, sizeof sa); + sa.sa_handler = SIG_DFL; + setsigvec(p, SIGABRT, &sa); + psignal(p, SIGABRT); + KERNEL_UNLOCK(); + return (EPERM); + } + lock = 1; + } else if (lock) KERNEL_LOCK(); pledged = (p->p_p->ps_flags & PS_PLEDGE); if (pledged && (error = pledge_syscall(p, code, &tval))) { Index: sys/uvm/uvm.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm.h,v retrieving revision 1.61 diff -u -p -u -r1.61 uvm.h --- sys/uvm/uvm.h 11 Aug 2016 01:17:33 -0000 1.61 +++ sys/uvm/uvm.h 6 Jan 2018 18:56:04 -0000 @@ -88,6 +88,7 @@ struct uvm { #define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */ #define UVM_ET_HOLE 0x10 /* no backend */ #define UVM_ET_NOFAULT 0x20 /* don't fault */ +#define UVM_ET_STACK 0x40 /* this is a stack */ #define UVM_ET_FREEMAPPED 0x80 /* map entry is on free list (DEBUG) */ #define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0) @@ -96,6 +97,7 @@ struct uvm { #define UVM_ET_ISNEEDSCOPY(E) (((E)->etype & UVM_ET_NEEDSCOPY) != 0) #define UVM_ET_ISHOLE(E) (((E)->etype & UVM_ET_HOLE) != 0) #define UVM_ET_ISNOFAULT(E) (((E)->etype & UVM_ET_NOFAULT) != 0) +#define UVM_ET_ISSTACK(E) (((E)->etype & UVM_ET_STACK) != 0) #ifdef _KERNEL Index: sys/uvm/uvm_extern.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_extern.h,v retrieving revision 1.142 diff -u -p -u -r1.142 uvm_extern.h --- sys/uvm/uvm_extern.h 30 Apr 2017 13:04:49 -0000 1.142 +++ sys/uvm/uvm_extern.h 6 Jan 2018 18:56:04 -0000 @@ -111,7 +111,7 @@ typedef int vm_prot_t; #define UVM_FLAG_QUERY 0x0400000 /* do everything, except actual execution */ #define UVM_FLAG_NOFAULT 0x0800000 /* don't fault */ #define UVM_FLAG_UNMAP 0x1000000 /* unmap to make space */ - +#define UVM_FLAG_STACK 0x2000000 /* page may contain a stack */ /* macros to extract info */ #define UVM_PROTECTION(X) ((X) & PROT_MASK) Index: sys/uvm/uvm_fault.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_fault.c,v retrieving revision 1.92 diff -u -p -u -r1.92 uvm_fault.c --- sys/uvm/uvm_fault.c 20 Jul 2017 18:22:25 -0000 1.92 +++ sys/uvm/uvm_fault.c 6 Jan 2018 18:56:04 -0000 @@ -234,7 +234,8 @@ uvmfault_amapcopy(struct uvm_faultinfo * /* copy if needed. */ if (UVM_ET_ISNEEDSCOPY(ufi->entry)) - amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, + amap_copy(ufi->map, ufi->entry, M_NOWAIT, + UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE, ufi->orig_rvaddr, ufi->orig_rvaddr + 1); /* didn't work? must be out of RAM. sleep. */ Index: sys/uvm/uvm_map.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.233 diff -u -p -u -r1.233 uvm_map.c --- sys/uvm/uvm_map.c 30 Nov 2017 00:36:10 -0000 1.233 +++ sys/uvm/uvm_map.c 6 Jan 2018 19:00:25 -0000 @@ -1064,6 +1064,11 @@ uvm_mapanon(struct vm_map *map, vaddr_t entry->inheritance = inherit; entry->wired_count = 0; entry->advice = advice; + if (flags & UVM_FLAG_STACK) { + entry->etype |= UVM_ET_STACK; + if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) + map->serial++; + } if (flags & UVM_FLAG_COPYONW) { entry->etype |= UVM_ET_COPYONWRITE; if ((flags & UVM_FLAG_OVERLAY) == 0) @@ -1320,6 +1325,11 @@ uvm_map(struct vm_map *map, vaddr_t *add entry->inheritance = inherit; entry->wired_count = 0; entry->advice = advice; + if (flags & UVM_FLAG_STACK) { + entry->etype |= UVM_ET_STACK; + if (flags & UVM_FLAG_UNMAP) + map->serial++; + } if (uobj) entry->etype |= UVM_ET_OBJ; else if (flags & UVM_FLAG_HOLE) @@ -1746,6 +1756,41 @@ uvm_map_lookup_entry(struct vm_map *map, } /* + * Inside a vm_map find the sp address and verify MAP_STACK, and also + * remember low and high regions of that of region which is marked + * with MAP_STACK. Return TRUE. + * If sp isn't in a MAP_STACK region return FALSE. + */ +boolean_t +uvm_map_check_stack_range(struct proc *p, vaddr_t sp) +{ + vm_map_t map = &p->p_vmspace->vm_map; + vm_map_entry_t entry; + + if (sp < map->min_offset || sp >= map->max_offset) + return(FALSE); + + /* lock map */ + vm_map_lock_read(map); + + /* lookup */ + if (!uvm_map_lookup_entry(map, trunc_page(sp), &entry)) { + vm_map_unlock_read(map); + return(FALSE); + } + + if ((entry->etype & UVM_ET_STACK) == 0) { + vm_map_unlock_read(map); + return (FALSE); + } + p->p_spstart = entry->start; + p->p_spend = entry->end; + p->p_spserial = map->serial; + vm_map_unlock_read(map); + return(TRUE); +} + +/* * uvm_map_pie: return a random load address for a PIE executable * properly aligned. */ @@ -1975,6 +2020,10 @@ uvm_unmap_remove(struct vm_map *map, vad } } + /* A stack has been removed.. */ + if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) + map->serial++; + /* Kill entry. */ uvm_unmap_kill_entry(map, entry); @@ -2084,7 +2133,8 @@ uvm_map_pageable_wire(struct vm_map *map UVM_ET_ISNEEDSCOPY(iter) && ((iter->protection & PROT_WRITE) || iter->object.uvm_obj == NULL)) { - amap_copy(map, iter, M_WAITOK, TRUE, + amap_copy(map, iter, M_WAITOK, + UVM_ET_ISSTACK(iter) ? FALSE : TRUE, iter->start, iter->end); } iter->wired_count++; @@ -4165,7 +4215,8 @@ uvm_map_extract(struct vm_map *srcmap, v for (entry = first; entry != NULL && entry->start < end; entry = RBT_NEXT(uvm_map_addr, entry)) { if (UVM_ET_ISNEEDSCOPY(entry)) - amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); + amap_copy(srcmap, entry, M_NOWAIT, + UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); if (UVM_ET_ISNEEDSCOPY(entry)) { /* * amap_copy failure Index: sys/uvm/uvm_map.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.h,v retrieving revision 1.59 diff -u -p -u -r1.59 uvm_map.h --- sys/uvm/uvm_map.h 16 Sep 2016 03:39:25 -0000 1.59 +++ sys/uvm/uvm_map.h 6 Jan 2018 18:58:23 -0000 @@ -292,6 +292,7 @@ struct vm_map { struct pmap * pmap; /* Physical map */ struct rwlock lock; /* Lock for map data */ struct mutex mtx; + u_int serial; /* signals stack changes */ struct uvm_map_addr addr; /* Entry tree, by addr */ @@ -393,6 +394,7 @@ int uvm_map_inherit(vm_map_t, vaddr_t, int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int); void uvm_map_init(void); boolean_t uvm_map_lookup_entry(vm_map_t, vaddr_t, vm_map_entry_t *); +boolean_t uvm_map_check_stack_range(struct proc *, vaddr_t sp); int uvm_map_replace(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t, int); int uvm_map_reserve(vm_map_t, vsize_t, vaddr_t, vsize_t, Index: sys/uvm/uvm_mmap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v retrieving revision 1.144 diff -u -p -u -r1.144 uvm_mmap.c --- sys/uvm/uvm_mmap.c 2 Jan 2018 06:38:45 -0000 1.144 +++ sys/uvm/uvm_mmap.c 6 Jan 2018 18:56:04 -0000 @@ -393,6 +393,12 @@ sys_mmap(struct proc *p, void *v, regist return (EINVAL); if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE) return (EINVAL); + if (flags & MAP_STACK) { + if ((flags & (MAP_ANON|MAP_PRIVATE)) != (MAP_ANON|MAP_PRIVATE)) + return (EINVAL); + if (flags & ~(MAP_STACK|MAP_FIXED|MAP_ANON|MAP_PRIVATE)) + return (EINVAL); + } if (size == 0) return (EINVAL); @@ -670,7 +676,6 @@ sys_munmap(struct proc *p, void *v, regi TAILQ_INIT(&dead_entries); uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE); - vm_map_unlock(map); /* and unlock */ uvm_unmap_detach(&dead_entries, 0); @@ -1048,6 +1053,8 @@ uvm_mmapanon(vm_map_t map, vaddr_t *addr else /* shared: create amap now */ uvmflag |= UVM_FLAG_OVERLAY; + if (flags & MAP_STACK) + uvmflag |= UVM_FLAG_STACK; /* set up mapping flags */ uvmflag = UVM_MAPFLAG(prot, maxprot, @@ -1154,6 +1161,8 @@ uvm_mmapfile(vm_map_t map, vaddr_t *addr uvmflag |= UVM_FLAG_COPYONW; if (flags & __MAP_NOFAULT) uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY); + if (flags & MAP_STACK) + uvmflag |= UVM_FLAG_STACK; /* set up mapping flags */ uvmflag = UVM_MAPFLAG(prot, maxprot,