Hi all,

at the moment, PAX mprotect makes it very expensive to implement any form of JIT: it is not possible to switch a page from writable to executable, and it is not possible to use anonymous memory for JIT in multi-threaded applications, since you can't have distinct writable and executable mappings of the same memory. The only ways JIT can work right now are disabling PAX mprotect for the binary or creating a temporary file on disk and mapping it twice. That's not only silly, but IMO actively harmful. Considering that important components like libffi fall into this category, the answer can't be "disable PAX mprotect on bin/python* and various other places".
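To make the failure concrete, here is a minimal sketch (my illustration, not part of the patches) of the pattern PAX mprotect rejects today: anonymous memory is mapped writable, code is emitted into it, and the switch to executable then fails:

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	size_t len = 4096;	/* assume a single page for the sketch */
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED)
		return 1;

	/* ... emit machine code into buf ... */

	/*
	 * With PAX mprotect active this fails with EPERM: the mapping
	 * was never executable, so it may not become executable now.
	 */
	if (mprotect(buf, len, PROT_READ | PROT_EXEC) == -1) {
		perror("mprotect");
		return 1;
	}
	return 0;
}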
I've attached three patches to this mail:

(1) Implement a new flag for mremap to allow duplicating a mapping (MAP_REMAPDUP). This patch is functional by itself.

(2) A hack to allow mprotect to switch a mapping between W and X while still honoring W^X. This is a hack and needs to be carefully rethought, since I believe the way PAX is currently implemented is wrong. Consider it a PoC.

(3) A patch for devel/libffi to show how the first two parts can be used to obtain JIT. With this patch, the libffi test suite passes with PAX mprotect and ASLR active.

I find the availability of two separate mappings quite an acceptable compromise. It would also allow us to easily improve security for most binaries by mapping the GOT read-only as far as possible, keeping the writable mapping separate. But that's a separate topic.

Joerg
Index: sys/uvm/uvm_map.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/uvm/uvm_map.c,v
retrieving revision 1.342
diff -u -p -r1.342 uvm_map.c
--- sys/uvm/uvm_map.c	1 Dec 2016 02:09:03 -0000	1.342
+++ sys/uvm/uvm_map.c	24 Feb 2017 01:40:35 -0000
@@ -2988,7 +2988,7 @@ uvm_map_protect(struct vm_map *map, vadd
 			error = EINVAL;
 			goto out;
 		}
-		if ((new_prot & current->max_protection) != new_prot) {
+		if (!set_max && (new_prot & current->max_protection) != new_prot) {
 			error = EACCES;
 			goto out;
 		}
@@ -3021,10 +3021,8 @@ uvm_map_protect(struct vm_map *map, vadd
 		UVM_MAP_CLIP_END(map, current, end);
 		old_prot = current->protection;
 		if (set_max)
-			current->protection =
-			    (current->max_protection = new_prot) & old_prot;
-		else
-			current->protection = new_prot;
+			current->max_protection = new_prot;
+		current->protection = new_prot;
 
 		/*
 		 * update physical map if necessary.  worry about copy-on-write
Index: sys/uvm/uvm_mmap.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.162
diff -u -p -r1.162 uvm_mmap.c
--- sys/uvm/uvm_mmap.c	9 Aug 2016 12:17:04 -0000	1.162
+++ sys/uvm/uvm_mmap.c	24 Feb 2017 00:48:05 -0000
@@ -603,7 +603,7 @@ sys_mprotect(struct lwp *l, const struct
 	struct proc *p = l->l_proc;
 	vaddr_t addr;
 	vsize_t size, pageoff;
-	vm_prot_t prot;
+	vm_prot_t oprot, prot, maxprot;
 	int error;
 
 	/*
@@ -612,7 +612,12 @@ sys_mprotect(struct lwp *l, const struct
 
 	addr = (vaddr_t)SCARG(uap, addr);
 	size = (vsize_t)SCARG(uap, len);
-	prot = SCARG(uap, prot) & VM_PROT_ALL;
+	prot = oprot = SCARG(uap, prot) & VM_PROT_ALL;
+
+	maxprot = prot;
+	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);
+	if (prot != oprot)
+		return EPERM;
 
 	/*
 	 * align the address to a page boundary and adjust the size accordingly.
@@ -628,7 +633,7 @@ sys_mprotect(struct lwp *l, const struct
 		return EINVAL;
 
 	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
-	    false);
+	    true);
 
 	return error;
 }
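With this patch applied, requesting PROT_WRITE|PROT_EXEC in one mprotect call is still rejected, but a mapping can now alternate between the two states, because mprotect updates the maximum protection as well. A sketch of the usage this enables (my illustration of the PoC's effect, not an excerpt from it):

	/* generated code lives at p; flip between write and execute */
	mprotect(p, len, PROT_READ | PROT_WRITE);	/* patch the code */
	/* ... write new instructions ... */
	mprotect(p, len, PROT_READ | PROT_EXEC);	/* run it again */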
Index: sys/sys/mman.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/sys/mman.h,v
retrieving revision 1.50
diff -u -p -r1.50 mman.h
--- sys/sys/mman.h	1 Jun 2016 00:46:44 -0000	1.50
+++ sys/sys/mman.h	23 Feb 2017 22:52:43 -0000
@@ -82,6 +82,7 @@ typedef	__off_t	off_t;	/* file offset
 /*
  * Other flags
  */
+#define	MAP_REMAPDUP	 0x0004	/* mremap only: duplicate the mapping */
 #define	MAP_FIXED	 0x0010	/* map addr must be exactly as requested */
 #define	MAP_RENAME	 0x0020	/* Sun: rename private pages to file */
 #define	MAP_NORESERVE	 0x0040	/* Sun: don't reserve needed swap area */
Index: sys/uvm/uvm_mremap.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/uvm/uvm_mremap.c,v
retrieving revision 1.18
diff -u -p -r1.18 uvm_mremap.c
--- sys/uvm/uvm_mremap.c	26 Nov 2015 13:15:34 -0000	1.18
+++ sys/uvm/uvm_mremap.c	23 Feb 2017 22:56:00 -0000
@@ -120,6 +120,7 @@ uvm_mremap(struct vm_map *oldmap, vaddr_
 	vaddr_t align = 0;
 	int error = 0;
 	const bool fixed = (flags & MAP_FIXED) != 0;
+	const bool duplicate = (flags & MAP_REMAPDUP) != 0;
 
 	if (fixed) {
 		newva = *newvap;
@@ -165,7 +166,8 @@ uvm_mremap(struct vm_map *oldmap, vaddr_
 	 * check the easy cases first.
 	 */
 
-	if ((!fixed || newva == oldva) && newmap == oldmap &&
+	if (!duplicate &&
+	    (!fixed || newva == oldva) && newmap == oldmap &&
 	    (align == 0 || (oldva & (align - 1)) == 0)) {
 		vaddr_t va;
 
@@ -240,7 +242,7 @@ extend:
 	 * remove original entries unless we did in-place extend.
 	 */
 
-	if (oldva != newva || oldmap != newmap) {
+	if (!duplicate && (oldva != newva || oldmap != newmap)) {
 		uvm_unmap(oldmap, oldva, oldva + oldsize);
 	}
 done:
@@ -278,7 +280,7 @@ sys_mremap(struct lwp *l, const struct s
 	newva = (vaddr_t)SCARG(uap, new_address);
 	newsize = (vsize_t)(SCARG(uap, new_size));
 
-	if ((flags & ~(MAP_FIXED | MAP_ALIGNMENT_MASK)) != 0) {
+	if ((flags & ~(MAP_FIXED | MAP_REMAPDUP | MAP_ALIGNMENT_MASK)) != 0) {
 		error = EINVAL;
 		goto done;
 	}
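For illustration, a minimal sketch of how the new flag is meant to be used (the helper name alloc_jit is mine; it assumes the kernel patches above): the writable view and the executable view are two mappings of the same anonymous memory, so each view on its own still honors W^X.

#include <sys/mman.h>
#include <stddef.h>

void *
alloc_jit(size_t len, void **rx_view)
{
	/* writable view, used to emit code */
	void *rw = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (rw == MAP_FAILED)
		return NULL;
	/* duplicate the mapping at an address chosen by the kernel */
	void *rx = mremap(rw, len, NULL, len, MAP_REMAPDUP);
	if (rx == MAP_FAILED) {
		munmap(rw, len);
		return NULL;
	}
	/* make only the duplicate executable; the original stays writable */
	if (mprotect(rx, len, PROT_READ | PROT_EXEC) == -1) {
		munmap(rx, len);
		munmap(rw, len);
		return NULL;
	}
	*rx_view = rx;
	return rw;	/* write through the result, call through *rx_view */
}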
$NetBSD$

--- src/closures.c.orig	2017-02-24 01:00:47.888238256 +0000
+++ src/closures.c
@@ -33,6 +33,12 @@
 #include <ffi.h>
 #include <ffi_common.h>
 
+#ifdef __NetBSD__
+#include <sys/param.h>
+#endif
+
+#if __NetBSD_Version__ - 799005900 < 0
+
 #if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
 # if __gnu_linux__ && !defined(__ANDROID__)
 /* This macro indicates it may be forbidden to map anonymous memory
@@ -686,3 +692,61 @@ ffi_closure_free (void *ptr)
 
 # endif /* ! FFI_MMAP_EXEC_WRIT */
 #endif /* FFI_CLOSURES */
+
+#else
+#include <sys/mman.h>
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Each allocation starts with a hidden header recording the rounded
+   mapping size and the address of the twin executable mapping. */
+static const size_t overhead =
+  (sizeof(max_align_t) > sizeof(void *) + sizeof(size_t)) ?
+  sizeof(max_align_t)
+  : sizeof(void *) + sizeof(size_t);
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  if (!code)
+    return NULL;
+
+  static size_t page_size;
+  if (!page_size)
+    page_size = sysconf(_SC_PAGESIZE);
+  size_t rounded_size = (size + overhead + page_size - 1) & ~(page_size - 1);
+  void *dataseg = mmap(NULL, rounded_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  if (dataseg == MAP_FAILED)
+    return NULL;
+  void *codeseg = mremap(dataseg, rounded_size, NULL, rounded_size, MAP_REMAPDUP);
+  if (codeseg == MAP_FAILED) {
+    munmap(dataseg, rounded_size);
+    return NULL;
+  }
+  if (mprotect(codeseg, rounded_size, PROT_READ | PROT_EXEC) == -1) {
+    munmap(codeseg, rounded_size);
+    munmap(dataseg, rounded_size);
+    return NULL;
+  }
+  memcpy(dataseg, &rounded_size, sizeof(rounded_size));
+  memcpy((void *)((uintptr_t)dataseg + sizeof(size_t)), &codeseg, sizeof(void *));
+  *code = (void *)((uintptr_t)codeseg + overhead);
+  return (void *)((uintptr_t)dataseg + overhead);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  void *dataseg = (void *)((uintptr_t)ptr - overhead);
+  size_t rounded_size;
+  void *codeseg;
+  memcpy(&rounded_size, dataseg, sizeof(rounded_size));
+  memcpy(&codeseg, (void *)((uintptr_t)dataseg + sizeof(size_t)), sizeof(void *));
+  munmap(dataseg, rounded_size);
+  munmap(codeseg, rounded_size);
+}
+
+#endif
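For completeness, this is how the patched allocator gets exercised through the stock libffi closure API (standard libffi usage, nothing here is specific to the patch): the closure is written through the writable view returned by ffi_closure_alloc and called through the executable entry point. With the changes above, this runs with PAX mprotect and ASLR enabled.

#include <ffi.h>
#include <stdio.h>

/* closure body: returns its int argument plus one */
static void
add_one(ffi_cif *cif, void *ret, void **args, void *user_data)
{
	*(ffi_arg *)ret = *(int *)args[0] + 1;
}

int
main(void)
{
	ffi_cif cif;
	ffi_type *argtypes[] = { &ffi_type_sint };
	void *code;
	/* writable view in closure, executable entry point in code */
	ffi_closure *closure = ffi_closure_alloc(sizeof(ffi_closure), &code);

	if (closure == NULL ||
	    ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint,
		argtypes) != FFI_OK ||
	    ffi_prep_closure_loc(closure, &cif, add_one, NULL, code) != FFI_OK)
		return 1;

	int (*fn)(int) = (int (*)(int))code;
	printf("%d\n", fn(41));	/* prints 42 */
	ffi_closure_free(closure);
	return 0;
}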