> Date: Tue, 30 Sep 2014 23:12:10 +0200 (CEST) > From: Mark Kettenis <mark.kette...@xs4all.nl> > > The diff below intorduces a new flag for mmap(2) that creates mappings > that cannot fault. Normally, if you mmap a file, and your mapping is > larger than the mapped file, memory access to full pages beyond the > end of the file will fault. Depending on the OS you will get a > SIGSEGV or SIGBUS and if you don't catch those, you die. This is > especially nasty if you use file descriptor passing to share the file > descriptor with some other process and this other proces ftruncates > the file without telling you. > > The new xserver that matthieu@ just imported has the new xshm > extension which mmaps file descriptors passed by clients through file > descriptor passing. To protect itself from being trivially DOSed by a > malicious (or careless) client, it keeps a list of mappings and > installs a SIGBUS signal handler that checks whether the fault address > matches any of these mappings. In that case it mmaps a private > anonymous page on top of the faulting address and returns. Since > OpenBSD generates SIGSEGV instead of SIGBUS in this case, this doesn't > work for us, so I made sure matthieu@ disabled this functionality for > now. But the new xshm extension would actually be a nice thing to > have as it circumvents certain problems with the traditional xshm > extension that we have because of privsep. And file descriptor > passing is also being used for DRI3 which we may want to support one > day. Oh, and Wayland, which some people claim will replace X any day > now, heavily uses mapping file descriptors passed over sockets as > well. > > We could of course change the xserver code to also trap SIGSEGV. But > this workaround is rather ugly. So my idea is to make X use this new > flag and disable the stupid busfault code. > > The diff is remarkably simple. We already have the infrastructure in > place to replace mapped pages with anons to support MAP_PRIVATE and > copy-on-write. This diff simply leverages that infrastructure to > replace a page that can't be read from the underlying object by an > anonymous pages. Some open issues: > > * I need to check whether all combinations of flag actually make > sense. Should we only support __MAP_NOFAULT with non-anonymous > mappings? > > * Should we only fixup the fault for access beyond the end of the > mapped object (VM_PAGER_BAD) and still fault for actual IO erors > (VM_PAGER_ERROR)? > > * Should the flag be exported without the leading underscores since > we actually want to encourage its use? > > Thoughts?
Even though this diff has been committed, I'm still interested in what people think about the issues above. > Index: sys/mman.h > =================================================================== > RCS file: /cvs/src/sys/sys/mman.h,v > retrieving revision 1.26 > diff -u -p -r1.26 mman.h > --- sys/mman.h 10 Jul 2014 19:00:23 -0000 1.26 > +++ sys/mman.h 30 Sep 2014 20:34:42 -0000 > @@ -58,8 +58,9 @@ > #define __MAP_NOREPLACE 0x0800 /* fail if address not available */ > #define MAP_ANON 0x1000 /* allocated from memory, swap space */ > #define MAP_ANONYMOUS MAP_ANON /* alternate POSIX spelling */ > +#define __MAP_NOFAULT 0x2000 > > -#define MAP_FLAGMASK 0x1ff7 > +#define MAP_FLAGMASK 0x3ff7 > > #ifdef _KERNEL > /* > Index: uvm/uvm.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm.h,v > retrieving revision 1.56 > diff -u -p -r1.56 uvm.h > --- uvm/uvm.h 11 Jul 2014 16:35:40 -0000 1.56 > +++ uvm/uvm.h 30 Sep 2014 21:03:43 -0000 > @@ -90,7 +90,8 @@ struct uvm { > #define UVM_ET_SUBMAP 0x02 /* it is a vm_map submap */ > #define UVM_ET_COPYONWRITE 0x04 /* copy_on_write */ > #define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */ > -#define UVM_ET_HOLE 0x10 /* no backend */ > +#define UVM_ET_HOLE 0x10 /* no backend */ > +#define UVM_ET_NOFAULT 0x20 /* don't fault */ > #define UVM_ET_FREEMAPPED 0x80 /* map entry is on free list (DEBUG) */ > > #define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0) > @@ -98,6 +99,7 @@ struct uvm { > #define UVM_ET_ISCOPYONWRITE(E) (((E)->etype & UVM_ET_COPYONWRITE) != 0) > #define UVM_ET_ISNEEDSCOPY(E) (((E)->etype & UVM_ET_NEEDSCOPY) != 0) > #define UVM_ET_ISHOLE(E) (((E)->etype & UVM_ET_HOLE) != 0) > +#define UVM_ET_ISNOFAULT(E) (((E)->etype & UVM_ET_NOFAULT) != 0) > > #ifdef _KERNEL > > Index: uvm/uvm_extern.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_extern.h,v > retrieving revision 1.119 > diff -u -p -r1.119 uvm_extern.h > --- uvm/uvm_extern.h 11 Jul 2014 16:35:40 -0000 1.119 > +++ uvm/uvm_extern.h 30 Sep 2014 20:08:36 -0000 > @@ -148,14 +148,15 @@ typedef int vm_prot_t; > #define UVM_ADV_MASK 0x7 /* mask */ > > /* mapping flags */ > -#define UVM_FLAG_FIXED 0x010000 /* find space */ > -#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */ > -#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */ > -#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */ > -#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */ > -#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */ > -#define UVM_FLAG_HOLE 0x400000 /* no backend */ > -#define UVM_FLAG_QUERY 0x800000 /* do everything, except actual execution > */ > +#define UVM_FLAG_FIXED 0x0010000 /* find space */ > +#define UVM_FLAG_OVERLAY 0x0020000 /* establish overlay */ > +#define UVM_FLAG_NOMERGE 0x0040000 /* don't merge map entries */ > +#define UVM_FLAG_COPYONW 0x0080000 /* set copy_on_write flag */ > +#define UVM_FLAG_AMAPPAD 0x0100000 /* for bss: pad amap to reduce malloc() */ > +#define UVM_FLAG_TRYLOCK 0x0200000 /* fail if we can not lock map */ > +#define UVM_FLAG_HOLE 0x0400000 /* no backend */ > +#define UVM_FLAG_QUERY 0x0800000 /* do everything, except actual execution > */ > +#define UVM_FLAG_NOFAULT 0x1000000 /* don't fault */ > > /* macros to extract info */ > #define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK) > Index: uvm/uvm_fault.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_fault.c,v > retrieving revision 1.77 > diff -u -p -r1.77 uvm_fault.c > --- uvm/uvm_fault.c 7 Sep 2014 08:17:44 -0000 1.77 > +++ uvm/uvm_fault.c 30 Sep 2014 19:44:28 -0000 > @@ -1114,7 +1114,11 @@ Case2: > goto ReFault; > } > > - return (EACCES); /* XXX i/o error */ > + if (!UVM_ET_ISNOFAULT(ufi.entry)) > + return (EACCES); /* XXX i/o error */ > + > + uobjpage = PGO_DONTCARE; > + promote = TRUE; > } > > /* re-verify the state of the world. */ > @@ -1132,7 +1136,7 @@ Case2: > } > > /* didn't get the lock? release the page and retry. */ > - if (locked == FALSE) { > + if (locked == FALSE && uobjpage != PGO_DONTCARE) { > uvm_lock_pageq(); > /* make sure it is in queues */ > uvm_pageactivate(uobjpage); > Index: uvm/uvm_map.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_map.c,v > retrieving revision 1.175 > diff -u -p -r1.175 uvm_map.c > --- uvm/uvm_map.c 14 Aug 2014 17:21:38 -0000 1.175 > +++ uvm/uvm_map.c 30 Sep 2014 20:07:53 -0000 > @@ -1142,6 +1142,8 @@ uvm_map(struct vm_map *map, vaddr_t *add > entry->etype |= UVM_ET_OBJ; > else if (flags & UVM_FLAG_HOLE) > entry->etype |= UVM_ET_HOLE; > + if (flags & UVM_FLAG_NOFAULT) > + entry->etype |= UVM_ET_NOFAULT; > if (flags & UVM_FLAG_COPYONW) { > entry->etype |= UVM_ET_COPYONWRITE; > if ((flags & UVM_FLAG_OVERLAY) == 0) > Index: uvm/uvm_mmap.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v > retrieving revision 1.98 > diff -u -p -r1.98 uvm_mmap.c > --- uvm/uvm_mmap.c 12 Jul 2014 18:44:01 -0000 1.98 > +++ uvm/uvm_mmap.c 30 Sep 2014 19:58:55 -0000 > @@ -1004,6 +1004,8 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vs > > if ((flags & MAP_SHARED) == 0) > uvmflag |= UVM_FLAG_COPYONW; > + if (flags & __MAP_NOFAULT) > + uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY); > } > > /* set up mapping flags */ > >