> Date: Tue, 30 Sep 2014 23:12:10 +0200 (CEST)
> From: Mark Kettenis <mark.kette...@xs4all.nl>
> 
> The diff below intorduces a new flag for mmap(2) that creates mappings
> that cannot fault.  Normally, if you mmap a file, and your mapping is
> larger than the mapped file, memory access to full pages beyond the
> end of the file will fault.  Depending on the OS you will get a
> SIGSEGV or SIGBUS and if you don't catch those, you die.  This is
> especially nasty if you use file descriptor passing to share the file
> descriptor with some other process and this other proces ftruncates
> the file without telling you.
> 
> The new xserver that matthieu@ just imported has the new xshm
> extension which mmaps file descriptors passed by clients through file
> descriptor passing.  To protect itself from being trivially DOSed by a
> malicious (or careless) client, it keeps a list of mappings and
> installs a SIGBUS signal handler that checks whether the fault address
> matches any of these mappings.  In that case it mmaps a private
> anonymous page on top of the faulting address and returns.  Since
> OpenBSD generates SIGSEGV instead of SIGBUS in this case, this doesn't
> work for us, so I made sure matthieu@ disabled this functionality for
> now.  But the new xshm extension would actually be a nice thing to
> have as it circumvents certain problems with the traditional xshm
> extension that we have because of privsep.  And file descriptor
> passing is also being used for DRI3 which we may want to support one
> day.  Oh, and Wayland, which some people claim will replace X any day
> now, heavily uses mapping file descriptors passed over sockets as
> well.
> 
> We could of course change the xserver code to also trap SIGSEGV.  But
> this workaround is rather ugly.  So my idea is to make X use this new
> flag and disable the stupid busfault code.
> 
> The diff is remarkably simple.  We already have the infrastructure in
> place to replace mapped pages with anons to support MAP_PRIVATE and
> copy-on-write.  This diff simply leverages that infrastructure to
> replace a page that can't be read from the underlying object by an
> anonymous pages.  Some open issues:
> 
>  * I need to check whether all combinations of flag actually make
>    sense.  Should we only support __MAP_NOFAULT with non-anonymous
>    mappings?
> 
>  * Should we only fixup the fault for access beyond the end of the
>    mapped object (VM_PAGER_BAD) and still fault for actual IO erors
>    (VM_PAGER_ERROR)?
> 
>  * Should the flag be exported without the leading underscores since
>    we actually want to encourage its use?
> 
> Thoughts?

Even though this diff has been committed, I'm still interested in what
people think about the issues above.

> Index: sys/mman.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mman.h,v
> retrieving revision 1.26
> diff -u -p -r1.26 mman.h
> --- sys/mman.h        10 Jul 2014 19:00:23 -0000      1.26
> +++ sys/mman.h        30 Sep 2014 20:34:42 -0000
> @@ -58,8 +58,9 @@
>  #define      __MAP_NOREPLACE 0x0800  /* fail if address not available */
>  #define      MAP_ANON        0x1000  /* allocated from memory, swap space */
>  #define      MAP_ANONYMOUS   MAP_ANON        /* alternate POSIX spelling */
> +#define      __MAP_NOFAULT   0x2000
>  
> -#define      MAP_FLAGMASK    0x1ff7
> +#define      MAP_FLAGMASK    0x3ff7
>  
>  #ifdef _KERNEL
>  /*
> Index: uvm/uvm.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm.h,v
> retrieving revision 1.56
> diff -u -p -r1.56 uvm.h
> --- uvm/uvm.h 11 Jul 2014 16:35:40 -0000      1.56
> +++ uvm/uvm.h 30 Sep 2014 21:03:43 -0000
> @@ -90,7 +90,8 @@ struct uvm {
>  #define UVM_ET_SUBMAP                0x02    /* it is a vm_map submap */
>  #define UVM_ET_COPYONWRITE   0x04    /* copy_on_write */
>  #define UVM_ET_NEEDSCOPY     0x08    /* needs_copy */
> -#define      UVM_ET_HOLE             0x10    /* no backend */
> +#define UVM_ET_HOLE          0x10    /* no backend */
> +#define UVM_ET_NOFAULT               0x20    /* don't fault */
>  #define UVM_ET_FREEMAPPED    0x80    /* map entry is on free list (DEBUG) */
>  
>  #define UVM_ET_ISOBJ(E)              (((E)->etype & UVM_ET_OBJ) != 0)
> @@ -98,6 +99,7 @@ struct uvm {
>  #define UVM_ET_ISCOPYONWRITE(E)      (((E)->etype & UVM_ET_COPYONWRITE) != 0)
>  #define UVM_ET_ISNEEDSCOPY(E)        (((E)->etype & UVM_ET_NEEDSCOPY) != 0)
>  #define UVM_ET_ISHOLE(E)     (((E)->etype & UVM_ET_HOLE) != 0)
> +#define UVM_ET_ISNOFAULT(E)  (((E)->etype & UVM_ET_NOFAULT) != 0)
>  
>  #ifdef _KERNEL
>  
> Index: uvm/uvm_extern.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.119
> diff -u -p -r1.119 uvm_extern.h
> --- uvm/uvm_extern.h  11 Jul 2014 16:35:40 -0000      1.119
> +++ uvm/uvm_extern.h  30 Sep 2014 20:08:36 -0000
> @@ -148,14 +148,15 @@ typedef int             vm_prot_t;
>  #define UVM_ADV_MASK 0x7     /* mask */
>  
>  /* mapping flags */
> -#define UVM_FLAG_FIXED   0x010000 /* find space */
> -#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
> -#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
> -#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
> -#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
> -#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
> -#define      UVM_FLAG_HOLE    0x400000 /* no backend */
> -#define UVM_FLAG_QUERY   0x800000 /* do everything, except actual execution 
> */
> +#define UVM_FLAG_FIXED   0x0010000 /* find space */
> +#define UVM_FLAG_OVERLAY 0x0020000 /* establish overlay */
> +#define UVM_FLAG_NOMERGE 0x0040000 /* don't merge map entries */
> +#define UVM_FLAG_COPYONW 0x0080000 /* set copy_on_write flag */
> +#define UVM_FLAG_AMAPPAD 0x0100000 /* for bss: pad amap to reduce malloc() */
> +#define UVM_FLAG_TRYLOCK 0x0200000 /* fail if we can not lock map */
> +#define UVM_FLAG_HOLE    0x0400000 /* no backend */
> +#define UVM_FLAG_QUERY   0x0800000 /* do everything, except actual execution 
> */
> +#define UVM_FLAG_NOFAULT 0x1000000 /* don't fault */
>  
>  /* macros to extract info */
>  #define UVM_PROTECTION(X)    ((X) & UVM_PROT_MASK)
> Index: uvm/uvm_fault.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 uvm_fault.c
> --- uvm/uvm_fault.c   7 Sep 2014 08:17:44 -0000       1.77
> +++ uvm/uvm_fault.c   30 Sep 2014 19:44:28 -0000
> @@ -1114,7 +1114,11 @@ Case2:
>                               goto ReFault;
>                       }
>  
> -                     return (EACCES); /* XXX i/o error */
> +                     if (!UVM_ET_ISNOFAULT(ufi.entry))
> +                             return (EACCES); /* XXX i/o error */
> +
> +                     uobjpage = PGO_DONTCARE;        
> +                     promote = TRUE;
>               }
>  
>               /* re-verify the state of the world.  */
> @@ -1132,7 +1136,7 @@ Case2:
>               }
>  
>               /* didn't get the lock?   release the page and retry. */
> -             if (locked == FALSE) {
> +             if (locked == FALSE && uobjpage != PGO_DONTCARE) {
>                       uvm_lock_pageq();
>                       /* make sure it is in queues */
>                       uvm_pageactivate(uobjpage);
> Index: uvm/uvm_map.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.175
> diff -u -p -r1.175 uvm_map.c
> --- uvm/uvm_map.c     14 Aug 2014 17:21:38 -0000      1.175
> +++ uvm/uvm_map.c     30 Sep 2014 20:07:53 -0000
> @@ -1142,6 +1142,8 @@ uvm_map(struct vm_map *map, vaddr_t *add
>               entry->etype |= UVM_ET_OBJ;
>       else if (flags & UVM_FLAG_HOLE)
>               entry->etype |= UVM_ET_HOLE;
> +     if (flags & UVM_FLAG_NOFAULT)
> +             entry->etype |= UVM_ET_NOFAULT;
>       if (flags & UVM_FLAG_COPYONW) {
>               entry->etype |= UVM_ET_COPYONWRITE;
>               if ((flags & UVM_FLAG_OVERLAY) == 0)
> Index: uvm/uvm_mmap.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 uvm_mmap.c
> --- uvm/uvm_mmap.c    12 Jul 2014 18:44:01 -0000      1.98
> +++ uvm/uvm_mmap.c    30 Sep 2014 19:58:55 -0000
> @@ -1004,6 +1004,8 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vs
>  
>               if ((flags & MAP_SHARED) == 0)
>                       uvmflag |= UVM_FLAG_COPYONW;
> +             if (flags & __MAP_NOFAULT)
> +                     uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY);
>       }
>  
>       /* set up mapping flags */
> 
> 

Reply via email to