The diff below introduces a new flag for mmap(2) that creates mappings
that cannot fault.  Normally, if you mmap a file, and your mapping is
larger than the mapped file, memory access to full pages beyond the
end of the file will fault.  Depending on the OS you will get a
SIGSEGV or SIGBUS and if you don't catch those, you die.  This is
especially nasty if you use file descriptor passing to share the file
descriptor with some other process and this other process ftruncates
the file without telling you.

The new xserver that matthieu@ just imported has the new xshm
extension which mmaps file descriptors passed by clients through file
descriptor passing.  To protect itself from being trivially DOSed by a
malicious (or careless) client, it keeps a list of mappings and
installs a SIGBUS signal handler that checks whether the fault address
matches any of these mappings.  In that case it mmaps a private
anonymous page on top of the faulting address and returns.  Since
OpenBSD generates SIGSEGV instead of SIGBUS in this case, this doesn't
work for us, so I made sure matthieu@ disabled this functionality for
now.  But the new xshm extension would actually be a nice thing to
have as it circumvents certain problems with the traditional xshm
extension that we have because of privsep.  And file descriptor
passing is also being used for DRI3 which we may want to support one
day.  Oh, and Wayland, which some people claim will replace X any day
now, heavily uses mapping file descriptors passed over sockets as
well.

We could of course change the xserver code to also trap SIGSEGV.  But
this workaround is rather ugly.  So my idea is to make X use this new
flag and disable the stupid busfault code.

The diff is remarkably simple.  We already have the infrastructure in
place to replace mapped pages with anons to support MAP_PRIVATE and
copy-on-write.  This diff simply leverages that infrastructure to
replace a page that can't be read from the underlying object by an
anonymous page.  Some open issues:

 * I need to check whether all combinations of flags actually make
   sense.  Should we only support __MAP_NOFAULT with non-anonymous
   mappings?

 * Should we only fix up the fault for access beyond the end of the
   mapped object (VM_PAGER_BAD) and still fault for actual I/O errors
   (VM_PAGER_ERROR)?

 * Should the flag be exported without the leading underscores since
   we actually want to encourage its use?

Thoughts?


Index: sys/mman.h
===================================================================
RCS file: /cvs/src/sys/sys/mman.h,v
retrieving revision 1.26
diff -u -p -r1.26 mman.h
--- sys/mman.h  10 Jul 2014 19:00:23 -0000      1.26
+++ sys/mman.h  30 Sep 2014 20:34:42 -0000
@@ -58,8 +58,9 @@
 #define        __MAP_NOREPLACE 0x0800  /* fail if address not available */
 #define        MAP_ANON        0x1000  /* allocated from memory, swap space */
 #define        MAP_ANONYMOUS   MAP_ANON        /* alternate POSIX spelling */
+#define        __MAP_NOFAULT   0x2000
 
-#define        MAP_FLAGMASK    0x1ff7
+#define        MAP_FLAGMASK    0x3ff7
 
 #ifdef _KERNEL
 /*
Index: uvm/uvm.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm.h,v
retrieving revision 1.56
diff -u -p -r1.56 uvm.h
--- uvm/uvm.h   11 Jul 2014 16:35:40 -0000      1.56
+++ uvm/uvm.h   30 Sep 2014 21:03:43 -0000
@@ -90,7 +90,8 @@ struct uvm {
 #define UVM_ET_SUBMAP          0x02    /* it is a vm_map submap */
 #define UVM_ET_COPYONWRITE     0x04    /* copy_on_write */
 #define UVM_ET_NEEDSCOPY       0x08    /* needs_copy */
-#define        UVM_ET_HOLE             0x10    /* no backend */
+#define UVM_ET_HOLE            0x10    /* no backend */
+#define UVM_ET_NOFAULT         0x20    /* don't fault */
 #define UVM_ET_FREEMAPPED      0x80    /* map entry is on free list (DEBUG) */
 
 #define UVM_ET_ISOBJ(E)                (((E)->etype & UVM_ET_OBJ) != 0)
@@ -98,6 +99,7 @@ struct uvm {
 #define UVM_ET_ISCOPYONWRITE(E)        (((E)->etype & UVM_ET_COPYONWRITE) != 0)
 #define UVM_ET_ISNEEDSCOPY(E)  (((E)->etype & UVM_ET_NEEDSCOPY) != 0)
 #define UVM_ET_ISHOLE(E)       (((E)->etype & UVM_ET_HOLE) != 0)
+#define UVM_ET_ISNOFAULT(E)    (((E)->etype & UVM_ET_NOFAULT) != 0)
 
 #ifdef _KERNEL
 
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.119
diff -u -p -r1.119 uvm_extern.h
--- uvm/uvm_extern.h    11 Jul 2014 16:35:40 -0000      1.119
+++ uvm/uvm_extern.h    30 Sep 2014 20:08:36 -0000
@@ -148,14 +148,15 @@ typedef int               vm_prot_t;
 #define UVM_ADV_MASK   0x7     /* mask */
 
 /* mapping flags */
-#define UVM_FLAG_FIXED   0x010000 /* find space */
-#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
-#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
-#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
-#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
-#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
-#define        UVM_FLAG_HOLE    0x400000 /* no backend */
-#define UVM_FLAG_QUERY   0x800000 /* do everything, except actual execution */
+#define UVM_FLAG_FIXED   0x0010000 /* find space */
+#define UVM_FLAG_OVERLAY 0x0020000 /* establish overlay */
+#define UVM_FLAG_NOMERGE 0x0040000 /* don't merge map entries */
+#define UVM_FLAG_COPYONW 0x0080000 /* set copy_on_write flag */
+#define UVM_FLAG_AMAPPAD 0x0100000 /* for bss: pad amap to reduce malloc() */
+#define UVM_FLAG_TRYLOCK 0x0200000 /* fail if we can not lock map */
+#define UVM_FLAG_HOLE    0x0400000 /* no backend */
+#define UVM_FLAG_QUERY   0x0800000 /* do everything, except actual execution */
+#define UVM_FLAG_NOFAULT 0x1000000 /* don't fault */
 
 /* macros to extract info */
 #define UVM_PROTECTION(X)      ((X) & UVM_PROT_MASK)
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.77
diff -u -p -r1.77 uvm_fault.c
--- uvm/uvm_fault.c     7 Sep 2014 08:17:44 -0000       1.77
+++ uvm/uvm_fault.c     30 Sep 2014 19:44:28 -0000
@@ -1114,7 +1114,11 @@ Case2:
                                goto ReFault;
                        }
 
-                       return (EACCES); /* XXX i/o error */
+                       if (!UVM_ET_ISNOFAULT(ufi.entry))
+                               return (EACCES); /* XXX i/o error */
+
+                       uobjpage = PGO_DONTCARE;        
+                       promote = TRUE;
                }
 
                /* re-verify the state of the world.  */
@@ -1132,7 +1136,7 @@ Case2:
                }
 
                /* didn't get the lock?   release the page and retry. */
-               if (locked == FALSE) {
+               if (locked == FALSE && uobjpage != PGO_DONTCARE) {
                        uvm_lock_pageq();
                        /* make sure it is in queues */
                        uvm_pageactivate(uobjpage);
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.175
diff -u -p -r1.175 uvm_map.c
--- uvm/uvm_map.c       14 Aug 2014 17:21:38 -0000      1.175
+++ uvm/uvm_map.c       30 Sep 2014 20:07:53 -0000
@@ -1142,6 +1142,8 @@ uvm_map(struct vm_map *map, vaddr_t *add
                entry->etype |= UVM_ET_OBJ;
        else if (flags & UVM_FLAG_HOLE)
                entry->etype |= UVM_ET_HOLE;
+       if (flags & UVM_FLAG_NOFAULT)
+               entry->etype |= UVM_ET_NOFAULT;
        if (flags & UVM_FLAG_COPYONW) {
                entry->etype |= UVM_ET_COPYONWRITE;
                if ((flags & UVM_FLAG_OVERLAY) == 0)
Index: uvm/uvm_mmap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.98
diff -u -p -r1.98 uvm_mmap.c
--- uvm/uvm_mmap.c      12 Jul 2014 18:44:01 -0000      1.98
+++ uvm/uvm_mmap.c      30 Sep 2014 19:58:55 -0000
@@ -1004,6 +1004,8 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vs
 
                if ((flags & MAP_SHARED) == 0)
                        uvmflag |= UVM_FLAG_COPYONW;
+               if (flags & __MAP_NOFAULT)
+                       uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY);
        }
 
        /* set up mapping flags */

Reply via email to