Author: alc
Date: Mon Dec 20 22:49:31 2010
New Revision: 216604
URL: http://svn.freebsd.org/changeset/base/216604

Log:
  Introduce vm_fault_hold() and use it to (1) eliminate a long-standing race
  condition in proc_rwmem() and to (2) simplify the implementation of the
  cxgb driver's vm_fault_hold_user_pages().  Specifically, in proc_rwmem()
  the requested read or write could fail because the targeted page could be
  reclaimed between the calls to vm_fault() and vm_page_hold().
  
  In collaboration with:        kib@
  MFC after:    6 weeks

Modified:
  head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_map.h

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_vm.c Mon Dec 20 21:12:18 2010        
(r216603)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_vm.c Mon Dec 20 22:49:31 2010        
(r216604)
@@ -66,7 +66,7 @@ vm_fault_hold_user_pages(vm_map_t map, v
     int count, vm_prot_t prot)
 {
        vm_offset_t end, va;
-       int faults, rv;
+       int faults;
        pmap_t pmap;
        vm_page_t m, *pages;
        
@@ -122,20 +122,11 @@ vm_fault_hold_user_pages(vm_map_t map, v
        /*
         * Pages either have insufficient permissions or are not present
         * trigger a fault where neccessary
-        * 
         */
-       rv = 0;
        for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
-               /*
-                * Account for a very narrow race where the page may be
-                * taken away from us before it is held
-                */
-               while (*pages == NULL) {
-                       rv = vm_fault(map, va, prot, VM_FAULT_NORMAL);
-                       if (rv) 
-                               goto error;
-                       *pages = pmap_extract_and_hold(pmap, va, prot);
-               }
+               if (*pages == NULL && vm_fault_hold(map, va, prot,
+                   VM_FAULT_NORMAL, pages) != KERN_SUCCESS)
+                       goto error;
        }
        return (0);
 error: 

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c Mon Dec 20 21:12:18 2010        (r216603)
+++ head/sys/kern/sys_process.c Mon Dec 20 22:49:31 2010        (r216604)
@@ -237,10 +237,9 @@ int
 proc_rwmem(struct proc *p, struct uio *uio)
 {
        vm_map_t map;
-       vm_object_t backing_object, object;
        vm_offset_t pageno;             /* page number */
        vm_prot_t reqprot;
-       int error, writing;
+       int error, fault_flags, page_offset, writing;
 
        /*
         * Assert that someone has locked this vmspace.  (Should be
@@ -255,26 +254,24 @@ proc_rwmem(struct proc *p, struct uio *u
         */
        map = &p->p_vmspace->vm_map;
 
+       /*
+        * If we are writing, then we request vm_fault() to create a private
+        * copy of each page.  Since these copies will not be writeable by the
+        * process, we must explicity request that they be dirtied.
+        */
        writing = uio->uio_rw == UIO_WRITE;
        reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
+       fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
 
        /*
         * Only map in one page at a time.  We don't have to, but it
         * makes things easier.  This way is trivial - right?
         */
        do {
-               vm_map_t tmap;
                vm_offset_t uva;
-               int page_offset;                /* offset into page */
-               vm_map_entry_t out_entry;
-               vm_prot_t out_prot;
-               boolean_t wired;
-               vm_pindex_t pindex;
                u_int len;
                vm_page_t m;
 
-               object = NULL;
-
                uva = (vm_offset_t)uio->uio_offset;
 
                /*
@@ -289,10 +286,10 @@ proc_rwmem(struct proc *p, struct uio *u
                len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
                /*
-                * Fault the page on behalf of the process
+                * Fault and hold the page on behalf of the process.
                 */
-               error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
-               if (error) {
+               error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+               if (error != KERN_SUCCESS) {
                        if (error == KERN_RESOURCE_SHORTAGE)
                                error = ENOMEM;
                        else
@@ -301,61 +298,18 @@ proc_rwmem(struct proc *p, struct uio *u
                }
 
                /*
-                * Now we need to get the page.  out_entry and wired
-                * aren't used.  One would think the vm code
-                * would be a *bit* nicer...  We use tmap because
-                * vm_map_lookup() can change the map argument.
-                */
-               tmap = map;
-               error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry,
-                   &object, &pindex, &out_prot, &wired);
-               if (error) {
-                       error = EFAULT;
-                       break;
-               }
-               VM_OBJECT_LOCK(object);
-               while ((m = vm_page_lookup(object, pindex)) == NULL &&
-                   !writing &&
-                   (backing_object = object->backing_object) != NULL) {
-                       /*
-                        * Allow fallback to backing objects if we are reading.
-                        */
-                       VM_OBJECT_LOCK(backing_object);
-                       pindex += OFF_TO_IDX(object->backing_object_offset);
-                       VM_OBJECT_UNLOCK(object);
-                       object = backing_object;
-               }
-               if (writing && m != NULL) {
-                       vm_page_dirty(m);
-                       vm_pager_page_unswapped(m);
-               }
-               VM_OBJECT_UNLOCK(object);
-               if (m == NULL) {
-                       vm_map_lookup_done(tmap, out_entry);
-                       error = EFAULT;
-                       break;
-               }
-
-               /*
-                * Hold the page in memory.
-                */
-               vm_page_lock(m);
-               vm_page_hold(m);
-               vm_page_unlock(m);
-
-               /*
-                * We're done with tmap now.
-                */
-               vm_map_lookup_done(tmap, out_entry);
-
-               /*
                 * Now do the i/o move.
                 */
                error = uiomove_fromphys(&m, page_offset, len, uio);
 
                /* Make the I-cache coherent for breakpoints. */
-               if (!error && writing && (out_prot & VM_PROT_EXECUTE))
-                       vm_sync_icache(map, uva, len);
+               if (writing && error == 0) {
+                       vm_map_lock_read(map);
+                       if (vm_map_check_protection(map, pageno, pageno +
+                           PAGE_SIZE, VM_PROT_EXECUTE))
+                               vm_sync_icache(map, uva, len);
+                       vm_map_unlock_read(map);
+               }
 
                /*
                 * Release the page.

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h     Mon Dec 20 21:12:18 2010        (r216603)
+++ head/sys/vm/vm_extern.h     Mon Dec 20 22:49:31 2010        (r216604)
@@ -61,6 +61,8 @@ int useracc(void *, int, int);
 int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
 void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
     vm_ooffset_t *);
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold);
 void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_forkproc(struct thread *, struct proc *, struct thread *, struct 
vmspace *, int);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Mon Dec 20 21:12:18 2010        (r216603)
+++ head/sys/vm/vm_fault.c      Mon Dec 20 22:49:31 2010        (r216604)
@@ -201,13 +201,20 @@ unlock_and_deallocate(struct faultstate 
  *     KERN_SUCCESS is returned if the page fault is handled; otherwise,
  *     a standard error specifying why the fault is fatal is returned.
  *
- *
  *     The map in question must be referenced, and remains so.
  *     Caller may hold no locks.
  */
 int
 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
-        int fault_flags)
+    int fault_flags)
+{
+
+       return (vm_fault_hold(map, vaddr, fault_type, fault_flags, NULL));
+}
+
+int
+vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold)
 {
        vm_prot_t prot;
        int is_first_object_locked, result;
@@ -880,7 +887,8 @@ vnode_locked:
        if (hardfault)
                fs.entry->lastr = fs.pindex + faultcount - behind;
 
-       if (prot & VM_PROT_WRITE) {
+       if ((prot & VM_PROT_WRITE) != 0 ||
+           (fault_flags & VM_FAULT_DIRTY) != 0) {
                vm_object_set_writeable_dirty(fs.object);
 
                /*
@@ -906,8 +914,9 @@ vnode_locked:
                 * Also tell the backing pager, if any, that it should remove
                 * any swap backing since the page is now dirty.
                 */
-               if ((fault_type & VM_PROT_WRITE) != 0 &&
-                   (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) {
+               if (((fault_type & VM_PROT_WRITE) != 0 &&
+                   (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
+                   (fault_flags & VM_FAULT_DIRTY) != 0) {
                        vm_page_dirty(fs.m);
                        vm_pager_page_unswapped(fs.m);
                }
@@ -949,6 +958,10 @@ vnode_locked:
                        vm_page_unwire(fs.m, 1);
        } else
                vm_page_activate(fs.m);
+       if (m_hold != NULL) {
+               *m_hold = fs.m;
+               vm_page_hold(fs.m);
+       }
        vm_page_unlock(fs.m);
        vm_page_wakeup(fs.m);
 

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h        Mon Dec 20 21:12:18 2010        (r216603)
+++ head/sys/vm/vm_map.h        Mon Dec 20 22:49:31 2010        (r216604)
@@ -325,6 +325,7 @@ long vmspace_wired_count(struct vmspace 
  */
 #define VM_FAULT_NORMAL 0              /* Nothing special */
 #define VM_FAULT_CHANGE_WIRING 1       /* Change the wiring as appropriate */
+#define        VM_FAULT_DIRTY 2                /* Dirty the page; use 
w/VM_PROT_COPY */
 
 /*
  * The following "find_space" options are supported by vm_map_find()
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to