Out of interest, how would this exhibit itself?

On 14/06/2018 20:41, Konstantin Belousov wrote:
Author: kib
Date: Thu Jun 14 19:41:02 2018
New Revision: 335171
URL: https://svnweb.freebsd.org/changeset/base/335171

Log:
   Handle the race between fork/vm_object_split() and faults.

   If a fault started before vmspace_fork() locked the map, and then,
   during the fork, vm_map_copy_entry()->vm_object_split() is executed,
   it is possible that the fault instantiates the page into the original
   object when the page was already copied into the new object (see
   vm_object_split() for the orig/new objects terminology).  This can
   happen if the split found a busy page (e.g. from the fault) and slept
   dropping the object's lock, which allows the swap pager to instantiate
   read-behind pages for the fault.  Then the restarted scan can see a
   page in the scanned range that was already copied to the upper object.
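
   (As a rough, hedged illustration only: the userspace sketch below is
   not from the commit and does not deterministically reproduce the race.
   It merely lines up the ingredients the log describes, a thread
   faulting swapped-out anonymous pages back in, which goes through
   swap_pager_getpages(), while the process forks, and it only matters
   if the region has actually been paged out under memory pressure.  The
   region size, loop counts and names are arbitrary.)

   /*
    * Hypothetical sketch: one thread keeps touching a large anonymous
    * region while the main thread forks repeatedly, so a swap-in fault
    * can overlap with vmspace_fork()/vm_object_split().
    */
   #include <sys/types.h>
   #include <sys/mman.h>
   #include <sys/wait.h>
   #include <pthread.h>
   #include <string.h>
   #include <unistd.h>

   #define REGION_SIZE     (256UL * 1024 * 1024)   /* big enough to get paged out */
   #define PAGE_STEP       4096

   static char *region;
   static volatile int done;

   static void *
   reader(void *arg)
   {
           size_t off;

           (void)arg;
           while (!done) {
                   /* Touch every page; any swapped-out page faults back in. */
                   for (off = 0; off < REGION_SIZE; off += PAGE_STEP)
                           (void)*(volatile char *)(region + off);
           }
           return (NULL);
   }

   int
   main(void)
   {
           pthread_t t;
           pid_t pid;
           int i;

           region = mmap(NULL, REGION_SIZE, PROT_READ | PROT_WRITE,
               MAP_ANON | MAP_PRIVATE, -1, 0);
           if (region == MAP_FAILED)
                   return (1);
           /* Dirty the pages so they are swap-backed rather than zero-fill. */
           memset(region, 0x5a, REGION_SIZE);

           if (pthread_create(&t, NULL, reader, NULL) != 0)
                   return (1);

           /* Fork while the reader keeps faulting pages back in. */
           for (i = 0; i < 1000; i++) {
                   pid = fork();
                   if (pid == 0)
                           _exit(0);
                   if (pid > 0)
                           waitpid(pid, NULL, 0);
           }
           done = 1;
           pthread_join(t, NULL);
           return (0);
   }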

   Fix it by instantiating the read-behind (and read-ahead) pages before
   the swap_pager_getpages() method drops the object lock to allocate the
   pbuf.  The object scan then sees the whole range prefilled with busy
   pages and does not proceed into that range.

   Note that vm_fault() rechecks the map generation count after the
   object unlock, so it restarts the handling if it raced with the split
   and re-looks up the right page from the upper object.
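
   (A minimal toy sketch of that recheck pattern, assuming only a
   mutex-protected structure with a generation counter; the names below
   are stand-ins, not the kernel's vm_map or vm_fault().)

   /* Toy illustration of a generation-count recheck after dropping a lock. */
   #include <pthread.h>
   #include <sched.h>

   struct map {
           pthread_mutex_t lock;
           unsigned        generation;     /* bumped on every structural change */
   };

   /*
    * Placeholder for work that must run with the lock dropped (the
    * analogue of allocating the pbuf and doing the swap read).
    */
   static void
   slow_work_unlocked(void)
   {
           sched_yield();
   }

   static void
   fault_like_handler(struct map *m)
   {
           unsigned gen;

           for (;;) {
                   pthread_mutex_lock(&m->lock);
                   gen = m->generation;    /* version our lookup was based on */
                   pthread_mutex_unlock(&m->lock);

                   slow_work_unlocked();

                   pthread_mutex_lock(&m->lock);
                   if (gen == m->generation) {
                           /* No fork/split changed the map meanwhile: done. */
                           pthread_mutex_unlock(&m->lock);
                           return;
                   }
                   /* Raced with a structural change: re-look everything up. */
                   pthread_mutex_unlock(&m->lock);
           }
   }

   int
   main(void)
   {
           struct map m;

           pthread_mutex_init(&m.lock, NULL);
           m.generation = 0;
           fault_like_handler(&m);
           pthread_mutex_destroy(&m.lock);
           return (0);
   }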

   In collaboration with:  alc
   Tested by:              pho
   Sponsored by:           The FreeBSD Foundation
   MFC after:              1 week

Modified:
   head/sys/vm/swap_pager.c

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c    Thu Jun 14 19:01:40 2018        (r335170)
+++ head/sys/vm/swap_pager.c    Thu Jun 14 19:41:02 2018        (r335171)
@@ -1096,21 +1096,24 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
      int *rahead)
  {
        struct buf *bp;
-       vm_page_t mpred, msucc, p;
+       vm_page_t bm, mpred, msucc, p;
        vm_pindex_t pindex;
        daddr_t blk;
-       int i, j, maxahead, maxbehind, reqcount, shift;
+       int i, maxahead, maxbehind, reqcount;
 
        reqcount = count;
 
-       VM_OBJECT_WUNLOCK(object);
-       bp = getpbuf(&nsw_rcount);
-       VM_OBJECT_WLOCK(object);
-
-       if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) {
-               relpbuf(bp, &nsw_rcount);
+       /*
+        * Determine the final number of read-behind pages and
+        * allocate them BEFORE releasing the object lock.  Otherwise,
+        * there can be a problematic race with vm_object_split().
+        * Specifically, vm_object_split() might first transfer pages
+        * that precede ma[0] in the current object to a new object,
+        * and then this function incorrectly recreates those pages as
+        * read-behind pages in the current object.
+        */
+       if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead))
                return (VM_PAGER_FAIL);
-       }
 
        /*
         * Clip the readahead and readbehind ranges to exclude resident pages.
@@ -1132,35 +1135,31 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
                        *rbehind = pindex - mpred->pindex - 1;
        }
 
+       bm = ma[0];
+       for (i = 0; i < count; i++)
+               ma[i]->oflags |= VPO_SWAPINPROG;
+
        /*
         * Allocate readahead and readbehind pages.
         */
-       shift = rbehind != NULL ? *rbehind : 0;
-       if (shift != 0) {
-               for (i = 1; i <= shift; i++) {
+       if (rbehind != NULL) {
+               for (i = 1; i <= *rbehind; i++) {
                        p = vm_page_alloc(object, ma[0]->pindex - i,
                            VM_ALLOC_NORMAL);
-                       if (p == NULL) {
-                               /* Shift allocated pages to the left. */
-                               for (j = 0; j < i - 1; j++)
-                                       bp->b_pages[j] =
-                                           bp->b_pages[j + shift - i + 1];
+                       if (p == NULL)
                                break;
-                       }
-                       bp->b_pages[shift - i] = p;
+                       p->oflags |= VPO_SWAPINPROG;
+                       bm = p;
                }
-               shift = i - 1;
-               *rbehind = shift;
+               *rbehind = i - 1;
        }
-       for (i = 0; i < reqcount; i++)
-               bp->b_pages[i + shift] = ma[i];
        if (rahead != NULL) {
                for (i = 0; i < *rahead; i++) {
                        p = vm_page_alloc(object,
                            ma[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL);
                        if (p == NULL)
                                break;
-                       bp->b_pages[shift + reqcount + i] = p;
+                       p->oflags |= VPO_SWAPINPROG;
                }
                *rahead = i;
        }
@@ -1171,15 +1170,18 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
 
        vm_object_pip_add(object, count);
 
-       for (i = 0; i < count; i++)
-               bp->b_pages[i]->oflags |= VPO_SWAPINPROG;
-
-       pindex = bp->b_pages[0]->pindex;
+       pindex = bm->pindex;
        blk = swp_pager_meta_ctl(object, pindex, 0);
        KASSERT(blk != SWAPBLK_NONE,
            ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
 
        VM_OBJECT_WUNLOCK(object);
+       bp = getpbuf(&nsw_rcount);
+       /* Pages cannot leave the object while busy. */
+       for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
+               MPASS(p->pindex == bm->pindex + i);
+               bp->b_pages[i] = p;
+       }
 
        bp->b_flags |= B_PAGING;
        bp->b_iocmd = BIO_READ;
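
(To spell out the ordering the patch establishes, here is a small hedged
sketch with invented toy types rather than the kernel's structures:
everything a concurrent scanner such as vm_object_split() could observe
is allocated and marked busy while the object lock is still held, and
only the transfer-buffer allocation, which may sleep, happens after the
unlock.)

/* Toy model of the new ordering in swap_pager_getpages(); "obj", "page"
 * and "xbuf" are invented stand-ins, not the kernel's types. */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

#define OBJ_NPAGES      64

struct page {
        bool    busy;           /* plays the role of busy/VPO_SWAPINPROG */
};

struct obj {
        pthread_mutex_t lock;
        struct page     pages[OBJ_NPAGES];
};

struct xbuf {
        struct page     *slots[OBJ_NPAGES];
};

static void
getpages_sketch(struct obj *o, int first, int count)
{
        struct xbuf *bp;
        int i;

        pthread_mutex_lock(&o->lock);
        /*
         * Step 1: while the lock is held, mark the whole request range,
         * including any read-behind/read-ahead pages, busy.  A scanner
         * that takes the lock afterwards sees only busy pages and backs
         * off instead of moving them to another object.
         */
        for (i = 0; i < count; i++)
                o->pages[first + i].busy = true;
        pthread_mutex_unlock(&o->lock);

        /*
         * Step 2: only now do the allocation that may sleep (the pbuf in
         * the real code).  The busy pages cannot leave the object, so
         * collecting them here without the lock is safe.
         */
        bp = malloc(sizeof(*bp));
        if (bp == NULL)
                return;
        for (i = 0; i < count; i++)
                bp->slots[i] = &o->pages[first + i];

        /* ... a real pager would now issue the read, then clear the busy
         * state and release bp ... */
        free(bp);
}

int
main(void)
{
        static struct obj o;    /* zero-initialized pages */

        pthread_mutex_init(&o.lock, NULL);
        getpages_sketch(&o, 8, 4);
        pthread_mutex_destroy(&o.lock);
        return (0);
}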


