Author: markj
Date: Mon Feb 17 15:09:40 2020
New Revision: 358024
URL: https://svnweb.freebsd.org/changeset/base/358024

Log:
  Fix object locking races in swapoff(2).
  
  swap_pager_swapoff_object()'s goal is to allocate pages for all valid
  swap blocks belonging to the object, for which there is no resident
  page.  If the page corresponding to a block is already resident and
  valid, the block can simply be discarded.
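  
  To illustrate, the per-block decision looks roughly like this (a
  minimal sketch with locking elided, not the committed code; the
  functions named are the real ones used in the diff below):
  
	m = vm_page_lookup(object, pindex);
	if (m != NULL && vm_page_all_valid(m)) {
		/* Resident and valid: just discard the swap block. */
		vm_page_dirty(m);
		swap_pager_unswapped(m);
	} else {
		/* No valid resident page: allocate one and page it in. */
		m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL);
		(void)swap_pager_getpages(object, &m, 1, NULL, NULL);
	}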
  
  The existing implementation tries to minimize the number of I/Os used.
  For each cluster of swap blocks, it finds maximal runs of valid swap
  blocks whose pages are not resident, and runs of valid resident pages.
  processing, the object lock may be dropped in several places: when
  calling getpages, or when blocking on a busy page in
  vm_page_grab_pages().  While the lock is dropped, another thread may
  free swap blocks, causing getpages to page in stale data.
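  
  The window, paraphrasing the removed swp_pager_force_pagein() loop
  (see the diff below), was roughly:
  
	VM_OBJECT_WUNLOCK(object);
	/*
	 * The object lock is dropped here: another thread may free
	 * the swap blocks backing ma[j..i-1], after which the read
	 * below returns stale data for those pages.
	 */
	swap_pager_getpages(object, &ma[j], i - j, NULL, NULL);
	VM_OBJECT_WLOCK(object);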
  
  Fix the problem following a suggestion from Jeff: use getpages'
  readahead capability to perform clustering rather than doing it
  ourselves.  This simplifies the code a bit without reintroducing the old
  behaviour of performing one I/O per page.
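  
  Concretely, swap_pager_swapoff_object() now issues a single-page
  request with the object lock held and lets the pager cluster the I/O
  through its readahead output parameter, as in the new code below:
  
	rahead = SWAP_META_PAGES;
	rv = swap_pager_getpages_locked(object, &m, 1, NULL, &rahead);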
  
  Reviewed by:  jeff
  Reported by:  dhw, gallatin
  Tested by:    pho
  MFC after:    2 weeks
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D23664

Modified:
  head/sys/vm/swap_pager.c

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c    Mon Feb 17 14:54:21 2020        (r358023)
+++ head/sys/vm/swap_pager.c    Mon Feb 17 15:09:40 2020        (r358024)
@@ -1188,8 +1188,8 @@ swap_pager_unswapped(vm_page_t m)
  *     The pages in "ma" must be busied and will remain busied upon return.
  */
 static int
-swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
-    int *rahead)
+swap_pager_getpages_locked(vm_object_t object, vm_page_t *ma, int count,
+    int *rbehind, int *rahead)
 {
        struct buf *bp;
        vm_page_t bm, mpred, msucc, p;
@@ -1197,7 +1197,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
        daddr_t blk;
        int i, maxahead, maxbehind, reqcount;
 
-       VM_OBJECT_WLOCK(object);
+       VM_OBJECT_ASSERT_WLOCKED(object);
        reqcount = count;
 
        KASSERT(object->type == OBJT_SWAP,
@@ -1352,6 +1352,15 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
         */
 }
 
+static int
+swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count,
+    int *rbehind, int *rahead)
+{
+
+       VM_OBJECT_WLOCK(object);
+       return (swap_pager_getpages_locked(object, ma, count, rbehind, rahead));
+}
+
 /*
  *     swap_pager_getpages_async():
  *
@@ -1712,72 +1721,11 @@ swp_pager_force_dirty(vm_page_t m)
 {
 
        vm_page_dirty(m);
-#ifdef INVARIANTS
-       if (!vm_page_wired(m) && m->a.queue == PQ_NONE)
-               panic("page %p is neither wired nor queued", m);
-#endif
-       vm_page_xunbusy(m);
        swap_pager_unswapped(m);
-}
-
-static void
-swp_pager_force_launder(vm_page_t m)
-{
-
-       vm_page_dirty(m);
        vm_page_launder(m);
-       vm_page_xunbusy(m);
-       swap_pager_unswapped(m);
 }
 
 /*
- * SWP_PAGER_FORCE_PAGEIN() - force swap blocks to be paged in
- *
- *     This routine dissociates pages starting at the given index within an
- *     object from their backing store, paging them in if they do not reside
- *     in memory.  Pages that are paged in are marked dirty and placed in the
- *     laundry queue.  Pages are marked dirty because they no longer have
- *     backing store.  They are placed in the laundry queue because they have
- *     not been accessed recently.  Otherwise, they would already reside in
- *     memory.
- */
-static void
-swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex, int npages)
-{
-       vm_page_t ma[npages];
-       int i, j;
-
-       KASSERT(npages > 0, ("%s: No pages", __func__));
-       KASSERT(npages <= MAXPHYS / PAGE_SIZE,
-           ("%s: Too many pages: %d", __func__, npages));
-       KASSERT(object->type == OBJT_SWAP,
-           ("%s: Object not swappable", __func__));
-       vm_object_pip_add(object, npages);
-       vm_page_grab_pages(object, pindex, VM_ALLOC_NORMAL, ma, npages);
-       for (i = j = 0;; i++) {
-               /* Count nonresident pages, to page-in all at once. */
-               if (i < npages && ma[i]->valid != VM_PAGE_BITS_ALL)
-                       continue;
-               if (j < i) {
-                       VM_OBJECT_WUNLOCK(object);
-                       /* Page-in nonresident pages. Mark for laundering. */
-                       if (swap_pager_getpages(object, &ma[j], i - j, NULL,
-                           NULL) != VM_PAGER_OK)
-                               panic("%s: read from swap failed", __func__);
-                       VM_OBJECT_WLOCK(object);
-                       do {
-                               swp_pager_force_launder(ma[j]);
-                       } while (++j < i);
-               }
-               if (i == npages)
-                       break;
-               /* Mark dirty a resident page. */
-               swp_pager_force_dirty(ma[j++]);
-       }
-       vm_object_pip_wakeupn(object, npages);
-}
-
-/*
  *     swap_pager_swapoff_object:
  *
  *     Page in all of the pages that have been paged out for an object
@@ -1787,62 +1735,95 @@ static void
 swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object)
 {
        struct swblk *sb;
-       vm_pindex_t pi, s_pindex;
-       daddr_t blk, n_blks, s_blk;
-       int i;
+       vm_page_t m;
+       vm_pindex_t pi;
+       daddr_t blk;
+       int i, nv, rahead, rv;
 
        KASSERT(object->type == OBJT_SWAP,
            ("%s: Object not swappable", __func__));
-       n_blks = 0;
+
        for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE(
            &object->un_pager.swp.swp_blks, pi)) != NULL; ) {
+               if ((object->flags & OBJ_DEAD) != 0) {
+                       /*
+                        * Make sure that pending writes finish before
+                        * returning.
+                        */
+                       vm_object_pip_wait(object, "swpoff");
+                       swp_pager_meta_free_all(object);
+                       break;
+               }
                for (i = 0; i < SWAP_META_PAGES; i++) {
-                       blk = sb->d[i];
-                       if (!swp_pager_isondev(blk, sp))
-                               blk = SWAPBLK_NONE;
-
                        /*
-                        * If there are no blocks/pages accumulated, start a new
-                        * accumulation here.
+                        * Count the number of contiguous valid blocks.
                         */
-                       if (n_blks == 0) {
-                               if (blk != SWAPBLK_NONE) {
-                                       s_blk = blk;
-                                       s_pindex = sb->p + i;
-                                       n_blks = 1;
-                               }
-                               continue;
+                       for (nv = 0; nv < SWAP_META_PAGES - i; nv++) {
+                               blk = sb->d[i + nv];
+                               if (!swp_pager_isondev(blk, sp) ||
+                                   blk == SWAPBLK_NONE)
+                                       break;
                        }
+                       if (nv == 0)
+                               continue;
 
                        /*
-                        * If the accumulation can be extended without breaking
-                        * the sequence of consecutive blocks and pages that
-                        * swp_pager_force_pagein() depends on, do so.
+                        * Look for a page corresponding to the first
+                        * valid block and ensure that any pending paging
+                        * operations on it are complete.  If the page is valid,
+                        * mark it dirty and free the swap block.  Try to batch
+                        * this operation since it may cause sp to be freed,
+                        * meaning that we must restart the scan.  Avoid busying
+                        * valid pages since we may block forever on kernel
+                        * stack pages.
                         */
-                       if (n_blks < MAXPHYS / PAGE_SIZE &&
-                           s_blk + n_blks == blk &&
-                           s_pindex + n_blks == sb->p + i) {
-                               ++n_blks;
-                               continue;
+                       m = vm_page_lookup(object, sb->p + i);
+                       if (m == NULL) {
+                               m = vm_page_alloc(object, sb->p + i,
+                                   VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
+                               if (m == NULL)
+                                       break;
+                       } else {
+                               if ((m->oflags & VPO_SWAPINPROG) != 0) {
+                                       m->oflags |= VPO_SWAPSLEEP;
+                                       VM_OBJECT_SLEEP(object, &object->handle,
+                                           PSWP, "swpoff", 0);
+                                       break;
+                               }
+                               if (vm_page_all_valid(m)) {
+                                       do {
+                                               swp_pager_force_dirty(m);
+                                       } while (--nv > 0 &&
+                                           (m = vm_page_next(m)) != NULL &&
+                                           vm_page_all_valid(m) &&
+                                           (m->oflags & VPO_SWAPINPROG) == 0);
+                                       break;
+                               }
+                               if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL))
+                                       break;
                        }
 
+                       vm_object_pip_add(object, 1);
+                       rahead = SWAP_META_PAGES;
+                       rv = swap_pager_getpages_locked(object, &m, 1, NULL,
+                           &rahead);
+                       if (rv != VM_PAGER_OK)
+                               panic("%s: read from swap failed: %d",
+                                   __func__, rv);
+                       vm_object_pip_wakeupn(object, 1);
+                       VM_OBJECT_WLOCK(object);
+                       vm_page_xunbusy(m);
+
                        /*
-                        * The sequence of consecutive blocks and pages cannot
-                        * be extended, so page them all in here.  Then,
-                        * because doing so involves releasing and reacquiring
-                        * a lock that protects the swap block pctrie, do not
-                        * rely on the current swap block.  Break this loop and
-                        * re-fetch the same pindex from the pctrie again.
+                        * The object lock was dropped so we must restart the
+                        * scan of this swap block.  Pages paged in during this
+                        * iteration will be marked dirty in a future iteration.
                         */
-                       swp_pager_force_pagein(object, s_pindex, n_blks);
-                       n_blks = 0;
                        break;
                }
                if (i == SWAP_META_PAGES)
                        pi = sb->p + SWAP_META_PAGES;
        }
-       if (n_blks > 0)
-               swp_pager_force_pagein(object, s_pindex, n_blks);
 }
 
 /*