Split the core of try_to_unuse() into a new function,
try_to_unuse_entry().

These patches are based on a patch by Nick Piggin and some of my own
patches/bugfixes as discussed on LKML.
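
For clarity, a minimal sketch of the intended calling convention (the
first call is the one this patch introduces in try_to_unuse(); the NULL
case is not used by this patch, it is only an assumption drawn from the
start_mm_p handling in the new helper):

	/*
	 * try_to_unuse() passes its own start_mm so the helper can rotate
	 * the starting mm between calls; the helper consumes the caller's
	 * page reference (it unlocks and page_cache_release()s the page).
	 */
	retval = try_to_unuse_entry(entry, swap_map, page, &start_mm);

	/*
	 * A caller with no mm context of its own could pass NULL; the
	 * helper then takes and drops its own reference on init_mm.
	 */
	retval = try_to_unuse_entry(entry, swap_map, page, NULL);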

Signed-off-by: Richard Purdie <[EMAIL PROTECTED]>

---
 mm/swapfile.c |  321 ++++++++++++++++++++++++++++++----------------------------
 1 file changed, 170 insertions(+), 151 deletions(-)

Index: git/mm/swapfile.c
===================================================================
--- git.orig/mm/swapfile.c      2007-01-07 21:39:29.000000000 +0000
+++ git/mm/swapfile.c   2007-01-08 11:39:27.000000000 +0000
@@ -638,6 +638,174 @@ static int unuse_mm(struct mm_struct *mm
        return 0;
 }
 
+static int try_to_unuse_entry(swp_entry_t entry, unsigned short *swap_map,
+                       struct page *page, struct mm_struct **start_mm_p)
+{
+       struct mm_struct *start_mm;
+       unsigned short swcount;
+       int retval = 0;
+       int shmem;
+
+       if (start_mm_p)
+               start_mm = *start_mm_p;
+       else {
+               start_mm = &init_mm;
+               atomic_inc(&init_mm.mm_users);
+       }
+
+       /*
+        * Don't hold on to start_mm if it looks like exiting.
+        */
+       if (atomic_read(&start_mm->mm_users) == 1) {
+               mmput(start_mm);
+               start_mm = &init_mm;
+               atomic_inc(&init_mm.mm_users);
+       }
+
+       /*
+        * Wait for and lock page.  When do_swap_page races with try_to_unuse,
+        * do_swap_page can handle the fault much faster than try_to_unuse can
+        * locate the entry.  This apparently redundant "wait_on_page_locked"
+        * lets try_to_unuse defer to do_swap_page in such a case - in some
+        * tests, do_swap_page and try_to_unuse repeatedly compete.
+        */
+
+       wait_on_page_locked(page);
+       wait_on_page_writeback(page);
+       lock_page(page);
+       wait_on_page_writeback(page);
+retry:
+
+       /*
+        * Remove all references to entry.
+        * Whenever we reach init_mm, there's no address space to search, but
+        * use it as a reminder to search shmem.
+        */
+       shmem = 0;
+       swcount = *swap_map;
+       if (swcount > 1) {
+               if (start_mm == &init_mm)
+                       shmem = shmem_unuse(entry, page);
+               else
+                       retval = unuse_mm(start_mm, entry, page);
+       }
+       if (*swap_map > 1) {
+               int set_start_mm = (*swap_map >= swcount);
+               struct list_head *p = &start_mm->mmlist;
+               struct mm_struct *new_start_mm = start_mm;
+               struct mm_struct *prev_mm = start_mm;
+               struct mm_struct *mm;
+
+               atomic_inc(&new_start_mm->mm_users);
+               atomic_inc(&prev_mm->mm_users);
+               spin_lock(&mmlist_lock);
+               while (*swap_map > 1 && !retval &&
+                               (p = p->next) != &start_mm->mmlist) {
+                       mm = list_entry(p, struct mm_struct, mmlist);
+                       if (!atomic_inc_not_zero(&mm->mm_users))
+                               continue;
+                       spin_unlock(&mmlist_lock);
+                       mmput(prev_mm);
+                       prev_mm = mm;
+
+                       cond_resched();
+
+                       swcount = *swap_map;
+                       if (swcount <= 1)
+                               ;
+                       else if (mm == &init_mm) {
+                               set_start_mm = 1;
+                               shmem = shmem_unuse(entry, page);
+                       } else
+                               retval = unuse_mm(mm, entry, page);
+                       if (set_start_mm && *swap_map < swcount) {
+                               mmput(new_start_mm);
+                               atomic_inc(&mm->mm_users);
+                               new_start_mm = mm;
+                               set_start_mm = 0;
+                       }
+                       spin_lock(&mmlist_lock);
+               }
+               spin_unlock(&mmlist_lock);
+               mmput(prev_mm);
+               mmput(start_mm);
+               start_mm = new_start_mm;
+       }
+       if (retval)
+               goto unuse_err;
+
+       /*
+        * How could swap count reach 0x7fff when the maximum pid is 0x7fff,
+        * and there's no way to repeat a swap page within an mm (except in
+        * shmem, where it's the shared object which takes the reference
+        * count)?  We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
+        *
+        * If that's wrong, then we should worry more about exit_mmap() and
+        * do_munmap() cases described above: we might be resetting
+        * SWAP_MAP_MAX too early here.  We know "Undead"s can happen, they're
+        * okay, so don't report them; but do report if we reset SWAP_MAP_MAX.
+        */
+       if (*swap_map == SWAP_MAP_MAX) {
+               spin_lock(&swap_lock);
+               *swap_map = 1;
+               spin_unlock(&swap_lock);
+               if (printk_ratelimit())
+                       printk(KERN_WARNING
+                               "try_to_unuse_entry: cleared swap entry overflow\n");
+       }
+
+       /*
+        * If a reference remains (rare), we would like to leave the page in
+        * the swap cache; but try_to_unmap could then re-duplicate the entry
+        * once we drop page lock, so we might loop indefinitely; also, that
+        * page could not be swapped out to other storage meanwhile.  So:
+        * delete from cache even if there's another reference, after ensuring
+        * that the data has been saved to disk - since if the reference
+        * remains (rarer), it will be read from disk into another page.
+        * Splitting into two pages would be incorrect if swap supported
+        * "shared private" pages, but they are handled by tmpfs files.
+        *
+        * Note shmem_unuse already deleted a swappage from the swap cache,
+        * unless the move to filepage failed: in which case it left swappage
+        * in cache, lowered its swap count to pass quickly through the loops
+        * above, and now we must reincrement count to try again later.
+        */
+       if (PageSwapCache(page)) {
+               if ((*swap_map > 1) && PageDirty(page)) {
+                       struct writeback_control wbc = {
+                               .sync_mode = WB_SYNC_NONE,
+                       };
+
+                       swap_writepage(page, &wbc);
+                       lock_page(page);
+                       wait_on_page_writeback(page);
+                       goto retry;
+               }
+
+               if (shmem)
+                       swap_duplicate(entry);
+               else
+                       delete_from_swap_cache(page);
+       }
+
+       /*
+        * So we could skip searching mms once swap count went to 1, we did not
+        * mark any present ptes as dirty: must mark page dirty so shrink_list
+        * will preserve it.
+        */
+       SetPageDirty(page);
+unuse_err:
+       unlock_page(page);
+       page_cache_release(page);
+
+       if (start_mm_p)
+               *start_mm_p = start_mm;
+       else
+               mmput(start_mm);
+
+       return retval;
+}
+
 /*
  * Scan swap_map from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
@@ -686,13 +854,10 @@ static int try_to_unuse(unsigned int typ
        struct swap_info_struct * si = &swap_info[type];
        struct mm_struct *start_mm;
        unsigned short *swap_map;
-       unsigned short swcount;
        struct page *page;
        swp_entry_t entry;
        unsigned int i = 0;
        int retval = 0;
-       int reset_overflow = 0;
-       int shmem;
 
        /*
         * When searching mms for an entry, a good strategy is to
@@ -744,152 +909,10 @@ static int try_to_unuse(unsigned int typ
                        break;
                }
 
-               /*
-                * Don't hold on to start_mm if it looks like exiting.
-                */
-               if (atomic_read(&start_mm->mm_users) == 1) {
-                       mmput(start_mm);
-                       start_mm = &init_mm;
-                       atomic_inc(&init_mm.mm_users);
-               }
-
-               /*
-                * Wait for and lock page.  When do_swap_page races with
-                * try_to_unuse, do_swap_page can handle the fault much
-                * faster than try_to_unuse can locate the entry.  This
-                * apparently redundant "wait_on_page_locked" lets try_to_unuse
-                * defer to do_swap_page in such a case - in some tests,
-                * do_swap_page and try_to_unuse repeatedly compete.
-                */
-               wait_on_page_locked(page);
-               wait_on_page_writeback(page);
-               lock_page(page);
-               wait_on_page_writeback(page);
-
-               /*
-                * Remove all references to entry.
-                * Whenever we reach init_mm, there's no address space
-                * to search, but use it as a reminder to search shmem.
-                */
-               shmem = 0;
-               swcount = *swap_map;
-               if (swcount > 1) {
-                       if (start_mm == &init_mm)
-                               shmem = shmem_unuse(entry, page);
-                       else
-                               retval = unuse_mm(start_mm, entry, page);
-               }
-               if (*swap_map > 1) {
-                       int set_start_mm = (*swap_map >= swcount);
-                       struct list_head *p = &start_mm->mmlist;
-                       struct mm_struct *new_start_mm = start_mm;
-                       struct mm_struct *prev_mm = start_mm;
-                       struct mm_struct *mm;
-
-                       atomic_inc(&new_start_mm->mm_users);
-                       atomic_inc(&prev_mm->mm_users);
-                       spin_lock(&mmlist_lock);
-                       while (*swap_map > 1 && !retval &&
-                                       (p = p->next) != &start_mm->mmlist) {
-                               mm = list_entry(p, struct mm_struct, mmlist);
-                               if (!atomic_inc_not_zero(&mm->mm_users))
-                                       continue;
-                               spin_unlock(&mmlist_lock);
-                               mmput(prev_mm);
-                               prev_mm = mm;
-
-                               cond_resched();
+               retval = try_to_unuse_entry(entry, swap_map, page, &start_mm);
 
-                               swcount = *swap_map;
-                               if (swcount <= 1)
-                                       ;
-                               else if (mm == &init_mm) {
-                                       set_start_mm = 1;
-                                       shmem = shmem_unuse(entry, page);
-                               } else
-                                       retval = unuse_mm(mm, entry, page);
-                               if (set_start_mm && *swap_map < swcount) {
-                                       mmput(new_start_mm);
-                                       atomic_inc(&mm->mm_users);
-                                       new_start_mm = mm;
-                                       set_start_mm = 0;
-                               }
-                               spin_lock(&mmlist_lock);
-                       }
-                       spin_unlock(&mmlist_lock);
-                       mmput(prev_mm);
-                       mmput(start_mm);
-                       start_mm = new_start_mm;
-               }
-               if (retval) {
-                       unlock_page(page);
-                       page_cache_release(page);
+               if (retval)
                        break;
-               }
-
-               /*
-                * How could swap count reach 0x7fff when the maximum
-                * pid is 0x7fff, and there's no way to repeat a swap
-                * page within an mm (except in shmem, where it's the
-                * shared object which takes the reference count)?
-                * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
-                *
-                * If that's wrong, then we should worry more about
-                * exit_mmap() and do_munmap() cases described above:
-                * we might be resetting SWAP_MAP_MAX too early here.
-                * We know "Undead"s can happen, they're okay, so don't
-                * report them; but do report if we reset SWAP_MAP_MAX.
-                */
-               if (*swap_map == SWAP_MAP_MAX) {
-                       spin_lock(&swap_lock);
-                       *swap_map = 1;
-                       spin_unlock(&swap_lock);
-                       reset_overflow = 1;
-               }
-
-               /*
-                * If a reference remains (rare), we would like to leave
-                * the page in the swap cache; but try_to_unmap could
-                * then re-duplicate the entry once we drop page lock,
-                * so we might loop indefinitely; also, that page could
-                * not be swapped out to other storage meanwhile.  So:
-                * delete from cache even if there's another reference,
-                * after ensuring that the data has been saved to disk -
-                * since if the reference remains (rarer), it will be
-                * read from disk into another page.  Splitting into two
-                * pages would be incorrect if swap supported "shared
-                * private" pages, but they are handled by tmpfs files.
-                *
-                * Note shmem_unuse already deleted a swappage from
-                * the swap cache, unless the move to filepage failed:
-                * in which case it left swappage in cache, lowered its
-                * swap count to pass quickly through the loops above,
-                * and now we must reincrement count to try again later.
-                */
-               if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
-                       struct writeback_control wbc = {
-                               .sync_mode = WB_SYNC_NONE,
-                       };
-
-                       swap_writepage(page, &wbc);
-                       lock_page(page);
-                       wait_on_page_writeback(page);
-               }
-               if (PageSwapCache(page)) {
-                       if (shmem)
-                               swap_duplicate(entry);
-                       else
-                               delete_from_swap_cache(page);
-               }
-
-               /*
-                * So we could skip searching mms once swap count went
-                * to 1, we did not mark any present ptes as dirty: must
-                * mark page dirty so shrink_list will preserve it.
-                */
-               SetPageDirty(page);
-               unlock_page(page);
-               page_cache_release(page);
 
                /*
                 * Make sure that we aren't completely killing
@@ -899,10 +922,6 @@ static int try_to_unuse(unsigned int typ
        }
 
        mmput(start_mm);
-       if (reset_overflow) {
-               printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
-               swap_overflow = 0;
-       }
        return retval;
 }
 

