Move the code that un-uses a single swap entry out of the loop in try_to_unuse() and into a new function: try_to_unuse_swp_entry(). Declare the new function in swapfile.h, just as try_to_unuse() is declared there.
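
After this change the per-entry work lives in the new helper and the loop body in try_to_unuse() reduces to roughly the following (a condensed sketch of the mm/swapfile.c hunk below, not the verbatim result):

	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
		if (try_to_unuse_swp_entry(&start_mm, si,
				swp_entry(type, i)) != 0)
			break;
		/* frontswap pages_to_unuse handling stays as before */
		...
	}
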
Signed-off-by: Krzysztof Kozlowski <k.kozlow...@samsung.com>
---
 include/linux/swapfile.h |    2 +
 mm/swapfile.c            |  354 ++++++++++++++++++++++++----------------
 2 files changed, 187 insertions(+), 169 deletions(-)

diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e282624..68c24a7 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -9,5 +9,7 @@ extern spinlock_t swap_lock;
 extern struct swap_list_t swap_list;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
+extern int try_to_unuse_swp_entry(struct mm_struct **start_mm,
+		struct swap_info_struct *si, swp_entry_t entry);
 
 #endif /* _LINUX_SWAPFILE_H */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 36af6ee..331d0b8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1100,6 +1100,189 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 }
 
 /*
+ * Returns:
+ *  - negative on error,
+ *  - 0 on success (entry unused)
+ */
+int try_to_unuse_swp_entry(struct mm_struct **start_mm,
+		struct swap_info_struct *si, swp_entry_t entry)
+{
+	pgoff_t offset = swp_offset(entry);
+	unsigned char *swap_map;
+	unsigned char swcount;
+	struct page *page;
+	int retval = 0;
+
+	if (signal_pending(current)) {
+		retval = -EINTR;
+		goto out;
+	}
+
+	/*
+	 * Get a page for the entry, using the existing swap
+	 * cache page if there is one. Otherwise, get a clean
+	 * page and read the swap into it.
+	 */
+	swap_map = &si->swap_map[offset];
+	page = read_swap_cache_async(entry,
+				GFP_HIGHUSER_MOVABLE, NULL, 0);
+	if (!page) {
+		/*
+		 * Either swap_duplicate() failed because entry
+		 * has been freed independently, and will not be
+		 * reused since sys_swapoff() already disabled
+		 * allocation from here, or alloc_page() failed.
+		 */
+		if (!*swap_map)
+			retval = 0;
+		else
+			retval = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Don't hold on to start_mm if it looks like exiting.
+	 */
+	if (atomic_read(&(*start_mm)->mm_users) == 1) {
+		mmput(*start_mm);
+		*start_mm = &init_mm;
+		atomic_inc(&init_mm.mm_users);
+	}
+
+	/*
+	 * Wait for and lock page. When do_swap_page races with
+	 * try_to_unuse, do_swap_page can handle the fault much
+	 * faster than try_to_unuse can locate the entry. This
+	 * apparently redundant "wait_on_page_locked" lets try_to_unuse
+	 * defer to do_swap_page in such a case - in some tests,
+	 * do_swap_page and try_to_unuse repeatedly compete.
+	 */
+	wait_on_page_locked(page);
+	wait_on_page_writeback(page);
+	lock_page(page);
+	wait_on_page_writeback(page);
+
+	/*
+	 * Remove all references to entry.
+	 */
+	swcount = *swap_map;
+	if (swap_count(swcount) == SWAP_MAP_SHMEM) {
+		retval = shmem_unuse(entry, page);
+		VM_BUG_ON(retval > 0);
+		/* page has already been unlocked and released */
+		goto out;
+	}
+	if (swap_count(swcount) && *start_mm != &init_mm)
+		retval = unuse_mm(*start_mm, entry, page);
+
+	if (swap_count(*swap_map)) {
+		int set_start_mm = (*swap_map >= swcount);
+		struct list_head *p = &(*start_mm)->mmlist;
+		struct mm_struct *new_start_mm = *start_mm;
+		struct mm_struct *prev_mm = *start_mm;
+		struct mm_struct *mm;
+
+		atomic_inc(&new_start_mm->mm_users);
+		atomic_inc(&prev_mm->mm_users);
+		spin_lock(&mmlist_lock);
+		while (swap_count(*swap_map) && !retval &&
+				(p = p->next) != &(*start_mm)->mmlist) {
+			mm = list_entry(p, struct mm_struct, mmlist);
+			if (!atomic_inc_not_zero(&mm->mm_users))
+				continue;
+			spin_unlock(&mmlist_lock);
+			mmput(prev_mm);
+			prev_mm = mm;
+
+			cond_resched();
+
+			swcount = *swap_map;
+			if (!swap_count(swcount)) /* any usage ? */
+				;
+			else if (mm == &init_mm)
+				set_start_mm = 1;
+			else
+				retval = unuse_mm(mm, entry, page);
+
+			if (set_start_mm && *swap_map < swcount) {
+				mmput(new_start_mm);
+				atomic_inc(&mm->mm_users);
+				new_start_mm = mm;
+				set_start_mm = 0;
+			}
+			spin_lock(&mmlist_lock);
+		}
+		spin_unlock(&mmlist_lock);
+		mmput(prev_mm);
+		mmput(*start_mm);
+		*start_mm = new_start_mm;
+	}
+	if (retval) {
+		unlock_page(page);
+		page_cache_release(page);
+		goto out;
+	}
+
+	/*
+	 * If a reference remains (rare), we would like to leave
+	 * the page in the swap cache; but try_to_unmap could
+	 * then re-duplicate the entry once we drop page lock,
+	 * so we might loop indefinitely; also, that page could
+	 * not be swapped out to other storage meanwhile. So:
+	 * delete from cache even if there's another reference,
+	 * after ensuring that the data has been saved to disk -
+	 * since if the reference remains (rarer), it will be
+	 * read from disk into another page. Splitting into two
+	 * pages would be incorrect if swap supported "shared
+	 * private" pages, but they are handled by tmpfs files.
+	 *
+	 * Given how unuse_vma() targets one particular offset
+	 * in an anon_vma, once the anon_vma has been determined,
+	 * this splitting happens to be just what is needed to
+	 * handle where KSM pages have been swapped out: re-reading
+	 * is unnecessarily slow, but we can fix that later on.
+	 */
+	if (swap_count(*swap_map) &&
+	     PageDirty(page) && PageSwapCache(page)) {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_NONE,
+		};
+
+		swap_writepage(page, &wbc);
+		lock_page(page);
+		wait_on_page_writeback(page);
+	}
+
+	/*
+	 * It is conceivable that a racing task removed this page from
+	 * swap cache just before we acquired the page lock at the top,
+	 * or while we dropped it in unuse_mm(). The page might even
+	 * be back in swap cache on another swap area: that we must not
+	 * delete, since it may not have been written out to swap yet.
+	 */
+	if (PageSwapCache(page) &&
+	    likely(page_private(page) == entry.val))
+		delete_from_swap_cache(page);
+
+	/*
+	 * So we could skip searching mms once swap count went
+	 * to 1, we did not mark any present ptes as dirty: must
+	 * mark page dirty so shrink_page_list will preserve it.
+	 */
+	SetPageDirty(page);
+	unlock_page(page);
+	page_cache_release(page);
+
+	/*
+	 * Make sure that we aren't completely killing
+	 * interactive performance.
+	 */
+	cond_resched();
+out:
+	return retval;
+}
+
+/*
  * We completely avoid races by reading each swap page in advance,
  * and then search for the process using it.  All the necessary
  * page table adjustments can then be made atomically.
@@ -1112,10 +1295,6 @@ int try_to_unuse(unsigned int type, bool frontswap,
 {
 	struct swap_info_struct *si = swap_info[type];
 	struct mm_struct *start_mm;
-	unsigned char *swap_map;
-	unsigned char swcount;
-	struct page *page;
-	swp_entry_t entry;
 	unsigned int i = 0;
 	int retval = 0;
 
@@ -1142,172 +1321,9 @@ int try_to_unuse(unsigned int type, bool frontswap,
 	 * there are races when an instance of an entry might be missed.
 	 */
 	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			break;
-		}
-
-		/*
-		 * Get a page for the entry, using the existing swap
-		 * cache page if there is one. Otherwise, get a clean
-		 * page and read the swap into it.
-		 */
-		swap_map = &si->swap_map[i];
-		entry = swp_entry(type, i);
-		page = read_swap_cache_async(entry,
-					GFP_HIGHUSER_MOVABLE, NULL, 0);
-		if (!page) {
-			/*
-			 * Either swap_duplicate() failed because entry
-			 * has been freed independently, and will not be
-			 * reused since sys_swapoff() already disabled
-			 * allocation from here, or alloc_page() failed.
-			 */
-			if (!*swap_map)
-				continue;
-			retval = -ENOMEM;
-			break;
-		}
-
-		/*
-		 * Don't hold on to start_mm if it looks like exiting.
-		 */
-		if (atomic_read(&start_mm->mm_users) == 1) {
-			mmput(start_mm);
-			start_mm = &init_mm;
-			atomic_inc(&init_mm.mm_users);
-		}
-
-		/*
-		 * Wait for and lock page. When do_swap_page races with
-		 * try_to_unuse, do_swap_page can handle the fault much
-		 * faster than try_to_unuse can locate the entry. This
-		 * apparently redundant "wait_on_page_locked" lets try_to_unuse
-		 * defer to do_swap_page in such a case - in some tests,
-		 * do_swap_page and try_to_unuse repeatedly compete.
-		 */
-		wait_on_page_locked(page);
-		wait_on_page_writeback(page);
-		lock_page(page);
-		wait_on_page_writeback(page);
-
-		/*
-		 * Remove all references to entry.
-		 */
-		swcount = *swap_map;
-		if (swap_count(swcount) == SWAP_MAP_SHMEM) {
-			retval = shmem_unuse(entry, page);
-			/* page has already been unlocked and released */
-			if (retval < 0)
-				break;
-			continue;
-		}
-		if (swap_count(swcount) && start_mm != &init_mm)
-			retval = unuse_mm(start_mm, entry, page);
-
-		if (swap_count(*swap_map)) {
-			int set_start_mm = (*swap_map >= swcount);
-			struct list_head *p = &start_mm->mmlist;
-			struct mm_struct *new_start_mm = start_mm;
-			struct mm_struct *prev_mm = start_mm;
-			struct mm_struct *mm;
-
-			atomic_inc(&new_start_mm->mm_users);
-			atomic_inc(&prev_mm->mm_users);
-			spin_lock(&mmlist_lock);
-			while (swap_count(*swap_map) && !retval &&
-					(p = p->next) != &start_mm->mmlist) {
-				mm = list_entry(p, struct mm_struct, mmlist);
-				if (!atomic_inc_not_zero(&mm->mm_users))
-					continue;
-				spin_unlock(&mmlist_lock);
-				mmput(prev_mm);
-				prev_mm = mm;
-
-				cond_resched();
-
-				swcount = *swap_map;
-				if (!swap_count(swcount)) /* any usage ? */
-					;
-				else if (mm == &init_mm)
-					set_start_mm = 1;
-				else
-					retval = unuse_mm(mm, entry, page);
-
-				if (set_start_mm && *swap_map < swcount) {
-					mmput(new_start_mm);
-					atomic_inc(&mm->mm_users);
-					new_start_mm = mm;
-					set_start_mm = 0;
-				}
-				spin_lock(&mmlist_lock);
-			}
-			spin_unlock(&mmlist_lock);
-			mmput(prev_mm);
-			mmput(start_mm);
-			start_mm = new_start_mm;
-		}
-		if (retval) {
-			unlock_page(page);
-			page_cache_release(page);
+		if (try_to_unuse_swp_entry(&start_mm, si,
+				swp_entry(type, i)) != 0)
 			break;
-		}
-
-		/*
-		 * If a reference remains (rare), we would like to leave
-		 * the page in the swap cache; but try_to_unmap could
-		 * then re-duplicate the entry once we drop page lock,
-		 * so we might loop indefinitely; also, that page could
-		 * not be swapped out to other storage meanwhile. So:
-		 * delete from cache even if there's another reference,
-		 * after ensuring that the data has been saved to disk -
-		 * since if the reference remains (rarer), it will be
-		 * read from disk into another page. Splitting into two
-		 * pages would be incorrect if swap supported "shared
-		 * private" pages, but they are handled by tmpfs files.
-		 *
-		 * Given how unuse_vma() targets one particular offset
-		 * in an anon_vma, once the anon_vma has been determined,
-		 * this splitting happens to be just what is needed to
-		 * handle where KSM pages have been swapped out: re-reading
-		 * is unnecessarily slow, but we can fix that later on.
-		 */
-		if (swap_count(*swap_map) &&
-		     PageDirty(page) && PageSwapCache(page)) {
-			struct writeback_control wbc = {
-				.sync_mode = WB_SYNC_NONE,
-			};
-
-			swap_writepage(page, &wbc);
-			lock_page(page);
-			wait_on_page_writeback(page);
-		}
-
-		/*
-		 * It is conceivable that a racing task removed this page from
-		 * swap cache just before we acquired the page lock at the top,
-		 * or while we dropped it in unuse_mm(). The page might even
-		 * be back in swap cache on another swap area: that we must not
-		 * delete, since it may not have been written out to swap yet.
-		 */
-		if (PageSwapCache(page) &&
-		    likely(page_private(page) == entry.val))
-			delete_from_swap_cache(page);
-
-		/*
-		 * So we could skip searching mms once swap count went
-		 * to 1, we did not mark any present ptes as dirty: must
-		 * mark page dirty so shrink_page_list will preserve it.
-		 */
-		SetPageDirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-
-		/*
-		 * Make sure that we aren't completely killing
-		 * interactive performance.
-		 */
-		cond_resched();
 		if (frontswap && pages_to_unuse > 0) {
 			if (!--pages_to_unuse)
 				break;
--
1.7.9.5