Commit-ID:  eb4f84b1aaad78ca07e274b81410ec9d60abe434
Gitweb:     http://git.kernel.org/tip/eb4f84b1aaad78ca07e274b81410ec9d60abe434
Author:     Ingo Molnar <[email protected]>
AuthorDate: Sat, 20 Oct 2012 23:06:00 +0200
Committer:  Ingo Molnar <[email protected]>
CommitDate: Sun, 21 Oct 2012 14:40:42 +0200

numa, mm: Fix 4K migration races

__unmap_and_move() can fail with -EAGAIN:

 - if we race with swapout picking up the page

 - if migrate_page_move_mapping() sees the
   page count changing unexpectedly

 - if two threads are trying to migrate the same page

There are other, more theoretical races as well, such as
the possibility of a pte being WSS mapped twice.

Fix them and clean up the code flow.

Signed-off-by: Ingo Molnar <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Rik van Riel <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
---
 mm/memory.c  |   44 ++++++++++++++++++++++++++++++++++++--------
 mm/migrate.c |   22 ++++++++++++----------
 2 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index c0de477..2cc8a29 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3483,11 +3483,13 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *page = NULL;
        int node, page_nid = -1;
        spinlock_t *ptl;
+       int account = 1;
+       int locked = 0;
 
        ptl = pte_lockptr(mm, pmd);
        spin_lock(ptl);
        if (unlikely(!pte_same(*ptep, entry)))
-               goto unlock;
+               goto out_unlock;
 
        page = vm_normal_page(vma, address, entry);
        if (page) {
@@ -3498,20 +3500,25 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
                        goto migrate;
        }
 
-fixup:
+out_pte_upgrade_unlock:
        flush_cache_page(vma, address, pte_pfn(entry));
 
        ptep_modify_prot_start(mm, address, ptep);
        entry = pte_modify(entry, vma->vm_page_prot);
        ptep_modify_prot_commit(mm, address, ptep, entry);
 
+       /* No TLB flush needed because we upgraded the PTE */
+
        update_mmu_cache(vma, address, ptep);
 
-unlock:
+out_unlock:
        pte_unmap_unlock(ptep, ptl);
 out:
        if (page) {
-               task_numa_fault(page_nid, 1);
+               if (locked)
+                       unlock_page(page);
+               if (account)
+                       task_numa_fault(page_nid, 1);
                put_page(page);
        }
 
@@ -3520,19 +3527,40 @@ out:
 migrate:
        pte_unmap_unlock(ptep, ptl);
 
+       locked = 1;
+       lock_page(page);
+
+       /*
+        * We have to do this again, to make sure
+        * we have not raced with a pte update
+        * during the lock_page():
+        */
+       ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (!pte_same(*ptep, entry)) {
+               account = 0;
+               goto out_unlock;
+       }
+       pte_unmap_unlock(ptep, ptl);
+
        if (!migrate_misplaced_page(page, node)) {
+               /*
+                * Successful migration - account the fault.
+                * Note, we don't fix up the pte, that will
+                * happen on the next fault.
+                */
                page_nid = node;
+               put_page(page);
+
                goto out;
        }
 
        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (!pte_same(*ptep, entry)) {
-               put_page(page);
-               page = NULL;
-               goto unlock;
+               account = 0;
+               goto out_unlock;
        }
 
-       goto fixup;
+       goto out_pte_upgrade_unlock;
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 72d1056..6d16bff 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1449,20 +1449,22 @@ int migrate_misplaced_page(struct page *page, int node)
        }
 
        if (isolate_lru_page(page)) {
-               ret = -EBUSY;
-               goto put_new;
+               put_page(newpage);
+               return -EBUSY;
        }
 
        inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
        ret = __unmap_and_move(page, newpage, 0, 0, MIGRATE_FAULT);
-       /*
-        * A page that has been migrated has all references removed and will be
-        * freed. A page that has not been migrated will have kepts its
-        * references and be restored.
-        */
-       dec_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-       putback_lru_page(page);
-put_new:
+
+       if (ret != -EAGAIN) {
+               /*
+                * A page that has been migrated has all references removed and will be
+                * freed. A page that has not been migrated will have kepts its
+                * references and be restored.
+                */
+               dec_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+               putback_lru_page(page);
+       }
        /*
         * Move the new page to the LRU. If migration was not successful
         * then this will free the page.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to