From: Zi Yan <[email protected]>

The pagewalker runs while only holding the mmap_sem for read. The pmd can
be set asynchronously, while also holding the mmap_sem for read. Read the
pmd entry once with READ_ONCE() in walk_pmd_range(), pass both the value and
the pointer to the pmd_entry callbacks, and have each callback re-check the
value against *pmdp after taking the page table lock, retrying with
ACTION_AGAIN if the entry changed underneath it.

This follows the same approach as the commit
"mm/pagewalk: use READ_ONCE when reading the PUD entry unlocked".
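
For reviewers, here is a minimal sketch of the pattern each converted
callback now follows (example_pmd_entry is a hypothetical name used only
for illustration; the real conversions are in the diff below):

        /* Illustrative only; not part of this patch. */
        static int example_pmd_entry(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                                     unsigned long end, struct mm_walk *walk)
        {
                spinlock_t *ptl;

                /* 'pmd' is the READ_ONCE() snapshot taken in walk_pmd_range(). */
                ptl = pmd_trans_huge_lock(pmdp, walk->vma);
                if (ptl) {
                        /*
                         * The entry may have changed between the unlocked read
                         * and taking the lock; if so, ask the walker to retry.
                         */
                        if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
                                walk->action = ACTION_AGAIN;
                                spin_unlock(ptl);
                                return 0;
                        }
                        /* ... operate on the stable huge pmd value ... */
                        spin_unlock(ptl);
                        return 0;
                }

                if (pmd_trans_unstable(&pmd))
                        return 0;
                /* ... pte-level work via pte_offset_map_lock(mm, pmdp, ...) ... */
                return 0;
        }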

Signed-off-by: Zi Yan <[email protected]>
---
 fs/proc/task_mmu.c       | 69 ++++++++++++++++++++++++++--------------
 include/linux/pagewalk.h |  2 +-
 mm/madvise.c             | 59 ++++++++++++++++++----------------
 mm/memcontrol.c          | 30 +++++++++++------
 mm/mempolicy.c           | 15 ++++++---
 mm/mincore.c             | 10 +++---
 mm/pagewalk.c            | 21 ++++++------
 7 files changed, 124 insertions(+), 82 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 069978777423..a21484b1414d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -570,28 +570,33 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 }
 #endif
 
-static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                          struct mm_walk *walk)
+static int smaps_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+                       unsigned long end, struct mm_walk *walk)
 {
        struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;
 
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
-               smaps_pmd_entry(pmd, addr, walk);
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
+               smaps_pmd_entry(pmdp, addr, walk);
                spin_unlock(ptl);
                goto out;
        }
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                goto out;
        /*
         * The mmap_lock held all the way back in m_start() is what
         * keeps khugepaged out of here and from collapsing things
         * in here.
         */
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE)
                smaps_pte_entry(pte, addr, walk);
        pte_unmap_unlock(pte - 1, ptl);
@@ -1091,7 +1096,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
 }
 #endif
 
-static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+static int clear_refs_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 {
        struct clear_refs_private *cp = walk->private;
@@ -1100,20 +1105,25 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        spinlock_t *ptl;
        struct page *page;
 
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
-                       clear_soft_dirty_pmd(vma, addr, pmd);
+                       clear_soft_dirty_pmd(vma, addr, pmdp);
                        goto out;
                }
 
-               if (!pmd_present(*pmd))
+               if (!pmd_present(pmd))
                        goto out;
 
-               page = pmd_page(*pmd);
+               page = pmd_page(pmd);
 
                /* Clear accessed and referenced bits. */
-               pmdp_test_and_clear_young(vma, addr, pmd);
+               pmdp_test_and_clear_young(vma, addr, pmdp);
                test_and_clear_page_young(page);
                ClearPageReferenced(page);
 out:
@@ -1121,10 +1131,10 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
                return 0;
        }
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                ptent = *pte;
 
@@ -1388,8 +1398,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
        return make_pme(frame, flags);
 }
 
-static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
-                            struct mm_walk *walk)
+static int pagemap_pmd_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+                       unsigned long end, struct mm_walk *walk)
 {
        struct vm_area_struct *vma = walk->vma;
        struct pagemapread *pm = walk->private;
@@ -1401,9 +1411,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
        ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
                u64 flags = 0, frame = 0;
-               pmd_t pmd = *pmdp;
                struct page *page = NULL;
 
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
+
                if (vma->vm_flags & VM_SOFTDIRTY)
                        flags |= PM_SOFT_DIRTY;
 
@@ -1456,7 +1471,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                return err;
        }
 
-       if (pmd_trans_unstable(pmdp))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -1768,7 +1783,7 @@ static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
 }
 #endif
 
-static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+static int gather_pte_stats(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                unsigned long end, struct mm_walk *walk)
 {
        struct numa_maps *md = walk->private;
@@ -1778,22 +1793,28 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
        pte_t *pte;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
                struct page *page;
 
-               page = can_gather_numa_stats_pmd(*pmd, vma, addr);
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
+
+               page = can_gather_numa_stats_pmd(pmd, vma, addr);
                if (page)
-                       gather_stats(page, md, pmd_dirty(*pmd),
+                       gather_stats(page, md, pmd_dirty(pmd),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
                spin_unlock(ptl);
                return 0;
        }
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 #endif
-       orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
        do {
                struct page *page = can_gather_numa_stats(*pte, vma, addr);
                if (!page)
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 6caf28aadafb..686b57e94a9f 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -41,7 +41,7 @@ struct mm_walk_ops {
                         unsigned long next, struct mm_walk *walk);
        int (*pud_entry)(pud_t pud, pud_t *pudp, unsigned long addr,
                         unsigned long next, struct mm_walk *walk);
-       int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+       int (*pmd_entry)(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                         unsigned long next, struct mm_walk *walk);
        int (*pte_entry)(pte_t *pte, unsigned long addr,
                         unsigned long next, struct mm_walk *walk);
diff --git a/mm/madvise.c b/mm/madvise.c
index ae266dfede8a..16e7b8eadb13 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -183,14 +183,14 @@ static long madvise_behavior(struct vm_area_struct *vma,
 }
 
 #ifdef CONFIG_SWAP
-static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
+static int swapin_walk_pmd_entry(pmd_t pmd, pmd_t *pmdp, unsigned long start,
        unsigned long end, struct mm_walk *walk)
 {
        pte_t *orig_pte;
        struct vm_area_struct *vma = walk->private;
        unsigned long index;
 
-       if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+       if (pmd_none_or_trans_huge_or_clear_bad(&pmd))
                return 0;
 
        for (index = start; index != end; index += PAGE_SIZE) {
@@ -199,7 +199,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
                struct page *page;
                spinlock_t *ptl;
 
-               orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
+               orig_pte = pte_offset_map_lock(vma->vm_mm, pmdp, start, &ptl);
                pte = *(orig_pte + ((index - start) / PAGE_SIZE));
                pte_unmap_unlock(orig_pte, ptl);
 
@@ -304,7 +304,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
        return 0;
 }
 
-static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
+static int madvise_cold_or_pageout_pte_range(pmd_t pmd, pmd_t *pmdp,
                                unsigned long addr, unsigned long end,
                                struct mm_walk *walk)
 {
@@ -322,26 +322,29 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                return -EINTR;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       if (pmd_trans_huge(*pmd)) {
-               pmd_t orig_pmd;
+       if (pmd_trans_huge(pmd)) {
                unsigned long next = pmd_addr_end(addr, end);
 
                tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
-               ptl = pmd_trans_huge_lock(pmd, vma);
+               ptl = pmd_trans_huge_lock(pmdp, vma);
                if (!ptl)
                        return 0;
 
-               orig_pmd = *pmd;
-               if (is_huge_zero_pmd(orig_pmd))
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       goto huge_unlock;
+               }
+
+               if (is_huge_zero_pmd(pmd))
                        goto huge_unlock;
 
-               if (unlikely(!pmd_present(orig_pmd))) {
+               if (unlikely(!pmd_present(pmd))) {
                        VM_BUG_ON(thp_migration_supported() &&
-                                       !is_pmd_migration_entry(orig_pmd));
+                                       !is_pmd_migration_entry(pmd));
                        goto huge_unlock;
                }
 
-               page = pmd_page(orig_pmd);
+               page = pmd_page(pmd);
 
                /* Do not interfere with other mappings of this page */
                if (page_mapcount(page) != 1)
@@ -361,12 +364,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                        return 0;
                }
 
-               if (pmd_young(orig_pmd)) {
-                       pmdp_invalidate(vma, addr, pmd);
-                       orig_pmd = pmd_mkold(orig_pmd);
+               if (pmd_young(pmd)) {
+                       pmdp_invalidate(vma, addr, pmdp);
+                       pmd = pmd_mkold(pmd);
 
-                       set_pmd_at(mm, addr, pmd, orig_pmd);
-                       tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+                       set_pmd_at(mm, addr, pmdp, pmd);
+                       tlb_remove_pmd_tlb_entry(tlb, pmdp, addr);
                }
 
                ClearPageReferenced(page);
@@ -388,11 +391,11 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
        }
 
 regular_page:
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 #endif
        tlb_change_page_size(tlb, PAGE_SIZE);
-       orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
        flush_tlb_batched_pending(mm);
        arch_enter_lazy_mmu_mode();
        for (; addr < end; pte++, addr += PAGE_SIZE) {
@@ -424,12 +427,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                        if (split_huge_page(page)) {
                                unlock_page(page);
                                put_page(page);
-                               pte_offset_map_lock(mm, pmd, addr, &ptl);
+                               pte_offset_map_lock(mm, pmdp, addr, &ptl);
                                break;
                        }
                        unlock_page(page);
                        put_page(page);
-                       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+                       pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
                        pte--;
                        addr -= PAGE_SIZE;
                        continue;
@@ -566,7 +569,7 @@ static long madvise_pageout(struct vm_area_struct *vma,
        return 0;
 }
 
-static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+static int madvise_free_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 
 {
@@ -580,15 +583,15 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
        unsigned long next;
 
        next = pmd_addr_end(addr, end);
-       if (pmd_trans_huge(*pmd))
-               if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
+       if (pmd_trans_huge(pmd))
+               if (madvise_free_huge_pmd(tlb, vma, pmdp, addr, next))
                        goto next;
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 
        tlb_change_page_size(tlb, PAGE_SIZE);
-       orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+       orig_pte = pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
        flush_tlb_batched_pending(mm);
        arch_enter_lazy_mmu_mode();
        for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -634,12 +637,12 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                        if (split_huge_page(page)) {
                                unlock_page(page);
                                put_page(page);
-                               pte_offset_map_lock(mm, pmd, addr, &ptl);
+                               pte_offset_map_lock(mm, pmdp, addr, &ptl);
                                goto out;
                        }
                        unlock_page(page);
                        put_page(page);
-                       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+                       pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
                        pte--;
                        addr -= PAGE_SIZE;
                        continue;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9c4a0851348f..b28f620c1c5b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5827,7 +5827,7 @@ static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
 }
 #endif
 
-static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
+static int mem_cgroup_count_precharge_pte_range(pmd_t pmd, pmd_t *pmdp,
                                        unsigned long addr, unsigned long end,
                                        struct mm_walk *walk)
 {
@@ -5835,22 +5835,27 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
                /*
                 * Note their can not be MC_TARGET_DEVICE for now as we do not
                 * support transparent huge page with MEMORY_DEVICE_PRIVATE but
                 * this might change.
                 */
-               if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
+               if (get_mctgt_type_thp(vma, addr, pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
                spin_unlock(ptl);
                return 0;
        }
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE)
                if (get_mctgt_type(vma, addr, *pte, NULL))
                        mc.precharge++; /* increment precharge temporarily */
@@ -6023,7 +6028,7 @@ static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
                mem_cgroup_clear_mc();
 }
 
-static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
+static int mem_cgroup_move_charge_pte_range(pmd_t pmd, pmd_t *pmdp,
                                unsigned long addr, unsigned long end,
                                struct mm_walk *walk)
 {
@@ -6035,13 +6040,18 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        union mc_target target;
        struct page *page;
 
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
                if (mc.precharge < HPAGE_PMD_NR) {
                        spin_unlock(ptl);
                        return 0;
                }
-               target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
+               target_type = get_mctgt_type_thp(vma, addr, pmd, &target);
                if (target_type == MC_TARGET_PAGE) {
                        page = target.page;
                        if (!isolate_lru_page(page)) {
@@ -6066,10 +6076,10 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                return 0;
        }
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 retry:
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+       pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
        for (; addr != end; addr += PAGE_SIZE) {
                pte_t ptent = *(pte++);
                bool device = false;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index eddbe4e56c73..731a7710395f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -516,7 +516,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
  * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
  *        on a node that does not follow the policy.
  */
-static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+static int queue_pages_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                        unsigned long end, struct mm_walk *walk)
 {
        struct vm_area_struct *vma = walk->vma;
@@ -528,18 +528,23 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
        pte_t *pte;
        spinlock_t *ptl;
 
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
-               ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
+               if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+                       walk->action = ACTION_AGAIN;
+                       spin_unlock(ptl);
+                       return 0;
+               }
+               ret = queue_pages_pmd(pmdp, ptl, addr, end, walk);
                if (ret != 2)
                        return ret;
        }
        /* THP was split, fall through to pte walk */
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(&pmd))
                return 0;
 
-       pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                if (!pte_present(*pte))
                        continue;
diff --git a/mm/mincore.c b/mm/mincore.c
index 02db1a834021..168661f32aaa 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -96,8 +96,8 @@ static int mincore_unmapped_range(unsigned long addr, unsigned long end,
        return 0;
 }
 
-static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                       struct mm_walk *walk)
+static int mincore_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+                       unsigned long end, struct mm_walk *walk)
 {
        spinlock_t *ptl;
        struct vm_area_struct *vma = walk->vma;
@@ -105,19 +105,19 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        unsigned char *vec = walk->private;
        int nr = (end - addr) >> PAGE_SHIFT;
 
-       ptl = pmd_trans_huge_lock(pmd, vma);
+       ptl = pmd_trans_huge_lock(pmdp, vma);
        if (ptl) {
                memset(vec, 1, nr);
                spin_unlock(ptl);
                goto out;
        }
 
-       if (pmd_trans_unstable(pmd)) {
+       if (pmd_trans_unstable(&pmd)) {
                __mincore_unmapped_range(addr, end, vma, vec);
                goto out;
        }
 
-       ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       ptep = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
        for (; addr != end; ptep++, addr += PAGE_SIZE) {
                pte_t pte = *ptep;
 
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 15d1e423b4a3..a3752c82a7b2 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -61,17 +61,19 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 static int walk_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                          struct mm_walk *walk)
 {
-       pmd_t *pmd;
+       pmd_t *pmdp;
+       pmd_t pmd;
        unsigned long next;
        const struct mm_walk_ops *ops = walk->ops;
        int err = 0;
        int depth = real_depth(3);
 
-       pmd = pmd_offset(&pud, addr);
+       pmdp = pmd_offset(&pud, addr);
        do {
 again:
+               pmd = READ_ONCE(*pmdp);
                next = pmd_addr_end(addr, end);
-               if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
+               if (pmd_none(pmd) || (!walk->vma && !walk->no_vma)) {
                        if (ops->pte_hole)
                                err = ops->pte_hole(addr, next, depth, walk);
                        if (err)
@@ -86,7 +88,7 @@ static int walk_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                 * needs to know about pmd_trans_huge() pmds
                 */
                if (ops->pmd_entry)
-                       err = ops->pmd_entry(pmd, addr, next, walk);
+                       err = ops->pmd_entry(pmd, pmdp, addr, next, walk);
                if (err)
                        break;
 
@@ -97,21 +99,22 @@ static int walk_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                 * Check this here so we only break down trans_huge
                 * pages when we _need_ to
                 */
-               if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
+               if ((!walk->vma && (pmd_leaf(pmd) || !pmd_present(pmd))) ||
                    walk->action == ACTION_CONTINUE ||
                    !(ops->pte_entry))
                        continue;
 
                if (walk->vma) {
-                       split_huge_pmd(walk->vma, pmd, addr);
-                       if (pmd_trans_unstable(pmd))
+                       split_huge_pmd(walk->vma, pmdp, addr);
+                       pmd = READ_ONCE(*pmdp);
+                       if (pmd_trans_unstable(&pmd))
                                goto again;
                }
 
-               err = walk_pte_range(pmd, addr, next, walk);
+               err = walk_pte_range(pmdp, addr, next, walk);
                if (err)
                        break;
-       } while (pmd++, addr = next, addr != end);
+       } while (pmdp++, addr = next, addr != end);
 
        return err;
 }
-- 
2.28.0
