From: Thomas Hellstrom <thellst...@vmware.com>

A huge pud page can theoretically be faulted in racing with pmd_alloc()
in __handle_mm_fault(). That will lead to pmd_alloc() returning an
invalid pmd pointer. Fix this by adding a pud_trans_unstable() function
similar to pmd_trans_unstable() and check whether the pud is really stable
before using the pmd pointer.

Race:
Thread 1:             Thread 2:                 Comment
create_huge_pud()                               Fallback - not taken.
                      create_huge_pud()         Taken.
pmd_alloc()                                     Returns an invalid pointer.

Cc: Matthew Wilcox <wi...@infradead.org>
Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
Signed-off-by: Thomas Hellstrom <thellst...@vmware.com>
---
 include/asm-generic/pgtable.h | 25 +++++++++++++++++++++++++
 mm/memory.c                   |  6 ++++++
 2 files changed, 31 insertions(+)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 818691846c90..70c2058230ba 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -912,6 +912,31 @@ static inline int pud_trans_huge(pud_t pud)
 }
 #endif
 
+/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
+static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
+{
+       pud_t pudval = READ_ONCE(*pud);
+
+       if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
+               return 1;
+       if (unlikely(pud_bad(pudval))) {
+               pud_clear_bad(pud);
+               return 1;
+       }
+       return 0;
+}
+
+/* See pmd_trans_unstable for discussion. */
+static inline int pud_trans_unstable(pud_t *pud)
+{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&                    \
+       defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+       return pud_none_or_trans_huge_or_dev_or_clear_bad(pud);
+#else
+       return 0;
+#endif
+}
+
 #ifndef pmd_read_atomic
 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 {
diff --git a/mm/memory.c b/mm/memory.c
index b1ca51a079f2..43ff372f4f07 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3914,6 +3914,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct 
*vma,
        vmf.pud = pud_alloc(mm, p4d, address);
        if (!vmf.pud)
                return VM_FAULT_OOM;
+retry_pud:
        if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
                ret = create_huge_pud(&vmf);
                if (!(ret & VM_FAULT_FALLBACK))
@@ -3940,6 +3941,11 @@ static vm_fault_t __handle_mm_fault(struct 
vm_area_struct *vma,
        vmf.pmd = pmd_alloc(mm, vmf.pud, address);
        if (!vmf.pmd)
                return VM_FAULT_OOM;
+
+       /* Huge pud page fault raced with pmd_alloc? */
+       if (pud_trans_unstable(vmf.pud))
+               goto retry_pud;
+
        if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) {
                ret = create_huge_pmd(&vmf);
                if (!(ret & VM_FAULT_FALLBACK))
-- 
2.21.0

Reply via email to