As we already do for soft-offline, hard-offline needs to take poisoned
pages off the buddy allocator.
Otherwise we could run into the problem described in [1] here as well.

[1] https://lore.kernel.org/linux-mm/20190826104144.GA7849@linux/T/#u

Signed-off-by: Oscar Salvador <[email protected]>
---
 mm/memory-failure.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 48eb314598e0..3d491c0d3f91 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -791,6 +791,14 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
                return MF_FAILED;
 }
 
+static int me_huge_free_page(struct page *p)
+{
+       if (page_handle_poison(p, true, false))
+               return MF_RECOVERED;
+       else
+               return MF_FAILED;
+}
+
 /*
  * Huge pages. Needs work.
  * Issues:
@@ -818,8 +826,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
                 */
                if (PageAnon(hpage))
                        put_page(hpage);
-               dissolve_free_huge_page(p);
-               res = MF_RECOVERED;
+               res = me_huge_free_page(p);
                lock_page(hpage);
        }
 
@@ -1145,8 +1152,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
                        }
                }
                unlock_page(head);
-               dissolve_free_huge_page(p);
-               action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
+               res = me_huge_free_page(p);
+               if (res == MF_FAILED)
+                       num_poisoned_pages_dec();
+               action_result(pfn, MF_MSG_FREE_HUGE, res);
                return 0;
        }
 
@@ -1307,6 +1316,12 @@ int memory_failure(unsigned long pfn, int flags)
 
        if (PageHuge(p))
                return memory_failure_hugetlb(pfn, flags);
+
+       if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+               action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
+               return 0;
+       }
+
        if (TestSetPageHWPoison(p)) {
                pr_err("Memory failure: %#lx: already hardware poisoned\n",
                        pfn);
@@ -1328,10 +1343,10 @@ int memory_failure(unsigned long pfn, int flags)
         * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
         */
        if (!get_hwpoison_page(p)) {
-               if (is_free_buddy_page(p)) {
-                       action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
+               if (is_free_buddy_page(p) && page_handle_poison(p, true, 
false)) {
+                       action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
                        return 0;
-               } else {
+               } else if(!is_free_buddy_page(p)) {
                        action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, 
MF_IGNORED);
                        return -EBUSY;
                }
@@ -1354,8 +1369,8 @@ int memory_failure(unsigned long pfn, int flags)
         */
        shake_page(p, 0);
        /* shake_page could have turned it free. */
-       if (!PageLRU(p) && is_free_buddy_page(p)) {
-               action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED);
+       if (!PageLRU(p) && is_free_buddy_page(p) && page_handle_poison(p, true, 
false)) {
+               action_result(pfn, MF_MSG_BUDDY_2ND, MF_RECOVERED);
                return 0;
        }
 
-- 
2.12.3

Reply via email to