The huge page may be reallocated during soft_offline_page(), because
MIGRATE_ISOLATE cannot hold the page back until PG_hwpoison has been set.
It can be handed out again through this allocation path:
alloc_huge_page()
        dequeue_huge_page_vma()
                dequeue_huge_page_node()
If the huge page was reallocated, we need to try to offline it again.

Signed-off-by: Xishi Qiu <qiuxi...@huawei.com>
---
 mm/memory-failure.c |   21 ++++++++++++++++++---
 1 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b7c1716..f384249 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1505,8 +1505,11 @@ static int soft_offline_huge_page(struct page *page, int flags)
                if (ret > 0)
                        ret = -EIO;
        } else {
+               ret = dequeue_hwpoisoned_huge_page(hpage);
+               /* If the page was reallocated, we need to try again. */
+               if (ret)
+                       return -EAGAIN;
                set_page_hwpoison_huge_page(hpage);
-               dequeue_hwpoisoned_huge_page(hpage);
                atomic_long_add(1 << compound_order(hpage),
                                &num_poisoned_pages);
        }
@@ -1624,10 +1627,11 @@ static int __soft_offline_page(struct page *page, int flags)
  */
 int soft_offline_page(struct page *page, int flags)
 {
-       int ret;
+       int ret, retry_max = 3;
        unsigned long pfn = page_to_pfn(page);
        struct page *hpage = compound_trans_head(page);
 
+retry:
        if (PageHWPoison(page)) {
                pr_info("soft offline: %#lx page already poisoned\n", pfn);
                return -EBUSY;
@@ -1663,8 +1667,15 @@ int soft_offline_page(struct page *page, int flags)
                        ret = __soft_offline_page(page, flags);
        } else if (ret == 0) { /* for free pages */
                if (PageHuge(page)) {
+                       ret = dequeue_hwpoisoned_huge_page(hpage);
+                       /* If the page was reallocated, we need to try again. */
+                       if (ret) {
+                               unset_migratetype_isolate(page,
+                                               MIGRATE_MOVABLE);
+                               if (retry_max-- > 0)
+                                       goto retry;
+                       }
                        set_page_hwpoison_huge_page(hpage);
-                       dequeue_hwpoisoned_huge_page(hpage);
                        atomic_long_add(1 << compound_order(hpage),
                                        &num_poisoned_pages);
                } else {
@@ -1673,5 +1684,9 @@ int soft_offline_page(struct page *page, int flags)
                }
        }
        unset_migratetype_isolate(page, MIGRATE_MOVABLE);
+
+       if (ret == -EAGAIN && retry_max-- > 0)
+               goto retry;
+
        return ret;
 }
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to